I wanted to download data from the Brazilian government, so I wrote the script below.

Source: https://github.com/erickmattoso/auxilio_emergencial

# Import libraries
import requests
import pandas as pd
import time
import numpy as np

# Input data
## Read the municipality table (IBGE codes)
ibge_code = pd.read_excel("../data/raw/RELATORIO_DTB_BRASIL_MUNICIPIO.xls")
## Read my key to access the Brazilian Public Data API.
## read_csv returns a DataFrame, but the key is sent as an HTTP header value,
## which must be a plain string: keep only the first cell. dtype=str stops an
## all-digit key from being parsed as a number.
key = pd.read_csv("../data/raw/key.txt", header=None, sep='\t', dtype=str).iloc[0, 0].strip()

# Keep only the complete municipality codes
codIBGE = ibge_code['Código Município Completo'].to_list()

# Split the codes into batches so the data can be downloaded and saved partially.
# The expression yields roughly 20 batches; the max() guards avoid the
# ZeroDivisionError the unguarded form raised for 10 or fewer codes.
batch_size = max(1, round(len(codIBGE) / 20))
batch = np.array_split(codIBGE, max(1, int(len(codIBGE) / batch_size)))

# Months to request (YYYYMM)
period_list = [202004, 202005, 202006, 202007, 202008, 202009, 202010, 202011, 202012]

# Base URL of the endpoint; the query string is appended per request
irl = "http://api.portaldatransparencia.gov.br/api-de-dados/auxilio-emergencial-por-municipio?"

# Progress file
# NOTE(review): x is only printed and never used below; presumably it marks
# the last batch already downloaded so a run can be resumed -- confirm.
count = pd.read_csv("../data/raw/count.csv")
x = count["count"].max()
print(x)

# Download every (batch, month, municipality) combination and save partial
# results per batch, so an interrupted run does not lose everything.

# Pause before each request: the API limits the number of queries per minute.
seconds = 0.7
# Give up on a request that hangs instead of blocking the whole run.
request_timeout = 30
headers = {"chave-api-dados": key}

# NOTE(review): the slice start looks like a manual "resume from batch N" knob.
# If it is changed, `i` still restarts at 0, so the output file names would
# collide with those of the earlier batches -- confirm before relying on it.
for i, list_batch in enumerate(batch[0:]):
    results = []
    error = []

    for mesAno in period_list:
        print(mesAno)

        for codigoIbge in list_batch:
            # NOTE(review): only the first page is ever requested; if the API
            # paginates this endpoint, later pages are not downloaded.
            pagina = 1
            url = irl + f"codigoIbge={codigoIbge}&mesAno={mesAno}&pagina={pagina}"
            print(url)

            # Throttle explicitly. The sleep used to be passed as the `params`
            # argument of requests.get and only worked because it returns None.
            time.sleep(seconds)

            # Request data from the API
            try:
                resultado = requests.get(url, headers=headers, timeout=request_timeout)
                # Treat HTTP errors (e.g. rate limiting) as failures instead of
                # storing the error response as if it were data.
                resultado.raise_for_status()
                payload = resultado.json()
            # On a network/HTTP/JSON failure, record what failed so it can be retried.
            # A bare `except:` would also swallow KeyboardInterrupt.
            except (requests.RequestException, ValueError) as exc:
                error.append({"codigoIbge": codigoIbge, "mesAno": mesAno})
                print('error', exc)
            else:
                # Keep the decoded records, not the Response object itself.
                results.extend(payload if isinstance(payload, list) else [payload])
                print(codigoIbge)

        # Save partial results after each month (the batch file is overwritten
        # with everything collected so far for this batch).
        pd.json_normalize(results).to_csv('data/df_transparencia_true_' + str(i) + '.csv')
        pd.DataFrame(error).to_csv('data/df_transparencia_error_' + str(i) + '.csv')

Leave a Reply