diff --git a/.coveragerc b/.coveragerc
index 6b91ebe..5ec0d5d 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -1,2 +1,2 @@
 [run]
-omit = scripts/tests/*,scripts/epe_matriz.py,*/__init__.py,scripts/main.py,scripts/handlers/benHandler.py,scripts/handlers/pneHandler.py
\ No newline at end of file
+omit = scripts/tests/*,scripts/epe_matriz.py,*/__init__.py,scripts/main.py,scripts/handlers/benHandler.py,scripts/handlers/pneHandler.py,scripts/scrappers/excelScrapperIEA.py,scripts/handlers/ieaHandler.py
\ No newline at end of file
diff --git a/.github/workflows/atualiza_tabelas.yml b/.github/workflows/atualiza_tabelas.yml
index 5c53b54..3c9f444 100644
--- a/.github/workflows/atualiza_tabelas.yml
+++ b/.github/workflows/atualiza_tabelas.yml
@@ -1,7 +1,7 @@
 name: run main.py
 
 on:
-  schedule: [{cron: "0 0 * * 5-6"}] #agendamento
+  schedule: [{cron: "0 0 * * 5"}] #agendamento
 
 jobs:
   build:
diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml
index ab97e1b..52f4ae2 100644
--- a/.github/workflows/linter.yml
+++ b/.github/workflows/linter.yml
@@ -22,4 +22,4 @@ jobs:
     - name: Lint flake8
       run: |
        flake8 scripts --ignore=E203,W291,W293 --max-line-length=240 --show-source --statistics
-       find . -type f -name "*.py" -exec pylint --disable=C0103,C0301,C0114,W0703,W0622,R0913,E1101,R0903,R0915,R0912,W0612,E0401,R0914 -j 0 --exit-zero {} \;
\ No newline at end of file
+       find . -type f -name "*.py" -exec pylint --disable=C0103,C0301,C0114,W0703,W0622,R0913,E1101,R0903,R0915,R0912,W0612,E0401,R0914,C0115,C0116,W0511 -j 0 --exit-zero {} \;
\ No newline at end of file
diff --git a/scripts/Scrappers/__pycache__/excelScrapperIEA.cpython-310.pyc b/scripts/Scrappers/__pycache__/excelScrapperIEA.cpython-310.pyc
deleted file mode 100644
index f89a528..0000000
Binary files a/scripts/Scrappers/__pycache__/excelScrapperIEA.cpython-310.pyc and /dev/null differ
diff --git a/scripts/ieaHandler.py b/scripts/handlers/ieaHandler.py
similarity index 91%
rename from scripts/ieaHandler.py
rename to scripts/handlers/ieaHandler.py
index 1fe4a75..382f266 100644
--- a/scripts/ieaHandler.py
+++ b/scripts/handlers/ieaHandler.py
@@ -1,5 +1,5 @@
-import pandas as pd
 from pathlib import Path
+import pandas as pd
 
 
 class IeaHandler:
@@ -83,16 +83,15 @@ def formatar_xlsx_IEA(self) -> None:
         """
         excel_file = pd.ExcelFile(self.path + "/constants/IEA/" + self.file)
         df_iea_sheet_principal = pd.read_excel(excel_file, sheet_name=3)
-        lista_indices_linhas = df_iea_sheet_principal.index[df_iea_sheet_principal.iloc[:,2] == 'Total energy supply ' \
-                                                            '(PJ)'].tolist()
+        lista_indices_linhas = df_iea_sheet_principal.index[df_iea_sheet_principal.iloc[:, 2] == 'Total energy supply ' '(PJ)'].tolist()
 
         df_novo_csv = df_iea_sheet_principal.iloc[lista_indices_linhas]
-        nome_colunas_novas = ['PAIS','PRODUTO','DADO_TIPO']
+        nome_colunas_novas = ['PAIS', 'PRODUTO', 'DADO_TIPO']
         df_novo_csv.columns.values[:3] = nome_colunas_novas
 
         df_novo_csv = df_novo_csv.drop(df_novo_csv.columns[3:6], axis=1)
-        df_novo_csv.columns.values[3:] = df_iea_sheet_principal.iloc[0,6:]
+        df_novo_csv.columns.values[3:] = df_iea_sheet_principal.iloc[0, 6:]
 
-        df_novo_csv.replace('..', 0,inplace=True)
+        df_novo_csv.replace('..', 0, inplace=True)
 
         df_novo_csv.to_csv(self.path + "/constants/IEA/" + "tabelas_paises_TES.csv", index=False)
diff --git a/scripts/main.py b/scripts/main.py
index 8c39fbc..4eb4d59 100644
--- a/scripts/main.py
+++ b/scripts/main.py
@@ -11,7 +11,7 @@
 from scrappers.excelScrapperByHref import ExcelScrapperByHref
 from scrappers.excelScrapperAnuarioANP import ExcelScrapperAnuarioANP
 from scrappers.zipScrapper import ZipScrapper
-from Scrappers.excelScrapperIEA import ExcelScrapperIEA
+from scripts.scrappers.excelScrapperIEA import ExcelScrapperIEA
 
 if __name__ == '__main__':
     year_atual = datetime.now().year
@@ -140,10 +140,7 @@
                 break
         year -= 1
         cont += 1
-
-    '''
-    Extração arquivo IEA
-    '''
+
     url_site = "https://www.iea.org/data-and-statistics/data-product/world-energy-statistics-and-balances"
     scrapperIea = ExcelScrapperIEA(url_site, path_raiz)
     nome_arquivo = scrapperIea.baixa_arquivo()
diff --git a/scripts/Scrappers/excelScrapperIEA.py b/scripts/scrappers/excelScrapperIEA.py
similarity index 87%
rename from scripts/Scrappers/excelScrapperIEA.py
rename to scripts/scrappers/excelScrapperIEA.py
index 905399b..aca5d42 100644
--- a/scripts/Scrappers/excelScrapperIEA.py
+++ b/scripts/scrappers/excelScrapperIEA.py
@@ -1,7 +1,7 @@
-import requests
 import re
-from bs4 import BeautifulSoup
 from pathlib import Path
+import requests
+from bs4 import BeautifulSoup
 
 
 class ExcelScrapperIEA:
@@ -13,6 +13,7 @@ def __init__(self, url, path_raiz):
         self.download_link = None
         self.diretorio = '/scripts/constants/IEA/'
         self.path_destino = None
+
     def baixa_arquivo(self) -> str:
         response = requests.get(self.url)
         # Verifica se a requisição foi bem-sucedida (código 200)
@@ -32,7 +33,5 @@ def baixa_arquivo(self) -> str:
                 with open(str(self.path_destino), 'wb') as file:
                     file.write(download_response.content)
                 return self.nome_arquivo
-            else:
-                raise ValueError(f"Não foi possivel baixar o arquivo {self.nome_arquivo} \n")
-        else:
-            raise ValueError(f"Não existe link para download \n")
+            raise ValueError(f"Não foi possivel baixar o arquivo {self.nome_arquivo} \n")
+        raise ValueError("Não existe link para download \n")