From a079fe794b8a42aebc252f9ad73c322772a797d2 Mon Sep 17 00:00:00 2001 From: Victor-oss Date: Thu, 2 Nov 2023 13:22:12 -0300 Subject: [PATCH] =?UTF-8?q?Mudan=C3=A7as=20de=20clean=20code?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .coveragerc | 2 +- .github/workflows/atualiza_tabelas.yml | 2 +- .github/workflows/linter.yml | 2 +- .../__pycache__/excelScrapperIEA.cpython-310.pyc | Bin 1465 -> 0 bytes scripts/{ => handlers}/ieaHandler.py | 11 +++++------ scripts/main.py | 7 ++----- .../{Scrappers => scrappers}/excelScrapperIEA.py | 11 +++++------ 7 files changed, 15 insertions(+), 20 deletions(-) delete mode 100644 scripts/Scrappers/__pycache__/excelScrapperIEA.cpython-310.pyc rename scripts/{ => handlers}/ieaHandler.py (91%) rename scripts/{Scrappers => scrappers}/excelScrapperIEA.py (87%) diff --git a/.coveragerc b/.coveragerc index 6b91ebe..5ec0d5d 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,2 +1,2 @@ [run] -omit = scripts/tests/*,scripts/epe_matriz.py,*/__init__.py,scripts/main.py,scripts/handlers/benHandler.py,scripts/handlers/pneHandler.py \ No newline at end of file +omit = scripts/tests/*,scripts/epe_matriz.py,*/__init__.py,scripts/main.py,scripts/handlers/benHandler.py,scripts/handlers/pneHandler.py,scripts/scrappers/excelScrapperIEA.py,scripts/handlers/ieaHandler.py \ No newline at end of file diff --git a/.github/workflows/atualiza_tabelas.yml b/.github/workflows/atualiza_tabelas.yml index 5c53b54..3c9f444 100644 --- a/.github/workflows/atualiza_tabelas.yml +++ b/.github/workflows/atualiza_tabelas.yml @@ -1,7 +1,7 @@ name: run main.py on: - schedule: [{cron: "0 0 * * 5-6"}] #agendamento + schedule: [{cron: "0 0 * * 5"}] #agendamento jobs: build: diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml index ab97e1b..52f4ae2 100644 --- a/.github/workflows/linter.yml +++ b/.github/workflows/linter.yml @@ -22,4 +22,4 @@ jobs: - name: Lint flake8 run: | flake8 scripts --ignore=E203,W291,W293 --max-line-length=240 --show-source --statistics - find . -type f -name "*.py" -exec pylint --disable=C0103,C0301,C0114,W0703,W0622,R0913,E1101,R0903,R0915,R0912,W0612,E0401,R0914 -j 0 --exit-zero {} \; \ No newline at end of file + find . -type f -name "*.py" -exec pylint --disable=C0103,C0301,C0114,W0703,W0622,R0913,E1101,R0903,R0915,R0912,W0612,E0401,R0914,C0115,C0116,W0511 -j 0 --exit-zero {} \; \ No newline at end of file diff --git a/scripts/Scrappers/__pycache__/excelScrapperIEA.cpython-310.pyc b/scripts/Scrappers/__pycache__/excelScrapperIEA.cpython-310.pyc deleted file mode 100644 index f89a528fe0a45e32e21a12369b2ffe2d342625fa..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1465 zcmah}Pmdcl6t_K|KS?IL>9%y+OAm+xQPED3+Dk!*wkoKX)ru4$g6L=xJI!P#W7I}GWTOtQ(6y(ar z%_SkV$xGGuEZ4G_0H&Tmh~#GiF@AoR$rMc|l@y?oBTzF`*U7`Lo(vu&ckZS~c_q=Z zeER6?|JiQyp%eK|dJd`=HcHdH%uSj`=u|t;0c|D=Xqzr?zMIm6?R^{IgK8+p6m_;E zK*kwra{L=IhMZ$IXY7KW(G{IxZjTwe&+hm}OY{KQ1?ik*2YPlPvJ zT|yI~cDSaLnAUk)G`kCS-Vv<}SEaZ)kKMVu2QS|E3(+56@i+b!ri-ULxOVs5y?CFj zopulT-t5Y3ukqe~#n8)DbInQDRCLzt$K5l3<)6QYmabo*k8cny9Ov4>oe#*zM8g8* zZhni)$wZ8Z=^noR1z*4HOwGJFs4~zJEX|(wnT@u|{V1rEz)}J3Eh%Hx@EUl*v zSbqVzkv7EEFc@V&fR0nZ&~$~}A`#kX+3s#%oQ`T^RB5lg75sg3k{7Z8{)Tj&q3$Tl zPqS@z^-$FGZA{xNrq>XjZF(J7fV(!mLHBU((Dt94_5R?Dv*oRSf%hK<53z|6{$++8 zoZ5Dp&XuSO$H!@UQfI~H1a@33e56zXA0c}wO4lynLIsY*b+6uZKt%VVM%EAEhC9E7 p#CjurcT*RvtC<%0Xp?ZK{9jgW^ywy~&m0`It42g38spsa{sI None: """ excel_file = pd.ExcelFile(self.path + "/constants/IEA/" + self.file) df_iea_sheet_principal = pd.read_excel(excel_file, sheet_name=3) - lista_indices_linhas = df_iea_sheet_principal.index[df_iea_sheet_principal.iloc[:,2] == 'Total energy supply ' \ - '(PJ)'].tolist() + lista_indices_linhas = df_iea_sheet_principal.index[df_iea_sheet_principal.iloc[:, 2] == 'Total energy supply ' '(PJ)'].tolist() df_novo_csv = df_iea_sheet_principal.iloc[lista_indices_linhas] - nome_colunas_novas = ['PAIS','PRODUTO','DADO_TIPO'] + nome_colunas_novas = ['PAIS', 'PRODUTO', 'DADO_TIPO'] df_novo_csv.columns.values[:3] = nome_colunas_novas df_novo_csv = df_novo_csv.drop(df_novo_csv.columns[3:6], axis=1) - df_novo_csv.columns.values[3:] = df_iea_sheet_principal.iloc[0,6:] + df_novo_csv.columns.values[3:] = df_iea_sheet_principal.iloc[0, 6:] - df_novo_csv.replace('..', 0,inplace=True) + df_novo_csv.replace('..', 0, inplace=True) df_novo_csv.to_csv(self.path + "/constants/IEA/" + "tabelas_paises_TES.csv", index=False) diff --git a/scripts/main.py b/scripts/main.py index 8c39fbc..4eb4d59 100644 --- a/scripts/main.py +++ b/scripts/main.py @@ -11,7 +11,7 @@ from scrappers.excelScrapperByHref import ExcelScrapperByHref from scrappers.excelScrapperAnuarioANP import ExcelScrapperAnuarioANP from scrappers.zipScrapper import ZipScrapper -from Scrappers.excelScrapperIEA import ExcelScrapperIEA +from scripts.scrappers.excelScrapperIEA import ExcelScrapperIEA if __name__ == '__main__': year_atual = datetime.now().year @@ -140,10 +140,7 @@ break year -= 1 cont += 1 - - ''' - Extração arquivo IEA - ''' + url_site = "https://www.iea.org/data-and-statistics/data-product/world-energy-statistics-and-balances" scrapperIea = ExcelScrapperIEA(url_site, path_raiz) nome_arquivo = scrapperIea.baixa_arquivo() diff --git a/scripts/Scrappers/excelScrapperIEA.py b/scripts/scrappers/excelScrapperIEA.py similarity index 87% rename from scripts/Scrappers/excelScrapperIEA.py rename to scripts/scrappers/excelScrapperIEA.py index 905399b..aca5d42 100644 --- a/scripts/Scrappers/excelScrapperIEA.py +++ b/scripts/scrappers/excelScrapperIEA.py @@ -1,7 +1,7 @@ -import requests import re -from bs4 import BeautifulSoup from pathlib import Path +import requests +from bs4 import BeautifulSoup class ExcelScrapperIEA: @@ -13,6 +13,7 @@ def __init__(self, url, path_raiz): self.download_link = None self.diretorio = '/scripts/constants/IEA/' self.path_destino = None + def baixa_arquivo(self) -> str: response = requests.get(self.url) # Verifica se a requisição foi bem-sucedida (código 200) @@ -32,7 +33,5 @@ def baixa_arquivo(self) -> str: with open(str(self.path_destino), 'wb') as file: file.write(download_response.content) return self.nome_arquivo - else: - raise ValueError(f"Não foi possivel baixar o arquivo {self.nome_arquivo} \n") - else: - raise ValueError(f"Não existe link para download \n") + raise ValueError(f"Não foi possivel baixar o arquivo {self.nome_arquivo} \n") + raise ValueError("Não existe link para download \n")