From 11fa4de37a14e314e81ae7f08a2dee1c311a0794 Mon Sep 17 00:00:00 2001 From: Aareon Sullivan Date: Tue, 17 Sep 2024 19:09:37 -0500 Subject: [PATCH] Format and isort --- update_allCountries.py | 90 ++++++++++++++++++++++++++---------------- 1 file changed, 57 insertions(+), 33 deletions(-) diff --git a/update_allCountries.py b/update_allCountries.py index fe21d8d..ecde30f 100644 --- a/update_allCountries.py +++ b/update_allCountries.py @@ -1,16 +1,16 @@ import asyncio import csv import hashlib -import zipfile +import os import re +import time +import zipfile from datetime import datetime, timezone from pathlib import Path -import time -import os -from tqdm import tqdm import httpx from loguru import logger +from tqdm import tqdm GEONAMES_URL = "https://download.geonames.org/export/zip/allCountries.zip" LOCAL_FILE = Path("allCountries.zip") @@ -33,36 +33,44 @@ async def check_for_updates() -> bool: return remote_time > local_time return True + async def download_file(): async with httpx.AsyncClient() as client: async with client.stream("GET", GEONAMES_URL) as response: response.raise_for_status() total_size = int(response.headers.get("Content-Length", 0)) - + chunk_size = 1024 * 1024 # 1 MB chunks downloaded = 0 start_time = time.time() last_log_time = start_time log_interval = 5 # Log every 5 seconds - + with open(LOCAL_FILE, "wb") as f, tqdm( - total=total_size, unit='iB', unit_scale=True, desc="Downloading" + total=total_size, unit="iB", unit_scale=True, desc="Downloading" ) as progress_bar: async for chunk in response.aiter_bytes(chunk_size): size = f.write(chunk) downloaded += size progress_bar.update(size) - + current_time = time.time() if current_time - last_log_time >= log_interval: elapsed_time = current_time - start_time speed = downloaded / elapsed_time / 1024 / 1024 # MB/s - percent = (downloaded / total_size) * 100 if total_size > 0 else 0 - logger.info(f"Downloaded: {downloaded/1024/1024:.2f} MB / {total_size/1024/1024:.2f} MB " - f"({percent:.2f}%) - Speed: {speed:.2f} MB/s") + percent = ( + (downloaded / total_size) * 100 if total_size > 0 else 0 + ) + logger.info( + f"Downloaded: {downloaded/1024/1024:.2f} MB / {total_size/1024/1024:.2f} MB " + f"({percent:.2f}%) - Speed: {speed:.2f} MB/s" + ) last_log_time = current_time - logger.info(f"Download completed. Total size: {total_size/1024/1024:.2f} MB") + logger.info( + f"Download completed. Total size: {total_size/1024/1024:.2f} MB" + ) + def calculate_md5(filename: Path) -> str: hash_md5 = hashlib.md5() @@ -71,10 +79,12 @@ def calculate_md5(filename: Path) -> str: hash_md5.update(chunk) return hash_md5.hexdigest() + def extract_zip(): with zipfile.ZipFile(LOCAL_FILE, "r") as zip_ref: zip_ref.extractall() + def get_statistics(): total_entries = 0 countries = set() @@ -95,10 +105,12 @@ def get_statistics(): "md5_checksum": md5_checksum, } + def format_file_size(size_bytes): size_mb = size_bytes / (1024 * 1024) return f"{size_mb:.2f} MB" + def create_release_notes(stats, is_update): update_status = "Update" if is_update else "No changes" current_date = datetime.now().strftime("%Y-%m-%d") @@ -112,33 +124,39 @@ def create_release_notes(stats, is_update): This release contains the latest GeoNames database {update_status.lower()}. """ + async def get_previous_checksum(): GITHUB_API_URL = "https://api.github.com/repos/Aareon/GeoNamesMirror/releases" GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN") - + if not GITHUB_TOKEN: logger.error("GITHUB_TOKEN environment variable is not set.") return None try: async with httpx.AsyncClient() as client: - response = await client.get(GITHUB_API_URL, headers={ - "Accept": "application/vnd.github+json", - "Authorization": f"Bearer {GITHUB_TOKEN}", - "X-GitHub-Api-Version": "2022-11-28" - }) + response = await client.get( + GITHUB_API_URL, + headers={ + "Accept": "application/vnd.github+json", + "Authorization": f"Bearer {GITHUB_TOKEN}", + "X-GitHub-Api-Version": "2022-11-28", + }, + ) response.raise_for_status() releases = response.json() - + if not releases: - logger.info("No previous releases found. This will be the first release.") + logger.info( + "No previous releases found. This will be the first release." + ) return None - + latest_release = releases[0] - body = latest_release.get('body', '') - + body = latest_release.get("body", "") + # Extract MD5 checksum from the release notes - match = re.search(r'MD5 Checksum: ([a-fA-F0-9]{32})', body) + match = re.search(r"MD5 Checksum: ([a-fA-F0-9]{32})", body) if match: return match.group(1) else: @@ -147,9 +165,12 @@ async def get_previous_checksum(): except httpx.HTTPStatusError as e: logger.error(f"HTTP error occurred: {e}") except Exception as e: - logger.error(f"An unexpected error occurred while fetching previous releases: {e}") + logger.error( + f"An unexpected error occurred while fetching previous releases: {e}" + ) return None + async def main(): current_checksum = None try: @@ -158,10 +179,10 @@ async def main(): await download_file() extract_zip() stats = get_statistics() - + previous_checksum = await get_previous_checksum() - current_checksum = stats['md5_checksum'] - + current_checksum = stats["md5_checksum"] + is_update = previous_checksum != current_checksum release_notes = create_release_notes(stats, is_update) @@ -173,17 +194,19 @@ async def main(): # Write release notes and update status to files for GitHub Actions to use with open("release_notes.txt", "w") as f: f.write(release_notes) - + with open("update_status.txt", "w") as f: f.write("update" if is_update else "no_update") - + # Write release title to a separate file with open("release_title.txt", "w") as f: - f.write(release_notes.split('\n')[0]) + f.write(release_notes.split("\n")[0]) logger.info(f"Process complete. Release notes:\n{release_notes}") else: - logger.info(f"Geonames data is up to date.{' Checksum: ' + current_checksum + '.' if current_checksum else ''} Last modified: {datetime.fromtimestamp(LOCAL_FILE.stat().st_mtime)}") + logger.info( + f"Geonames data is up to date.{' Checksum: ' + current_checksum + '.' if current_checksum else ''} Last modified: {datetime.fromtimestamp(LOCAL_FILE.stat().st_mtime)}" + ) except httpx.HTTPError as e: logger.error(f"HTTP error occurred: {e}") except IOError as e: @@ -191,5 +214,6 @@ async def main(): except Exception as e: logger.error(f"An unexpected error occurred: {e}") + if __name__ == "__main__": - asyncio.run(main()) \ No newline at end of file + asyncio.run(main())