diff --git a/.github/workflows/update_allCountries.yml b/.github/workflows/update_allCountries.yml index ddb49ad..7d3dae4 100644 --- a/.github/workflows/update_allCountries.yml +++ b/.github/workflows/update_allCountries.yml @@ -25,7 +25,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install httpx loguru + pip install httpx loguru tqdm - name: Run update script run: python update_allCountries.py diff --git a/.gitignore b/.gitignore index 9578a17..6ed2289 100644 --- a/.gitignore +++ b/.gitignore @@ -165,5 +165,7 @@ cython_debug/ allCountries.txt allCountries.zip -# Auto-generated release notes -release_notes.txt \ No newline at end of file +# Auto-generated release info +release_notes.txt +release_title.txt +update_status.txt diff --git a/poetry.lock b/poetry.lock index ca759be..ed81cd3 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. [[package]] name = "anyio" @@ -287,6 +287,26 @@ files = [ {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] +[[package]] +name = "tqdm" +version = "4.66.5" +description = "Fast, Extensible Progress Meter" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tqdm-4.66.5-py3-none-any.whl", hash = "sha256:90279a3770753eafc9194a0364852159802111925aa30eb3f9d85b0e805ac7cd"}, + {file = "tqdm-4.66.5.tar.gz", hash = "sha256:e1020aef2e5096702d8a025ac7d16b1577279c9d63f8375b63083e9a5f0fcbad"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] + [[package]] name = "typing-extensions" version = "4.12.2" @@ -315,4 +335,4 @@ dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "3cd3cd6c5ca87ce6c6386f19a3454799796b20559f74147cf73c29fb8b5a5bcc" +content-hash = "5b72b745110fa61465b9ae7f57e1df1c7f130628c8d7a8ea94abd4740c177c6f" diff --git a/pyproject.toml b/pyproject.toml index 70addd2..667225d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,6 +10,7 @@ readme = "README.md" python = "^3.10" httpx = "^0.27.2" loguru = "^0.7.2" +tqdm = "^4.66.5" [tool.poetry.group.test.dependencies] diff --git a/update_allCountries.py b/update_allCountries.py index 9894327..befe467 100644 --- a/update_allCountries.py +++ b/update_allCountries.py @@ -5,14 +5,17 @@ import re from datetime import datetime, timezone from pathlib import Path +import time +from tqdm import tqdm import httpx from loguru import logger GEONAMES_URL = "https://download.geonames.org/export/zip/allCountries.zip" LOCAL_FILE = Path("allCountries.zip") EXTRACTED_FILE = Path("allCountries.txt") -GITHUB_API_URL = "https://api.github.com/repos/Aareon/GeoNamesMirror/releases/latest" +GITHUB_API_URL = "https://api.github.com/repos/Aareon/GeoNamesMirror/releases" + async def check_for_updates() -> bool: if LOCAL_FILE.exists(): @@ -34,13 +37,31 @@ async def download_file(): async with client.stream("GET", GEONAMES_URL) as response: response.raise_for_status() total_size = int(response.headers.get("Content-Length", 0)) - - with LOCAL_FILE.open("wb") as f: - downloaded = 0 - async for chunk in response.aiter_bytes(): - f.write(chunk) - downloaded += len(chunk) - logger.info(f"Downloaded {downloaded}/{total_size} bytes") + + chunk_size = 1024 * 1024 # 1 MB chunks + downloaded = 0 + start_time = time.time() + last_log_time = start_time + log_interval = 5 # Log every 5 seconds + + with open(LOCAL_FILE, "wb") as f, tqdm( + total=total_size, unit='iB', unit_scale=True, desc="Downloading" + ) as progress_bar: + async for chunk in response.aiter_bytes(chunk_size): + size = f.write(chunk) + downloaded += size + progress_bar.update(size) + + current_time = time.time() + if current_time - last_log_time >= log_interval: + elapsed_time = current_time - start_time + speed = downloaded / elapsed_time / 1024 / 1024 # MB/s + percent = (downloaded / total_size) * 100 if total_size > 0 else 0 + logger.info(f"Downloaded: {downloaded/1024/1024:.2f} MB / {total_size/1024/1024:.2f} MB " + f"({percent:.2f}%) - Speed: {speed:.2f} MB/s") + last_log_time = current_time + + logger.info(f"Download completed. Total size: {total_size/1024/1024:.2f} MB") def calculate_md5(filename: Path) -> str: hash_md5 = hashlib.md5() @@ -91,19 +112,37 @@ def create_release_notes(stats, is_update): """ async def get_previous_checksum(): - async with httpx.AsyncClient() as client: - response = await client.get(GITHUB_API_URL) - response.raise_for_status() - release_data = response.json() - body = release_data.get('body', '') - - # Extract MD5 checksum from the release notes - match = re.search(r'MD5 Checksum: ([a-fA-F0-9]{32})', body) - if match: - return match.group(1) + try: + async with httpx.AsyncClient() as client: + response = await client.get(GITHUB_API_URL, headers={ + "Accept": "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28" + }) + response.raise_for_status() + releases = response.json() + + if not releases: + logger.info("No previous releases found. This will be the first release.") + return None + + latest_release = releases[0] + body = latest_release.get('body', '') + + # Extract MD5 checksum from the release notes + match = re.search(r'MD5 Checksum: ([a-fA-F0-9]{32})', body) + if match: + return match.group(1) + else: + logger.warning("MD5 checksum not found in the latest release notes.") + return None + except httpx.HTTPStatusError as e: + logger.error(f"HTTP error occurred: {e}") + except Exception as e: + logger.error(f"An unexpected error occurred while fetching previous releases: {e}") return None async def main(): + current_checksum = None try: if await check_for_updates(): logger.info("Updating Geonames data...") @@ -135,7 +174,7 @@ async def main(): logger.info(f"Process complete. Release notes:\n{release_notes}") else: - logger.info("Geonames data is up to date.") + logger.info(f"Geonames data is up to date.{' Checksum: ' + current_checksum + '.' if current_checksum else ''} Last modified: {datetime.fromtimestamp(LOCAL_FILE.stat().st_mtime)}") except httpx.HTTPError as e: logger.error(f"HTTP error occurred: {e}") except IOError as e: