Commit
Format and isort
Aareon Sullivan committed Sep 18, 2024
1 parent 56d7f56 commit 11fa4de
Showing 1 changed file with 57 additions and 33 deletions.
update_allCountries.py (90 changes: 57 additions & 33 deletions)
@@ -1,16 +1,16 @@
 import asyncio
 import csv
 import hashlib
-import zipfile
+import os
 import re
+import time
+import zipfile
 from datetime import datetime, timezone
 from pathlib import Path
-import time
-import os
 
-from tqdm import tqdm
 import httpx
 from loguru import logger
+from tqdm import tqdm
 
 GEONAMES_URL = "https://download.geonames.org/export/zip/allCountries.zip"
 LOCAL_FILE = Path("allCountries.zip")
@@ -33,36 +33,44 @@ async def check_for_updates() -> bool:
             return remote_time > local_time
     return True
 
+
 async def download_file():
     async with httpx.AsyncClient() as client:
         async with client.stream("GET", GEONAMES_URL) as response:
             response.raise_for_status()
             total_size = int(response.headers.get("Content-Length", 0))
 
             chunk_size = 1024 * 1024  # 1 MB chunks
             downloaded = 0
             start_time = time.time()
             last_log_time = start_time
             log_interval = 5  # Log every 5 seconds
 
             with open(LOCAL_FILE, "wb") as f, tqdm(
-                total=total_size, unit='iB', unit_scale=True, desc="Downloading"
+                total=total_size, unit="iB", unit_scale=True, desc="Downloading"
             ) as progress_bar:
                 async for chunk in response.aiter_bytes(chunk_size):
                     size = f.write(chunk)
                     downloaded += size
                     progress_bar.update(size)
 
                     current_time = time.time()
                     if current_time - last_log_time >= log_interval:
                         elapsed_time = current_time - start_time
                         speed = downloaded / elapsed_time / 1024 / 1024  # MB/s
-                        percent = (downloaded / total_size) * 100 if total_size > 0 else 0
-                        logger.info(f"Downloaded: {downloaded/1024/1024:.2f} MB / {total_size/1024/1024:.2f} MB "
-                                    f"({percent:.2f}%) - Speed: {speed:.2f} MB/s")
+                        percent = (
+                            (downloaded / total_size) * 100 if total_size > 0 else 0
+                        )
+                        logger.info(
+                            f"Downloaded: {downloaded/1024/1024:.2f} MB / {total_size/1024/1024:.2f} MB "
+                            f"({percent:.2f}%) - Speed: {speed:.2f} MB/s"
+                        )
                         last_log_time = current_time
 
-            logger.info(f"Download completed. Total size: {total_size/1024/1024:.2f} MB")
+            logger.info(
+                f"Download completed. Total size: {total_size/1024/1024:.2f} MB"
+            )
+
 
 def calculate_md5(filename: Path) -> str:
     hash_md5 = hashlib.md5()
@@ -71,10 +79,12 @@ def calculate_md5(filename: Path) -> str:
             hash_md5.update(chunk)
     return hash_md5.hexdigest()
 
+
 def extract_zip():
     with zipfile.ZipFile(LOCAL_FILE, "r") as zip_ref:
         zip_ref.extractall()
 
+
 def get_statistics():
     total_entries = 0
     countries = set()
@@ -95,10 +105,12 @@ def get_statistics():
         "md5_checksum": md5_checksum,
     }
 
+
 def format_file_size(size_bytes):
     size_mb = size_bytes / (1024 * 1024)
     return f"{size_mb:.2f} MB"
 
+
 def create_release_notes(stats, is_update):
     update_status = "Update" if is_update else "No changes"
     current_date = datetime.now().strftime("%Y-%m-%d")
@@ -112,33 +124,39 @@ def create_release_notes(stats, is_update):
 This release contains the latest GeoNames database {update_status.lower()}.
 """
 
+
 async def get_previous_checksum():
     GITHUB_API_URL = "https://api.github.com/repos/Aareon/GeoNamesMirror/releases"
     GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")
 
     if not GITHUB_TOKEN:
         logger.error("GITHUB_TOKEN environment variable is not set.")
         return None
 
     try:
         async with httpx.AsyncClient() as client:
-            response = await client.get(GITHUB_API_URL, headers={
-                "Accept": "application/vnd.github+json",
-                "Authorization": f"Bearer {GITHUB_TOKEN}",
-                "X-GitHub-Api-Version": "2022-11-28"
-            })
+            response = await client.get(
+                GITHUB_API_URL,
+                headers={
+                    "Accept": "application/vnd.github+json",
+                    "Authorization": f"Bearer {GITHUB_TOKEN}",
+                    "X-GitHub-Api-Version": "2022-11-28",
+                },
+            )
             response.raise_for_status()
             releases = response.json()
 
             if not releases:
-                logger.info("No previous releases found. This will be the first release.")
+                logger.info(
+                    "No previous releases found. This will be the first release."
+                )
                 return None
 
             latest_release = releases[0]
-            body = latest_release.get('body', '')
+            body = latest_release.get("body", "")
 
             # Extract MD5 checksum from the release notes
-            match = re.search(r'MD5 Checksum: ([a-fA-F0-9]{32})', body)
+            match = re.search(r"MD5 Checksum: ([a-fA-F0-9]{32})", body)
             if match:
                 return match.group(1)
             else:
@@ -147,9 +165,12 @@ async def get_previous_checksum():
     except httpx.HTTPStatusError as e:
         logger.error(f"HTTP error occurred: {e}")
     except Exception as e:
-        logger.error(f"An unexpected error occurred while fetching previous releases: {e}")
+        logger.error(
+            f"An unexpected error occurred while fetching previous releases: {e}"
+        )
     return None
 
+
 async def main():
     current_checksum = None
     try:
@@ -158,10 +179,10 @@ async def main():
             await download_file()
             extract_zip()
             stats = get_statistics()
 
             previous_checksum = await get_previous_checksum()
-            current_checksum = stats['md5_checksum']
+            current_checksum = stats["md5_checksum"]
 
             is_update = previous_checksum != current_checksum
             release_notes = create_release_notes(stats, is_update)
 
@@ -173,23 +194,26 @@ async def main():
             # Write release notes and update status to files for GitHub Actions to use
             with open("release_notes.txt", "w") as f:
                 f.write(release_notes)
 
             with open("update_status.txt", "w") as f:
                 f.write("update" if is_update else "no_update")
 
             # Write release title to a separate file
             with open("release_title.txt", "w") as f:
-                f.write(release_notes.split('\n')[0])
+                f.write(release_notes.split("\n")[0])
 
             logger.info(f"Process complete. Release notes:\n{release_notes}")
         else:
-            logger.info(f"Geonames data is up to date.{' Checksum: ' + current_checksum + '.' if current_checksum else ''} Last modified: {datetime.fromtimestamp(LOCAL_FILE.stat().st_mtime)}")
+            logger.info(
+                f"Geonames data is up to date.{' Checksum: ' + current_checksum + '.' if current_checksum else ''} Last modified: {datetime.fromtimestamp(LOCAL_FILE.stat().st_mtime)}"
+            )
     except httpx.HTTPError as e:
         logger.error(f"HTTP error occurred: {e}")
     except IOError as e:
         logger.error(f"I/O error occurred: {e}")
     except Exception as e:
         logger.error(f"An unexpected error occurred: {e}")
 
+
 if __name__ == "__main__":
-    asyncio.run(main())
\ No newline at end of file
+    asyncio.run(main())
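
The commit message names only the tools ("Format and isort"); the exact versions and any project configuration are not shown here. As a rough sketch under that assumption, the changes in this diff (sorted and grouped imports, double quotes, wrapped long calls, two blank lines between top-level definitions, a trailing newline) are the kind of output the public isort and black Python APIs produce with default settings:

# Hypothetical sketch, not part of this commit: apply isort and black to the
# file with default settings. The commit does not show the tool versions or
# any configuration actually used.
from pathlib import Path

import black
import isort

path = Path("update_allCountries.py")
source = path.read_text()
source = isort.code(source)  # sort and group imports
source = black.format_str(source, mode=black.Mode())  # quoting, wrapping, blank lines
path.write_text(source)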
