Skip to content

Commit

Permalink
Improve release notes and update checking
Browse files Browse the repository at this point in the history
  • Loading branch information
Aareon Sullivan committed Sep 17, 2024
1 parent c9ed797 commit 6f9a0c2
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 26 deletions.
18 changes: 15 additions & 3 deletions .github/workflows/update_allCountries.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ jobs:
- name: Run update script
run: python update_allCountries.py
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- name: Check update status
id: check_update
run: |
UPDATE_STATUS=$(cat update_status.txt)
echo "UPDATE_STATUS=$UPDATE_STATUS" >> $GITHUB_OUTPUT
- name: Read release notes
id: release_notes
Expand All @@ -38,12 +46,16 @@ jobs:
echo "$RELEASE_NOTES" >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT
- name: Read release title
id: release_title
run: echo "RELEASE_TITLE=$(cat release_title.txt)" >> $GITHUB_OUTPUT

- name: Create Release
id: create_release
if: steps.check_update.outputs.UPDATE_STATUS == 'update'
uses: softprops/action-gh-release@v1
with:
tag_name: release-${{ github.run_number }}
name: Release ${{ github.run_number }}
tag_name: ${{ github.ref_name }}-${{ github.run_number }}
name: ${{ steps.release_title.outputs.RELEASE_TITLE }}
body: ${{ steps.release_notes.outputs.RELEASE_NOTES }}
files: allCountries.zip
draft: false
Expand Down
65 changes: 42 additions & 23 deletions update_allCountries.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
"""
This script downloads and processes the GeoNames allCountries.zip file.
It uses the csv module to efficiently process the large dataset.
"""

import asyncio
import csv
import hashlib
import zipfile
import re
from datetime import datetime, timezone
from pathlib import Path

Expand All @@ -16,7 +12,7 @@
GEONAMES_URL = "https://download.geonames.org/export/zip/allCountries.zip"
LOCAL_FILE = Path("allCountries.zip")
EXTRACTED_FILE = Path("allCountries.txt")

GITHUB_API_URL = "https://api.github.com/repos/Aareon/GeoNamesMirror/releases/latest"

async def check_for_updates() -> bool:
if LOCAL_FILE.exists():
Expand All @@ -33,7 +29,6 @@ async def check_for_updates() -> bool:
return remote_time > local_time
return True


async def download_file():
async with httpx.AsyncClient() as client:
async with client.stream("GET", GEONAMES_URL) as response:
Expand All @@ -47,20 +42,17 @@ async def download_file():
downloaded += len(chunk)
logger.info(f"Downloaded {downloaded}/{total_size} bytes")


def calculate_md5(filename: Path) -> str:
    """Return the hexadecimal MD5 digest of the file at *filename*.

    The file is opened in binary mode and consumed in 4096-byte pieces, so
    the whole file never has to be held in memory at once.
    """
    digest = hashlib.md5()
    with filename.open("rb") as stream:
        while True:
            block = stream.read(4096)
            if not block:
                # An empty read means end of file.
                break
            digest.update(block)
    return digest.hexdigest()


def extract_zip():
    """Unpack every member of the archive at LOCAL_FILE.

    ``extractall`` is called without a target path, so members are written
    relative to the current working directory.
    """
    archive = zipfile.ZipFile(LOCAL_FILE, "r")
    with archive:
        archive.extractall()


def get_statistics():
total_entries = 0
countries = set()
Expand All @@ -81,24 +73,35 @@ def get_statistics():
"md5_checksum": md5_checksum,
}


def format_file_size(size_bytes):
    """Render a byte count as megabytes with two decimal places.

    One megabyte is taken to be 1024 * 1024 bytes; the result looks like
    ``"12.34 MB"``.
    """
    bytes_per_mb = 1024 * 1024
    return f"{size_bytes / bytes_per_mb:.2f} MB"


def create_release_notes(stats, is_update):
    """Build the release-notes text for the current GeoNames snapshot.

    Args:
        stats: Mapping with the keys ``total_entries``, ``country_count``,
            ``file_size`` (in bytes) and ``md5_checksum``.
        is_update: True when the data differs from the previous release.

    Returns:
        A newline-terminated, multi-line string. Its first line is the
        title line, and the ``MD5 Checksum:`` line keeps a fixed format
        because get_previous_checksum parses it back out of the published
        release body.
    """
    update_status = "Update" if is_update else "No changes"
    # Use an explicit UTC date so the title does not depend on the local
    # timezone of whichever machine runs the script.
    current_date = datetime.now(timezone.utc).strftime("%Y-%m-%d")

    if is_update:
        closing = "This release contains the latest GeoNames database update."
    else:
        # Interpolating "no changes" into the update sentence produced
        # ungrammatical text, so the no-change case has its own sentence.
        closing = (
            "No changes were detected in the GeoNames database "
            "since the previous release."
        )

    lines = [
        f"GeoNames Database {update_status} - {current_date}",
        f"- Total Entries: {stats['total_entries']:,}",
        f"- Countries Covered: {stats['country_count']}",
        f"- File Size: {format_file_size(stats['file_size'])}",
        f"- MD5 Checksum: {stats['md5_checksum']}",
        closing,
    ]
    return "\n".join(lines) + "\n"

async def get_previous_checksum():
    """Fetch the MD5 checksum recorded in the latest GitHub release notes.

    Returns:
        The 32-character hex digest, lower-cased, found on the
        ``MD5 Checksum:`` line of the latest release body. Returns ``None``
        when no release exists yet, when the release has no body, or when
        the body contains no checksum line.

    Raises:
        httpx.HTTPStatusError: For any non-success response other than 404.
    """
    async with httpx.AsyncClient() as client:
        response = await client.get(GITHUB_API_URL)
        if response.status_code == 404:
            # The "latest release" endpoint answers 404 when the repository
            # has no published release; treat that as "no previous checksum"
            # so the very first release can still be created.
            return None
        response.raise_for_status()
        release_data = response.json()

    # The API may send ``"body": null``; ``.get('body', '')`` would then
    # return None and make re.search raise TypeError.
    body = release_data.get("body") or ""

    # Extract MD5 checksum from the release notes
    match = re.search(r"MD5 Checksum: ([a-fA-F0-9]{32})", body)
    if match is None:
        return None
    # hashlib's hexdigest() is lower-case; normalise so the caller's string
    # comparison is not defeated by letter case.
    return match.group(1).lower()

async def main():
try:
Expand All @@ -107,13 +110,30 @@ async def main():
await download_file()
extract_zip()
stats = get_statistics()
release_notes = create_release_notes(stats)

# Write release notes to a file for GitHub Actions to use

previous_checksum = await get_previous_checksum()
current_checksum = stats['md5_checksum']

is_update = previous_checksum != current_checksum
release_notes = create_release_notes(stats, is_update)

if is_update:
logger.info("New data detected. Creating release.")
else:
logger.info("No changes in data. Skipping release creation.")

# Write release notes and update status to files for GitHub Actions to use
with open("release_notes.txt", "w") as f:
f.write(release_notes)

logger.info(f"Update complete. Release notes:\n{release_notes}")

with open("update_status.txt", "w") as f:
f.write("update" if is_update else "no_update")

# Write release title to a separate file
with open("release_title.txt", "w") as f:
f.write(release_notes.split('\n')[0])

logger.info(f"Process complete. Release notes:\n{release_notes}")
else:
logger.info("Geonames data is up to date.")
except httpx.HTTPError as e:
Expand All @@ -123,6 +143,5 @@ async def main():
except Exception as e:
logger.error(f"An unexpected error occurred: {e}")


if __name__ == "__main__":
asyncio.run(main())
asyncio.run(main())

0 comments on commit 6f9a0c2

Please sign in to comment.