def nonempty_docs(dirs_or_files):
    """Yield ``(path, doc)`` for each non-empty document in the given yaml files.

    Args:
        dirs_or_files: Iterable of paths. A directory is walked recursively
            with ``os.walk``; any other path is treated as a single file.
            Paths that do not end in ``.yaml`` are ignored.

    Yields:
        ``(path, doc)`` for every document returned by
        ``yaml.safe_load_all`` that is not ``None`` (empty files and empty
        documents are skipped), and ``(path, None)`` once if parsing the file
        raises ``yaml.parser.ParserError`` -- possibly after documents of the
        same file that were loaded before the error occurred. Callers use the
        ``None`` marker to count the parse error.
    """
    for d in dirs_or_files:
        if os.path.isdir(d):
            paths = (os.path.join(root, fname)
                     for (root, _dirs, files) in os.walk(d)
                     for fname in files)
        else:
            paths = [d]
        for path in paths:
            if not path.endswith(".yaml"):
                continue
            with open(path) as f:
                text = f.read()
            try:
                for doc in yaml.safe_load_all(text):
                    if doc is None:  # empty yaml file or document
                        continue
                    yield path, doc
            except yaml.parser.ParserError:
                # The marker must actually be yielded: a bare `path, None`
                # expression is a no-op and would hide the parse error from
                # the caller's error count.
                yield path, None
in doc: - errors += 1 # TODO - else: - last_modified = isoparse(doc.get('lastModified')) - if last_modified == last_modified_upstream: - up_to_date += 1 - else: - if last_modified < last_modified_upstream: - out_of_date += 1 - else: - errors += 1 # our assets should not be newer than upstream's assets TODO - print("error: ", end='') - print(f"{doc.get('assetId')}:") - print(f" {doc.get('version')} -> {upstream_state[file_id].get('release')}") - print(f" {last_modified.isoformat().replace('+00:00', 'Z')} -> {last_modified_upstream.isoformat().replace('+00:00', 'Z')}") - print(f" https://www.sc4evermore.com/index.php/downloads/download/{file_id}") - print(f" {p}") + last_modified_upstream = isoparse(upstream_state[file_id]['modified']) + if last_modified_upstream.tzinfo is None: + last_modified_upstream = last_modified_upstream.replace(tzinfo=timezone.utc) - except yaml.parser.ParserError: - errors += 1 + if 'lastModified' not in doc: + errors += 1 # TODO + else: + last_modified = isoparse(doc.get('lastModified')) + if last_modified == last_modified_upstream: + up_to_date += 1 + else: + if last_modified < last_modified_upstream: + out_of_date += 1 + else: + errors += 1 # our assets should not be newer than upstream's assets TODO + print("error: ", end='') + print(f"{doc.get('assetId')}:") + print(f" {doc.get('version')} -> {upstream_state[file_id].get('release')}") + print(f" {last_modified.isoformat().replace('+00:00', 'Z')} -> {last_modified_upstream.isoformat().replace('+00:00', 'Z')}") + print(f" https://www.sc4evermore.com/index.php/downloads/download/{file_id}") + print(f" {p}") result = 0 if out_of_date == 0: diff --git a/.github/st-check-updates.py b/.github/st-check-updates.py index b9bf439d..004bd97a 100644 --- a/.github/st-check-updates.py +++ b/.github/st-check-updates.py @@ -4,7 +4,10 @@ # considering the last 180 days. # The STEX_API_KEY environment variable must be set for authentication. # -# Pass directories or yaml files as arguments. 
def nonempty_docs(dirs_or_files):
    """Generate ``(path, doc)`` pairs for all non-empty yaml documents.

    Args:
        dirs_or_files: Iterable of directory or file paths. Directories are
            traversed recursively via ``os.walk``; every other path is taken
            as one file. Only paths ending in ``.yaml`` are read.

    Yields:
        ``(path, doc)`` for each document produced by ``yaml.safe_load_all``
        that is not ``None`` (empty files/documents are skipped).
        ``(path, None)`` is yielded once when reading the documents of a file
        raises ``yaml.parser.ParserError``, so the caller can register the
        file as an error; documents loaded before the error may already have
        been yielded.
    """
    for entry in dirs_or_files:
        if os.path.isdir(entry):
            candidates = (os.path.join(root, fname)
                          for (root, _dirs, files) in os.walk(entry)
                          for fname in files)
        else:
            candidates = [entry]
        for path in candidates:
            if not path.endswith(".yaml"):
                continue
            with open(path) as f:
                text = f.read()
            try:
                for doc in yaml.safe_load_all(text):
                    if doc is not None:  # skip empty yaml file or document
                        yield path, doc
            except yaml.parser.ParserError:
                # Without `yield` this line would be a discarded tuple
                # expression and the parse error would go unreported.
                yield path, None
url_id_pattern.fullmatch(url) + if not m: + continue # we only check ST files + file_id = m.group(1) + file_ids.append(file_id) + + if not file_ids: + print("No STEX file IDs found in yaml files.") + return 0 + + # check relevant STEX file IDs only + req_url = f"https://community.simtropolis.com/stex/files-api.php?key={stex_api_key}&sort=desc&id=" + ",".join(file_ids[:id_limit]) + else: + # check most recently updated STEX entries only + req_url = f"https://community.simtropolis.com/stex/files-api.php?key={stex_api_key}&days={since_days}&mode=updated&sc4only=true&sort=desc" + req = urllib.request.Request(req_url, headers={'User-Agent': 'Mozilla/5.0 Firefox/130.0'}) with urllib.request.urlopen(req) as data: report = json.load(data) upstream_state = {str(item['id']): item for item in report} - errors = 0 out_of_date = 0 up_to_date = 0 skipped = 0 - for d in args: - for (root, dirs, files) in os.walk(d): - for fname in files: - if not fname.endswith(".yaml"): - continue - p = os.path.join(root, fname) - with open(p) as f: - text = f.read() - try: - for doc in yaml.safe_load_all(text): - if doc is None: # empty yaml file or document - continue - - # check URLs - url = doc.get('url') - if url is None: - continue # not an asset - m = url_id_pattern.fullmatch(url) - if not m: - continue # we only check ST files - file_id = m.group(1) - if file_id not in upstream_state: - skipped += 1 # not updated since_days - continue - - subfile_id = m.group(2) # possibly None - subfiles = upstream_state[file_id].get('files', []) - if subfile_id is None: - if len(subfiles) != 1: - errors += 1 - print(f"{doc.get('assetId')}:") - print(f" url must include subfile ID `r=#` as there are {len(subfiles)} subfiles:") - print(" " + "\n ".join(f"{r.get('id')}: {r.get('name')}" for r in subfiles)) - print(f" {upstream_state[file_id].get('fileURL')}") - else: - if subfile_id not in [str(r.get('id')) for r in subfiles]: - errors += 1 - print(f"{doc.get('assetId')}:") - print(f" url subfile ID 
{subfile_id} does not exist (anymore), so must be updated:") - print(" " + "\n ".join(f"{r.get('id')}: {r.get('name')}" for r in subfiles)) - print(f" {upstream_state[file_id].get('fileURL')}") - - last_modified_upstream = isoparse(upstream_state[file_id]['updated']) - if last_modified_upstream.tzinfo is None: - last_modified_upstream = last_modified_upstream.replace(tzinfo=timezone.utc) - - if 'lastModified' not in doc: - errors += 1 # TODO - else: - last_modified = isoparse(doc.get('lastModified')) - # we ignore small timestamp differences - if abs(last_modified_upstream - last_modified) <= timedelta(minutes=10): - up_to_date += 1 - else: - if last_modified < last_modified_upstream: - out_of_date += 1 - else: - errors += 1 # our assets should not be newer than upstream's assets TODO - print("error: ", end='') - print(f"{doc.get('assetId')}:") - print(f" {doc.get('version')} -> {upstream_state[file_id].get('release')}") - print(f" {last_modified.isoformat().replace('+00:00', 'Z')} -> {last_modified_upstream.isoformat().replace('+00:00', 'Z')}") - print(f" {upstream_state[file_id].get('fileURL')}") - print(f" {p}") - - except yaml.parser.ParserError: - errors += 1 + for p, doc in nonempty_docs(args): + if doc is None: # parse error + errors += 1 + continue + + # check URLs + url = doc.get('url') + if url is None: + continue # not an asset + m = url_id_pattern.fullmatch(url) + if not m: + continue # we only check ST files + file_id = m.group(1) + if file_id not in upstream_state: + skipped += 1 # not updated since_days + continue + + subfile_id = m.group(2) # possibly None + subfiles = upstream_state[file_id].get('files', []) + if subfile_id is None: + if len(subfiles) != 1: + errors += 1 + print(f"{doc.get('assetId')}:") + print(f" url must include subfile ID `r=#` as there are {len(subfiles)} subfiles:") + print(" " + "\n ".join(f"{r.get('id')}: {r.get('name')}" for r in subfiles)) + print(f" {upstream_state[file_id].get('fileURL')}") + else: + if subfile_id not 
in [str(r.get('id')) for r in subfiles]: + errors += 1 + print(f"{doc.get('assetId')}:") + print(f" url subfile ID {subfile_id} does not exist (anymore), so must be updated:") + print(" " + "\n ".join(f"{r.get('id')}: {r.get('name')}" for r in subfiles)) + print(f" {upstream_state[file_id].get('fileURL')}") + + last_modified_upstream = isoparse(upstream_state[file_id]['updated']) + if last_modified_upstream.tzinfo is None: + last_modified_upstream = last_modified_upstream.replace(tzinfo=timezone.utc) + + if 'lastModified' not in doc: + errors += 1 # TODO + else: + last_modified = isoparse(doc.get('lastModified')) + # we ignore small timestamp differences + if abs(last_modified_upstream - last_modified) <= timedelta(minutes=10): + up_to_date += 1 + else: + if last_modified < last_modified_upstream: + out_of_date += 1 + else: + errors += 1 # our assets should not be newer than upstream's assets TODO + print("error: ", end='') + print(f"{doc.get('assetId')}:") + print(f" {doc.get('version')} -> {upstream_state[file_id].get('release')}") + print(f" {last_modified.isoformat().replace('+00:00', 'Z')} -> {last_modified_upstream.isoformat().replace('+00:00', 'Z')}") + print(f" {upstream_state[file_id].get('fileURL')}") + print(f" {p}") + skipped_msg = ( + "" if not skipped else + f" (skipped {skipped} assets not updated in the last {since_days} days)" if not id_mode else + f" (skipped {skipped} assets)") result = 0 if out_of_date == 0: - print(f"All {up_to_date} ST assets are up-to-date (skipped {skipped} assets not updated in the last {since_days} days).") + print(f"All {up_to_date} ST assets are up-to-date{skipped_msg}.") else: - print(f"There are {out_of_date} outdated ST assets, while {up_to_date} are up-to-date (skipped {skipped} assets not updated in the last {since_days} days).") + print(f"There are {out_of_date} outdated ST assets, while {up_to_date} are up-to-date{skipped_msg}.") result |= 0x02 if errors > 0: print(f"Finished with {errors} errors.") diff --git 
#!/bin/sh
# Check all STEX URLs contained in files that have been modified since a commit.
#
# Usage: url-check.sh <commit-or-branch> <src-folder>
#   <commit-or-branch>  revision to compare against; the diff is taken relative
#                       to the merge base of the current HEAD (`@`) and this
#                       revision
#   <src-folder>        only changed files below this path are checked
#
# Run from the repository root, as the checker script is referenced by a
# relative path. The checker requires the STEX_API_KEY environment variable.
set -e
if [ "$#" -ne 2 ]; then
  echo "Pass the commit/branch to compare to as first argument, the src folder as second." >&2
  exit 1
fi

BASE="$(git merge-base @ "$1")"

# --diff-filter=d leaves out deleted files: they no longer exist on disk, so
# the checker would fail when trying to open them.
# -z / -0 hand the file names over NUL-separated, so git does not quote
# unusual names and xargs does not split or reinterpret them.
# NOTE(review): with no changed files, GNU xargs still runs the checker once
# without file arguments (it then reports that there is nothing to analyze).
git diff "$BASE" --name-only --diff-filter=d -z -- "$2" | xargs -0 python .github/st-check-updates.py --mode=id
+ run: git checkout ${{ github.event.pull_request.head.ref}} -- src/yaml && make lint + - name: Check sc4pac yaml schema (push) + if: ${{ github.event_name != 'pull_request_target'}} run: make lint + # requires STEX_API_KEY, so job is skipped in forks + url-check: + if: ${{ github.repository == 'memo33/sc4pac' }} + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + # to allow partial-checkout/diff of other commit + fetch-depth: 0 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + - name: Install dependencies + run: python -m pip install --upgrade PyYAML jsonschema + - name: Check STEX URLs (pull_request_target) + if: ${{ github.event_name == 'pull_request_target'}} + env: + STEX_API_KEY: ${{ secrets.STEX_API_KEY }} + # we check out only src/yaml, the non-code part of the repository, and compare it to the current (base) branch + run: git checkout ${{ github.event.pull_request.head.ref}} -- src/yaml && sh .github/url-check.sh ${{ github.event.pull_request.base.ref }} src/yaml + - name: Check STEX URLs (push) + if: ${{ github.event_name == 'push'}} + env: + STEX_API_KEY: ${{ secrets.STEX_API_KEY }} + # TODO this is not perfect yet, as `before` sometimes does not exist + run: sh .github/url-check.sh ${{ github.event.before }} src/yaml + deploy: - needs: lint + needs: lint # url-check is not needed as ST is flaky if: ${{ github.repository == 'memo33/sc4pac' && github.ref == 'refs/heads/main' }} environment: name: github-pages diff --git a/Makefile b/Makefile index d9f8ae42..ae9b2b6e 100644 --- a/Makefile +++ b/Makefile @@ -46,4 +46,7 @@ sc4e-check-updates: st-check-updates: set -a && source ./.git/sc4pac-stex-api-key && set +a && python .github/st-check-updates.py src/yaml -.PHONY: gh-pages gh-pages-no-lint channel host host-docs lint sc4e-check-updates st-check-updates +st-url-check: + set -a && source ./.git/sc4pac-stex-api-key && set +a && sh .github/url-check.sh origin/main src/yaml + +.PHONY: gh-pages 
gh-pages-no-lint channel host host-docs lint sc4e-check-updates st-check-updates st-url-check