diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
new file mode 100644
index 0000000..95d00cd
--- /dev/null
+++ b/.github/workflows/test.yaml
@@ -0,0 +1,32 @@
+name: Test
+
+on:
+  push:
+    branches: [ "main", "master" ]
+  pull_request:
+    branches: [ "main", "master" ]
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install requests
+          pip install pytest
+
+      - name: Run tests
+        run: pytest -s ./tests/
\ No newline at end of file
diff --git a/.github/workflows/validate.yaml b/.github/workflows/validate.yaml
new file mode 100644
index 0000000..c93ac48
--- /dev/null
+++ b/.github/workflows/validate.yaml
@@ -0,0 +1,30 @@
+
+name: Validate
+
+# Controls when the action will run.
+on:
+  # Triggers the workflow on push or pull request events but only for the main branch
+  push:
+    branches: [ "main", "master" ]
+  pull_request:
+    branches: [ "main", "master" ]
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+# A workflow run is made up of one or more jobs that can run sequentially or in parallel
+jobs:
+  pre-commit-hooks:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+          architecture: 'x64'
+      - name: Run a script
+        run: |-
+          pip install pre-commit
+          pip install rdflib
+          pre-commit autoupdate
+          pre-commit run --all-files --verbose
\ No newline at end of file
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 9765d26..54b0667 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,77 +1,29 @@
-#
-# Run pre-commit hooks. You can run them without installing
-# the hook with
-#
-#   $ pre-commit run --all-files
-#
-# See https://pre-commit.com for more information
-# See https://pre-commit.com/hooks.html for more hooks
 repos:
-- repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v4.3.0
-  hooks:
-  - id: trailing-whitespace
-  - id: end-of-file-fixer
-  - id: check-yaml
-    args: [--allow-multiple-documents]
-  - id: check-added-large-files
-    args:
-    - "--maxkb=4000"
-- repo: https://github.com/myint/autoflake
-  rev: v1.4
-  hooks:
-  - id: autoflake
-    args:
-    - --in-place
-    - --remove-unused-variables
-    - --remove-all-unused-imports
-- repo: https://github.com/psf/black
-  rev: 22.6.0
-  hooks:
-  - id: black
-- repo: https://github.com/pycqa/isort
-  rev: 5.10.1
-  hooks:
-  - id: isort
-    name: isort (python)
-    # Use black profile for isort to avoid conflicts
-    # see https://github.com/PyCQA/isort/issues/1518
-    args: ["--profile", "black"]
-  - id: isort
-    name: isort (cython)
-    types: [cython]
-  - id: isort
-    name: isort (pyi)
-    types: [pyi]
-- repo: https://gitlab.com/pycqa/flake8
-  rev: 3.9.2
+- repo: https://github.com/teamdigitale/dati-semantic-cookiecutter
+  rev: 931e0529c8839f6fa8c1ae315839ba7c3060c5f2
   hooks:
-  - id: flake8
-- repo: https://github.com/PyCQA/bandit
-  rev: 1.7.4
-  hooks:
-  - id: bandit
-    name: bandit
-    args: ["-c", ".bandit.yaml"]
-    description: 'Bandit is a tool for finding common security issues in Python code'
-    entry: bandit
-    language: python
-    language_version: python3
-    types: [python]
-- repo: https://github.com/Lucas-C/pre-commit-hooks-safety
-  rev: v1.3.0
-  hooks:
-  - id: python-safety-dependencies-check
+  - id: check-repo-structure
+    args: ["assets/controlled-vocabularies/", "assets/ontologies/", "assets/schemas/"]
+  - id: check-filename-format
+    args: ["assets/controlled-vocabularies/", "assets/ontologies/", "assets/schemas/"]
+  - id: check-filenames-match-uri
+    args: ["assets/controlled-vocabularies/", "assets/ontologies/", "assets/schemas/"]
+  - id: check-filenames-match-directories
+    args: ["assets/controlled-vocabularies/", "assets/ontologies/", "assets/schemas/"]
+  - id: check-supported-files
+    args: ["assets/controlled-vocabularies/", "assets/ontologies/", "assets/schemas/"]
+  - id: check-versioning-pattern
+    args: ["assets/controlled-vocabularies/", "assets/ontologies/", "assets/schemas/"]
 #
 # Semantic checks.
 #
 - repo: https://github.com/teamdigitale/json-semantic-playground
-  rev: c10c5879438de0d321d6591ec88ace95e042e9d2
+  rev: 0b4ad4cc883a49878fdfd4539e694ae56b041e29
   hooks:
   - id: validate-csv
     files: >-
-      ^assets\/vocabularies/.*\.csv
+      ^assets\/controlled-vocabularies/.*\.csv
   - id: validate-oas-schema
     files: >-
       ^assets\/schemas\/.*.oas3.yaml
@@ -81,5 +33,6 @@ repos:
   - id: validate-turtle
     files: >-
       ^assets\/controlled-vocabularies\/.*\.ttl
-  - id: validate-directory-versioning
-    files: '^assets\/ontologies\/.*\.ttl'
+  - id: validate-turtle
+    files: >-
+      ^assets\/schemas\/.*\.ttl
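For context, each custom hook id configured above resolves to a Python console script (declared in `setup.py` later in this diff) that receives the configured directories as arguments and signals failure through its exit code. A minimal sketch of that contract, with a hypothetical `check_example` hook, might look like this:

```python
import sys

def check_example(root_dirs):
    """Return True when every configured directory passes the check."""
    return all(bool(d) for d in root_dirs)  # placeholder check

def main():
    root_dirs = sys.argv[1:]  # pre-commit passes the configured args: [...] list here
    if not root_dirs:
        print("No root directories provided.")
        sys.exit(1)
    if not check_example(root_dirs):
        sys.exit(1)  # a non-zero exit code makes the hook fail

if __name__ == "__main__":
    main()
```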
diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml
new file mode 100644
index 0000000..9ba8874
--- /dev/null
+++ b/.pre-commit-hooks.yaml
@@ -0,0 +1,74 @@
+#
+# Hooks provided by this repository.
+#
+
+- id: check-repo-structure
+  name: Check Repository Structure
+  description: |-
+    Check whether the directory structure is correct:
+    - assets/ontologies/*
+    - assets/controlled-vocabularies/*
+    - assets/schemas/*
+  entry: check_repo_structure
+  args: ["assets/controlled-vocabularies/", "assets/ontologies/", "assets/schemas/"]
+  language: python
+  files: ^assets/.*
+  pass_filenames: false
+  types: [file]
+
+- id: check-filename-format
+  name: Check Filename Format
+  description: |-
+    Check whether file and directory names follow the required format (^[.a-z0-9 _-]{1,64}$)
+  entry: check_filename_format
+  args: ["assets/controlled-vocabularies/", "assets/ontologies/", "assets/schemas/"]
+  language: python
+  files: ^assets/.*
+  pass_filenames: false
+  types: [file]
+
+- id: check-filenames-match-uri
+  name: Check Filenames Match URI
+  description: |-
+    Check whether the name of each TTL or oas3.yaml file matches the final part of its URI
+  entry: check_filename_match_uri
+  args: ["assets/controlled-vocabularies/", "assets/ontologies/", "assets/schemas/"]
+  language: python
+  files: ^assets/.*
+  pass_filenames: false
+  types: [file]
+  additional_dependencies: [rdflib]
+
+- id: check-filenames-match-directories
+  name: Check Filenames Match Directories
+  description: |-
+    Check whether filenames match the names of their containing directories
+  entry: check_filenames_match_directories
+  args: ["assets/controlled-vocabularies/", "assets/ontologies/", "assets/schemas/"]
+  language: python
+  files: ^assets/.*
+  pass_filenames: false
+  types: [file]
+
+- id: check-supported-files
+  name: Check encoding and file suffix
+  entry: check_supported_files
+  description: |-
+    Check the leaf directories of the specified root directories
+    to ensure that each one contains at least one .ttl file in UTF-8 format.
+  args: ["assets/controlled-vocabularies/", "assets/ontologies/", "assets/schemas/"]
+  language: python
+  files: ^assets/.*
+  pass_filenames: false
+  types: [file]
+
+- id: check-versioning-pattern
+  name: Check versioning pattern
+  entry: check_versioning_pattern
+  description: |-
+    Check whether the versioning pattern of leaf directories is correct
+  args: ["assets/controlled-vocabularies/", "assets/ontologies/", "assets/schemas/"]
+  language: python
+  files: ^assets/.*
+  pass_filenames: false
+  types: [file]
\ No newline at end of file
diff --git a/README.en.md b/README.en.md
index 73c9344..0905fbb 100644
--- a/README.en.md
+++ b/README.en.md
@@ -23,12 +23,32 @@ For readability:
 and are either used for testing or
 for developing and validating what's in assets/.
-## Development
+## Automated Checks and Testing
-This repository uses pre-commit to validate content.
-An integrated testing environment to reproduce the CI pipeline
-is available via docker-compose, which goes on thru a set of steps.
+This section describes the automated checks and tests implemented to ensure the quality and integrity of the repository content.
+
+### Automated Checks (Pre-commit)
+
+This repository implements automated checks using [pre-commit](https://pre-commit.com/). The checks are defined in [`.pre-commit-config.yaml`](.pre-commit-config.yaml).
+
+These checks also run in GitHub Actions: the `validate.yaml` workflow in `.github/workflows` runs the pre-commit checks on every push and pull request (PR), and it can also be triggered manually from the Actions tab at any time.
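Besides the Actions tab, a `workflow_dispatch` run can also be triggered through the GitHub REST API. A minimal sketch with `requests`, where `OWNER`, `REPO`, and the token are placeholders you would substitute:

```python
import os
import requests

# Hypothetical owner/repo values; the token needs permission to dispatch workflows.
OWNER, REPO, WORKFLOW = "OWNER", "REPO", "validate.yaml"

response = requests.post(
    f"https://api.github.com/repos/{OWNER}/{REPO}/actions/workflows/{WORKFLOW}/dispatches",
    headers={
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}",
    },
    json={"ref": "main"},  # branch to run the workflow on
)
response.raise_for_status()  # GitHub answers 204 No Content on success
```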
+
+To enable the pre-commit checks in another repository, simply copy the [`.pre-commit-config.yaml`](.pre-commit-config.yaml) and [`.github/workflows/validate.yaml`](.github/workflows/validate.yaml) files.
+
+### URL Testing
+
+The `tests` directory contains a script, `test_urls.py`, that checks the GitHub URLs found in the files under the `assets` subdirectories.
+
+This test is also automated through GitHub Actions: the `test.yaml` workflow in `.github/workflows` runs it on every push and pull request, and it too can be triggered manually at any time.
+
+To enable the URL tests in another repository, simply copy the [`/tests/test_urls.py`](/tests/test_urls.py) and [`.github/workflows/test.yaml`](.github/workflows/test.yaml) files.
+
+### Local Checks and Testing
+
+Checks and tests can also be run locally, with either Docker or plain Python. An integrated test environment that reproduces the CI pipeline is available through `docker-compose`, which executes a series of steps.
 
 ```bash
 docker-compose -f docker-compose-test.yml up
 ```
+
+Note: to transfer this environment to another repository, keep in mind that it requires the Dockerfiles in the `tests` directory (`Dockerfile.precommit` and `Dockerfile.pytest`).
\ No newline at end of file
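To make the URL Testing section above concrete, here is a minimal sketch of the kind of check `test_urls.py` performs: scanning a Turtle file for GitHub URLs and probing each one with a HEAD request (the file path below is a placeholder):

```python
import re
from pathlib import Path

import requests

# Same idea as the regex used by tests/test_urls.py later in this diff.
re_url = re.compile(r'[<"](https://github.*|https://raw.githubusercontent[^>"]*)[>"]')

ttl_file = Path("assets/ontologies/example/example.ttl")  # placeholder path
for url in re_url.findall(ttl_file.read_text(encoding="utf8")):
    status = requests.head(url.strip('<">')).status_code
    print(url, "->", status)  # anything other than 200 deserves a closer look
```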
diff --git a/README.md b/README.md
index 95ffcc5..1b0ff35 100644
--- a/README.md
+++ b/README.md
@@ -43,14 +43,33 @@ come:
-## Sviluppo
+## Controlli Automatici e Test
-Questo repository utilizza [pre-commit](https://pre-commit.com/) per convalidare il contenuto dei file:
-le verifiche sono indicate in [.pre-commit-config.yaml](.pre-commit-config.yaml).
-Un ambiente di test integrato per riprodurre la pipeline CI
-è disponibile tramite docker-compose, che esegue una serie di passaggi.
+Di seguito vengono descritte le procedure di controllo automatico e di test implementate, utili per garantire la qualità e l'integrità del contenuto del repository.
+
+### Controlli Automatici (Pre-commit)
+
+Questo repository implementa i controlli automatici utilizzando [pre-commit](https://pre-commit.com/). Le specifiche delle verifiche sono definite nel file [`.pre-commit-config.yaml`](.pre-commit-config.yaml).
+
+È possibile eseguire tali verifiche mediante GitHub Actions. Il file `validate.yaml` in `.github/workflows` esegue automaticamente i controlli pre-commit dopo ogni push o pull request (PR). Inoltre, è possibile attivare manualmente tali controlli in qualsiasi momento.
+
+Per abilitare i controlli pre-commit in un altro repository, è sufficiente copiare il file [`.pre-commit-config.yaml`](.pre-commit-config.yaml) e il file [`.github/workflows/validate.yaml`](.github/workflows/validate.yaml).
+
+### Test URL
+
+Nella directory `tests` è presente uno script denominato `test_urls.py`, che consente di verificare gli URL relativi a GitHub presenti nei file delle sottodirectory `assets`.
+
+Anche questo test può essere automatizzato mediante GitHub Actions. Il file `test.yaml` in `.github/workflows` attiva automaticamente i test dopo ogni push o pull request. Allo stesso modo, è possibile avviare manualmente questi test in qualsiasi momento.
+
+Per abilitare i test URL in un altro repository, è sufficiente copiare il file [`/tests/test_urls.py`](/tests/test_urls.py) e il file [`.github/workflows/test.yaml`](.github/workflows/test.yaml).
+
+### Controlli e Test in Locale
+
+È possibile eseguire i controlli e i test in locale utilizzando l'ambiente Docker o semplicemente Python. Un ambiente di test integrato per riprodurre la pipeline CI è disponibile tramite `docker-compose`, che esegue una serie di passaggi.
 
 ```bash
 docker-compose -f docker-compose-test.yml up
 ```
+
+Nota: se si desidera trasferire questo ambiente su un altro repository, è importante considerare che l'ambiente Docker richiede i Dockerfile presenti nella directory `tests` (come `Dockerfile.precommit` e `Dockerfile.pytest`).
+
diff --git a/docker-compose-test.yml b/docker-compose-test.yml
index ad0c18d..d8f169f 100644
--- a/docker-compose-test.yml
+++ b/docker-compose-test.yml
@@ -1,27 +1,20 @@
 version: '3.9'
+
 services:
-  checkout: &baseline
-    image: alpine/git
-    volumes:
-      - ./_deleteme:/code
-    working_dir: /code
-    entrypoint: ["git"]
-    command: >-
-      clone https://github.com/teamdigitale/dati-semantic-cookiecutter.git .
-  zero:
+  ci-precommit:
     build:
-      context: ./
+      context: .
       dockerfile: tests/Dockerfile.precommit
     volumes:
-      - ./_deleteme:/code
-    working_dir: /code
-    entrypoint: ['pre-commit']
-    command: ['run', '-a']
-    depends_on:
-      - checkout
-  one:
-    <<: *baseline
-    image: busybox
-    command: ['sh', '-c', 'exit 0']
-    depends_on:
-      - zero
+      - .:/app
+    working_dir: /app
+    entrypoint: 'pre-commit run --all-files --verbose'
+
+  ci-pytest:
+    build:
+      context: .
+      dockerfile: tests/Dockerfile.pytest
+    volumes:
+      - .:/app
+    working_dir: /app
+    entrypoint: 'pytest -s ./tests/'
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..cbd3bd8
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+pytest==8.1.1
+pre-commit==3.6.0
+requests==2.31.0
+rdflib==7.0.0
\ No newline at end of file
diff --git a/scripts/__init__.py b/scripts/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/scripts/check_filename_format.py b/scripts/check_filename_format.py
new file mode 100644
index 0000000..8b798c4
--- /dev/null
+++ b/scripts/check_filename_format.py
@@ -0,0 +1,61 @@
+import os
+import re
+import sys
+
+def check_filename_format(root_dirs):
+    """
+    Check whether file and directory names follow the required format (pattern).
+    Args:
+        root_dirs (list): A list of root directories to be checked.
+    Returns:
+        bool: True if all file and directory names match the required format, False otherwise.
+    """
+
+    # Lowercase letters, digits, dots, spaces, underscores and hyphens, 1 to 64 characters.
+    pattern = r'^[.a-z0-9 _-]{1,64}$'
+    extensions_to_check = ['.ttl', '.rdf', '.csv', '.yaml']
+
+    for root_dir in root_dirs:
+        for dirpath, dirnames, filenames in os.walk(root_dir):
+            for filename in filenames:
+                name, extension = os.path.splitext(filename)
+                if extension not in extensions_to_check:
+                    continue
+                if not re.match(pattern, name):
+                    print(f"Error: filename '{filename}' in directory '{dirpath}' does not match the required format.")
+                    return False
+            for dirname in dirnames:
+                if not re.match(pattern, dirname):
+                    print(f"Error: directory name '{dirname}' in directory '{dirpath}' does not match the required format.")
+                    return False
+
+    return True
+
+def check_directory_existence(root_dirs):
+    existing_dirs = [root_dir for root_dir in root_dirs if os.path.exists(root_dir)]
+
+    if not existing_dirs:
+        print(f"Error: none of the provided root directories exist: {root_dirs}")
+        return False
+
+    # Warn about any directories that don't exist
+    non_existent_dirs = [root_dir for root_dir in root_dirs if root_dir not in existing_dirs]
+
+    for root_dir in non_existent_dirs:
+        print(f"WARNING: {root_dir} does not exist")
+    return True
+
+def main():
+    root_dirs = sys.argv[1:]  # Read dir args
+
+    if not root_dirs:
+        print("No root directories provided.")
+        sys.exit(1)
+
+    if not check_directory_existence(root_dirs):
+        sys.exit(1)
+
+    if not check_filename_format(root_dirs):
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()
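As an illustration of the rule enforced by `check_filename_format.py`, a quick check of the pattern against a few sample names (the names are examples only):

```python
import re

# Pattern from scripts/check_filename_format.py: lowercase letters, digits,
# dots, spaces, underscores and hyphens, between 1 and 64 characters.
pattern = r'^[.a-z0-9 _-]{1,64}$'

for name in ["education-level", "accommodation-star-rating", "EducationLevel", "v1.0"]:
    print(name, "->", bool(re.match(pattern, name)))
# education-level -> True, accommodation-star-rating -> True,
# EducationLevel -> False (uppercase), v1.0 -> True
```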
+ """ + + pattern = r'^[\\.a-z0-9 _-]{1,64}$' + extensions_to_check = ['.ttl', '.rdf', '.csv', '.yaml'] + + for root_dir in root_dirs: + for dirpath, dirnames, filenames in os.walk(root_dir): + for filename in filenames: + name, extension = os.path.splitext(filename) + if extension not in extensions_to_check: + continue + if not re.match(pattern, name): + print(f"Error: filename '{filename}' in directory '{dirpath}' does not match the required format.") + return False + for dirname in dirnames: + if not re.match(pattern, dirname): + print(f"Error: directory name '{dirname}' in directory '{dirpath}' does not match the required format.") + return False + + return True + +def check_directory_existence(root_dirs): + existing_dirs = [root_dir for root_dir in root_dirs if os.path.exists(root_dir)] + + if not existing_dirs: + print(f"{root_dirs} don't exist") + return False + + # Check if any directories don't exist + non_existent_dirs = [root_dir for root_dir in root_dirs if root_dir not in existing_dirs] + + for root_dir in non_existent_dirs: + print(f"WARNING: {root_dir} does not exist") + return True + +def main(): + root_dirs = sys.argv[1:] # Read dir args + + if not root_dirs: + print("No root directories provided.") + exit(1) + + if not check_directory_existence(root_dirs): + exit(1) + + if not check_filename_format(root_dirs): + exit(1) + +if __name__ == "__main__": + main() diff --git a/scripts/check_filename_match_uri.py b/scripts/check_filename_match_uri.py new file mode 100644 index 0000000..b538929 --- /dev/null +++ b/scripts/check_filename_match_uri.py @@ -0,0 +1,115 @@ +import sys +import os +from pathlib import Path +from rdflib import Graph, RDF, RDFS, OWL, SKOS, Namespace + +def extract_main_uri(ttl_file,root_dir): + """ + Extracts the main URI relative to the specified TTL file. + + Args: + ttl_file (str): The path of the TTL file. + + Returns: + str: The main relative URI if found, otherwise None. + """ + g = Graph() + g.parse(ttl_file, format="ttl") + + # Define namespace prefixes + dcatapit = Namespace("http://dati.gov.it/onto/dcatapit#") + + main_uri = None + + for s, p, o in g: + if (s, RDF.type, OWL.Ontology) in g and "onto" in root_dir.lower(): + main_uri = s + break + elif p == RDF.type and o == dcatapit.Dataset: + main_uri = s + break + # elif (s, RDF.type, RDFS.Class) in g: + # main_uri = s + # break + elif (s, RDF.type, SKOS.ConceptScheme) in g: + main_uri = s + break + + return main_uri + +def check_filename_match_uri(root_dirs): + """ + Checks whether the name of each TTL or oas3.yaml file matches the final part of its relative URI. + + Args: + root_dirs (list): List of directories to search for TTL files. + + Returns: + list: List of tuples (file, uri) for TTL or oas3.yaml files that do not match the URI. 
+ """ + mismatches = [] + + for root_dir in root_dirs: + for file_path in Path(root_dir).rglob("*.ttl"): + filename = file_path.stem # File name without extension + uri = extract_main_uri(str(file_path), root_dir) # Main relative URI of the file + + if uri: + # Extract the final part of the URI + uri_parts = str(uri).split("/") + last_uri_part = uri_parts[-1] + if last_uri_part == '': + last_uri_part = uri_parts[-2] + + # Check if the root directory contains "schema" + if "schema" in root_dir.lower(): + # Check if the file with .oas3.yaml extension exists + oas3_yaml_file = Path(file_path.parent, f"{last_uri_part}") + if not oas3_yaml_file.exists(): + mismatches.append((str(oas3_yaml_file), str(uri))) + else: + # Compare the file name with the last part of the URI + if filename != last_uri_part: + mismatches.append((str(file_path), str(uri))) + else: + print(f"Warning: No main relative URI found for file {file_path}") + + return mismatches + +def check_directory_existence(root_dirs): + existing_dirs = [root_dir for root_dir in root_dirs if os.path.exists(root_dir)] + + if not existing_dirs: + print(f"{root_dirs} don't exist") + return False + + # Check if any directories don't exist + non_existent_dirs = [root_dir for root_dir in root_dirs if root_dir not in existing_dirs] + + for root_dir in non_existent_dirs: + print(f"WARNING: {root_dir} does not exist") + return True + +def main(): + root_dirs = sys.argv[1:] + + if not root_dirs: + print("No root directories provided.") + exit(1) + + if not check_directory_existence(root_dirs): + exit(1) + + mismatches = check_filename_match_uri(root_dirs) + + if mismatches: + print("Error: The following files do not match their relative URI:") + for file_path, uri in mismatches: + print(f"- File: {file_path}, URI: {uri}") + exit(1) + else: + print("All files match their relative URI.") + +if __name__ == "__main__": + main() + diff --git a/scripts/check_filenames_match_directories.py b/scripts/check_filenames_match_directories.py new file mode 100644 index 0000000..db251f1 --- /dev/null +++ b/scripts/check_filenames_match_directories.py @@ -0,0 +1,83 @@ +import os +import sys + + +# List of filenames to be excluded +EXCLUDED_FILENAMES = ["index", "datapackage", "context-short", "rules"] + +# List of extensions to be excluded +EXCLUDED_EXTENSIONS = [".oas3.yaml", ".md", ".shacl", ".frame.yamlld", ".ld.yaml"] + +def split_filename_extension(filename): + """ + Split filename into name and extension. + Args: + filename (str): The filename to split. + Returns: + tuple: A tuple containing the name and extension of the filename. + """ + parts = filename.split(".") + if len(parts) > 2: + # If there are more than 1 periods, consider the last one as part of the extension + # e.g. education-level.frame.yamlld -> education-level .frame.yamlld + name = ".".join(parts[:-2]) + extension = "." + ".".join(parts[-2:]) + else: + # Otherwise, consider only the extension as the last part + name, extension = os.path.splitext(filename) + + return name, extension + +def check_filenames_match_directories(root_dirs): + """ + Check if filenames match the containing directory names. + Args: + root_dirs (list): A list of root directories to be checked. + Returns: + bool: True if all filenames match their containing directory names, False otherwise. 
+ """ + + for root_dir in root_dirs: + for dirpath, _, filenames in os.walk(root_dir): + for filename in filenames: + name, extension = split_filename_extension(filename) + parent_dir = os.path.basename(dirpath) + parent_dir_1 = os.path.basename(os.path.dirname(dirpath)) + parent_dir_2 = os.path.basename(os.path.dirname(os.path.dirname(dirpath))) + if name != parent_dir and name != parent_dir_1 and name != parent_dir_2: + if name not in EXCLUDED_FILENAMES and extension not in EXCLUDED_EXTENSIONS: + print(f"Error: Filename '{filename}' in '{dirpath}' dir does not match its containing directory name.") + return False + + return True + +def check_directory_existence(root_dirs): + existing_dirs = [root_dir for root_dir in root_dirs if os.path.exists(root_dir)] + + if not existing_dirs: + print(f"{root_dirs} don't exist") + return False + + # Check if any directories don't exist + non_existent_dirs = [root_dir for root_dir in root_dirs if root_dir not in existing_dirs] + + for root_dir in non_existent_dirs: + print(f"WARNING: {root_dir} does not exist") + return True + +def main(): + root_dirs = sys.argv[1:] # Read dir args + + if not root_dirs: + print("No root directories provided.") + exit(1) + + if not check_directory_existence(root_dirs): + exit(1) + + if not check_filenames_match_directories(root_dirs): + exit(1) + +if __name__ == "__main__": + main() + diff --git a/scripts/check_repo_structure.py b/scripts/check_repo_structure.py new file mode 100644 index 0000000..29ddc7a --- /dev/null +++ b/scripts/check_repo_structure.py @@ -0,0 +1,31 @@ +import os +import sys + +def check_structure(required_dirs): + """ + Check whether the directory structure is correct. + Args: + required_dirs (list): A list of required directories to be checked. + Returns: + bool: True if all required directories exist, False otherwise. + """ + + for dir in required_dirs: + if not os.path.exists(dir): + print(f"Error: directory '{dir}' not exists.") + return False + + return True + +def main(): + required_dirs = sys.argv[1:] # Read dir args + + if not required_dirs: + print("No root directories provided.") + exit(1) + + if not check_structure(required_dirs): + exit(1) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/check_supported_files.py b/scripts/check_supported_files.py new file mode 100644 index 0000000..e302288 --- /dev/null +++ b/scripts/check_supported_files.py @@ -0,0 +1,98 @@ +import os +import sys +from pathlib import Path + +""" +This script checks the leaf directories of the specified root directories to ensure that each leaf directory contains at least one .ttl file in UTF-8 format. +""" + +def is_utf8(file_path): + """ + Check if a file is encoded in UTF-8 format. + Args: + file_path (str): The path to the file. + Returns: + bool: True if the file is encoded in UTF-8, False otherwise. + """ + try: + with open(file_path, 'r', encoding='utf-8') as file: + file.read() + return True + except UnicodeDecodeError: + return False + +def check_supported_files(root_dirs): + """ + Check if the leaf directories contain at least one .ttl file in UTF-8 format. + Args: + root_dirs (list): A list of root directories to be checked. + Returns: + bool: True if all leaf directories contain a .ttl file in UTF-8 format, False otherwise. + """ + def dfs(directory): + """ + Perform a depth-first search (DFS) to check leaf directories. + Args: + directory (Path): The directory to be checked. 
diff --git a/scripts/check_versioning_pattern.py b/scripts/check_versioning_pattern.py
new file mode 100644
index 0000000..f0fc2df
--- /dev/null
+++ b/scripts/check_versioning_pattern.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+import sys
+import os
+import re
+
+def check_versioning_pattern(root_dirs):
+    """
+    Check if the versioning pattern is correct for leaf directories.
+    """
+    version_pattern = r"(latest|v?\d+(\.\d+){0,2})$"  # Regular expression matching the versioning format
+    dir_pattern = r"(latest|\b(?:\D*\d\D*)+\b)"  # Regular expression matching version-like directory names
+    errors = False
+    checked_versions = {}  # Dictionary to store already checked version sets
+
+    for root_dir in root_dirs:
+        for dirpath, dirnames, _ in os.walk(root_dir):
+
+            if not dirnames:  # Check only leaf directories
+                versions = set()
+                for dirname in os.listdir(os.path.dirname(dirpath)):
+
+                    if re.match(dir_pattern, dirname):  # Check if the directory name matches the pattern
+                        versions.add(dirname)
+
+                # Remove "latest" if present in the set
+                versions.discard("latest")
+
+                superior_directory_path = os.path.dirname(dirpath)
+
+                # Check if the versions have already been checked
+                if tuple(versions) in checked_versions:
+                    continue  # Skip if already checked
+
+                # Verify that all strings in the set start with a number or all start with "v"
+                if not (all(re.match(r"v\d", version) for version in versions) or all(version[0].isdigit() for version in versions)):
+                    print(f"Error: Mixed version prefixes found in {superior_directory_path}: {versions}")
+                    errors = True
+
+                # Verify that all strings in the set match the versioning pattern
+                if not all(re.match(version_pattern, version) for version in versions):
+                    print(f"Error: Inconsistent versioning pattern found in {superior_directory_path}: {versions}")
+                    errors = True
+
+                checked_versions[tuple(versions)] = True  # Mark versions as checked
+
+    return not errors
+
+def check_directory_existence(root_dirs):
+    existing_dirs = [root_dir for root_dir in root_dirs if os.path.exists(root_dir)]
+
+    if not existing_dirs:
+        print(f"Error: none of the provided root directories exist: {root_dirs}")
+        return False
+
+    # Warn about any directories that don't exist
+    non_existent_dirs = [root_dir for root_dir in root_dirs if root_dir not in existing_dirs]
+
+    for root_dir in non_existent_dirs:
+        print(f"WARNING: {root_dir} does not exist")
+    return True
+
+def main():
+    root_dirs = sys.argv[1:]  # Read args
+
+    if not root_dirs:
+        print("No root directories provided.")
+        sys.exit(1)
+
+    if not check_directory_existence(root_dirs):
+        sys.exit(1)
+
+    if not check_versioning_pattern(root_dirs):
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()
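For reference, this is how the `version_pattern` used above classifies a few directory names (sample names only):

```python
import re

version_pattern = r"(latest|v?\d+(\.\d+){0,2})$"  # same pattern as in the script above

for name in ["latest", "v1.0", "2.3.4", "0.2", "draft", "v1.0.0.0"]:
    print(name, "->", bool(re.match(version_pattern, name)))
# latest, v1.0, 2.3.4, 0.2 -> True; draft -> False; v1.0.0.0 -> False (too many segments)
```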
+ """ + version_pattern = r"(latest|v?\d+(\.\d+){0,2})$" # Regular expression pattern to match versioning format + dir_pattern = r"(latest|\b(?:\D*\d\D*)+\b)" # Regular expression pattern to match directory names + errors = False + checked_versions = {} # Dictionary to store checked versions + + for root_dir in root_dirs: + for dirpath, dirnames, _ in os.walk(root_dir): + + if not dirnames: # Check only leaf directories + versions = set() + for dirname in os.listdir(os.path.dirname(dirpath)): + + if re.match(dir_pattern, dirname): # Check if the directory name matches the pattern + versions.add(dirname) + + # Remove "latest" if present in the set + versions.discard("latest") + + superior_directory_path = os.path.dirname(dirpath) + + # Check if the versions have already been checked + if tuple(versions) in checked_versions: + continue # Skip if already checked + + # Verify that all strings in the set start with a number or "v" + if not (all(re.match(r"v\d", version) for version in versions) or all(version[0].isdigit() for version in versions)): + # If not all strings start with a number or "v", report an error + print(f"Error: Inconsistent versioning pattern found in {superior_directory_path}: {versions}") + errors = True + + # Verify that all strings in the set match the versioning pattern + if not (all(re.match(version_pattern, version) for version in versions)): + print(f"Error: Inconsistent versioning pattern found in {superior_directory_path}: {versions}") + errors = True + + checked_versions[tuple(versions)] = True # Mark versions as checked + + return not errors + +def check_directory_existence(root_dirs): + existing_dirs = [root_dir for root_dir in root_dirs if os.path.exists(root_dir)] + + if not existing_dirs: + print(f"{root_dirs} don't exist") + return False + + # Check if any directories don't exist + non_existent_dirs = [root_dir for root_dir in root_dirs if root_dir not in existing_dirs] + + for root_dir in non_existent_dirs: + print(f"WARNING: {root_dir} does not exist") + return True + +def main(): + root_dirs = sys.argv[1:] # Read args + + if not root_dirs: + print("No root directories provided.") + exit(1) + + if not check_directory_existence(root_dirs): + exit(1) + + if not check_versioning_pattern(root_dirs): + exit(1) + +if __name__ == "__main__": + main() diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..29cb0d1 --- /dev/null +++ b/setup.py @@ -0,0 +1,29 @@ +from setuptools import find_packages +from setuptools import setup + +with open("requirements.txt") as f: + requirements = f.read().splitlines() + +setup( + name="dati_semantic_cookiecutter", + version="0.1.0", + description="Tools to check semantic assets", + classifiers=[ + "Programming Language :: Python :: 3.11", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + ], + packages=find_packages('.'), + install_requires=requirements, + entry_points={ + "console_scripts": [ + "check_repo_structure = scripts.check_repo_structure:main", + "check_filename_format = scripts.check_filename_format:main", + "check_filename_match_uri = scripts.check_filename_match_uri:main", + "check_filenames_match_directories = scripts.check_filenames_match_directories:main", + "check_supported_files = scripts.check_supported_files:main", + "check_versioning_pattern = scripts.check_versioning_pattern:main", + "directory_existence_checker = scripts.directory_existence_checker:check_directory_existence" + ] + }, +) diff --git a/tests/Dockerfile.precommit b/tests/Dockerfile.precommit index 
diff --git a/tests/Dockerfile.precommit b/tests/Dockerfile.precommit
index 0b776e4..f444f22 100644
--- a/tests/Dockerfile.precommit
+++ b/tests/Dockerfile.precommit
@@ -1,3 +1,3 @@
-FROM python:3.9
+FROM python:3.11
 RUN pip3 install pre-commit
-ENTRYPOINT ["pre-commit", "run", "-a"]
+ENTRYPOINT ["pre-commit", "run", "-a"]
\ No newline at end of file
diff --git a/tests/Dockerfile.pytest b/tests/Dockerfile.pytest
new file mode 100644
index 0000000..b7ae4e5
--- /dev/null
+++ b/tests/Dockerfile.pytest
@@ -0,0 +1,4 @@
+FROM python:3.11
+RUN pip install requests==2.31.0
+RUN pip install pytest==8.1.1
+ENTRYPOINT ["pytest", "-s", "./tests/"]
\ No newline at end of file
diff --git a/tests/test_urls.py b/tests/test_urls.py
new file mode 100644
index 0000000..5cd07f0
--- /dev/null
+++ b/tests/test_urls.py
@@ -0,0 +1,116 @@
+import os
+import re
+import requests
+import time
+from pathlib import Path
+from urllib.parse import urlparse
+import pytest
+
+re_url = re.compile(r'[<"](https://github.*|https://raw.githubusercontent[^>"]*)[>"]')
+root_dirs = ["assets/controlled-vocabularies/", "assets/ontologies/", "assets/schemas/"]
+
+def get_urls(root_dirs):
+    """
+    Get URLs from .ttl files in the specified directories.
+    """
+    urls = []
+    for root_dir in root_dirs:
+        for file_path in Path(root_dir).rglob("*.ttl"):
+            for url in re_url.findall(file_path.read_text(encoding="utf8")):
+                urls.append((file_path, url.strip('<">'), root_dir))
+    return urls
+
+def request_url(method, url):
+    """
+    Make an HTTP request to the given URL, retrying on rate limiting.
+    """
+    for i in range(1, 4):
+        ret = method(url)
+        if ret.status_code != 429:
+            break
+
+        # Honour the Retry-After header when present, capped at 100 seconds.
+        backoff = int(ret.headers.get("Retry-After", 1))
+        if backoff > 100:
+            backoff = 100
+        time.sleep(i * backoff)
+    return ret
+
+def extract_relative_path(url, root_dir):
+    """
+    Extract the relative path from the URL based on the root directory.
+    Return None if the root directory does not occur in the URL.
+ """ + # Check if root_dir is present in the URL + if root_dir not in url: + return None + + # Find the index of the root_dir in the URL + start_index = url.find(root_dir) + if start_index == -1: + return None + + # Extract relative path from start_index + relative_path = url[start_index:] + + return relative_path + +def check_local_file_exists(file_path): + """ + Check if the file exists locally + If the file exists locally, it will exist when a PR is merged + """ + return os.path.exists(file_path) + +def check_repository_existence(url): + # Extract the username and repository name from the URL + parsed_url = urlparse(url) + path_parts = parsed_url.path.split("/") + username = path_parts[1] + repository = path_parts[2] + + # GET request to verify the existence of the repository + response = requests.get(f"https://api.github.com/repos/{username}/{repository}") + + # The repo exists on Github + if response.status_code == 200: + return True + # The repo doesn't exist on Github + else: + return False + +@pytest.mark.skipif(all(not os.path.exists(root_dir) for root_dir in root_dirs), reason="No root directories found") +def test_url(): + print("Starting URL test...") + errors = [] + + # Check if root_dir exist + for root_dir in root_dirs: + if not os.path.exists(root_dir): + print(f"WARNING: root directory '{root_dir}' does not exist.") + + for file_path, url, root_dir in get_urls(root_dirs): + print(f"Testing URL: {url}") + + ret = request_url(requests.head, url) + print(f"status_code: {ret}") + + if ret.status_code != 200: + relative_path = extract_relative_path(url, root_dir) + + if relative_path: + local_file_exists = check_local_file_exists(relative_path) + github_repo_exists = check_repository_existence(url) + + if not (local_file_exists and github_repo_exists): + errors.append(f"ERROR: URL '{url}' in file '{file_path}' is not accessible, and the corresponding local file does not exist.") + else: + errors.append(f"ERROR: the corresponding local file of url '{url}' in file '{file_path}' does not exist, root_dir '{root_dir}' is different") + + if errors: + print("\nErrors found during URL test:") + for error in errors: + print(error) + assert False, "\n".join(errors) + +# Run test +test_url() \ No newline at end of file