From 6ace31fb3db6ff87bfa2f4e6b3832c84272b61f5 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 19 Nov 2024 12:59:53 -0500 Subject: [PATCH 01/24] fix definition of CWL schema fields causing validation error when parsing body --- CHANGES.rst | 3 + tests/processes/test_wps_package.py | 307 +++++++++++----------- weaver/processes/constants.py | 29 ++ weaver/processes/wps_package.py | 72 +++-- weaver/wps_restapi/swagger_definitions.py | 112 +++++++- 5 files changed, 338 insertions(+), 185 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 852e1a2eb..d515cfa39 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -65,6 +65,9 @@ Changes: Fixes: ------ +- Fix definition of `CWL` ``schema.org`` namespaced fields (i.e.: ``s:author`` and ``s:dateCreated``) causing + schema deserialization error when validation the submitted request body against typical examples provided in + `CWL Metadata and Authorship `_. - Fix ``GET /jobs/{jobId}/inputs`` contents to correctly return the submitted ``outputs`` definition for `Process` execution (fixes `#715 `_). - Fix missing ``Link`` header with ``rel: monitor`` relationship in the created `Job` responses diff --git a/tests/processes/test_wps_package.py b/tests/processes/test_wps_package.py index 0d896636d..4c81100aa 100644 --- a/tests/processes/test_wps_package.py +++ b/tests/processes/test_wps_package.py @@ -54,6 +54,7 @@ mask_process_inputs ) from weaver.wps.service import WorkerRequest +from weaver.wps_restapi import swagger_definitions as sd if TYPE_CHECKING: from typing import Any, Dict, TypeVar @@ -845,59 +846,45 @@ def test_format_extension_validator_basic(data_input, mode, expect): assert format_extension_validator(data_input, mode) == expect -@pytest.mark.parametrize("original, expected", [ - ( - # Test author metadata with empty wps_package - { - "cwl_package_package": { - "s:author": [ - {"class": "s:Person", "s:name": "John Doe", "s:affiliation": "Example Inc."} - ], +@pytest.mark.parametrize( + ["cwl_package_original", "cwl_package_expected"], + [ + ( + # Test author metadata with empty wps_package + { + "cwl_package_package": { + "s:author": [ + {"class": "s:Person", "s:name": "John Doe", "s:affiliation": "Example Inc."} + ], + }, + "wps_package_metadata": {} }, - "wps_package_metadata": {} - }, - { - "abstract": "", - "title": "", - "metadata": [ - { - "role": "author", - "value": { - "$schema": "https://schema.org/Person", - "name": "John Doe", - "affiliation": "Example Inc." + { + "abstract": "", + "title": "", + "metadata": [ + { + "role": "author", + "value": { + "$schema": "https://schema.org/Person", + "name": "John Doe", + "affiliation": "Example Inc." + } } - } - ] - } - ), - ( - # Test codeRepository - { - "cwl_package_package": { - "s:codeRepository": "https://gitlab.com/", - }, - "wps_package_metadata": {} - }, - { - "abstract": "", - "title": "", - "metadata": [ - { - "type": "text/html", - "rel": "codeRepository", - "href": "https://gitlab.com/" - } - ] - } - ), - ( - # Test Version with existing metadata - { - "cwl_package_package": { - "s:version": "1.0" + ] + } + ), + ( + # Test codeRepository + { + "cwl_package_package": { + "s:codeRepository": "https://gitlab.com/", + }, + "wps_package_metadata": {} }, - "wps_package_metadata": { + { + "abstract": "", + "title": "", "metadata": [ { "type": "text/html", @@ -906,88 +893,93 @@ def test_format_extension_validator_basic(data_input, mode, expect): } ] } - }, - { - "abstract": "", - "title": "", - "version": "1.0", - "metadata": [ - { - "type": "text/html", - "rel": "codeRepository", - "href": "https://gitlab.com/" + ), + ( + # Test Version with existing metadata + { + "cwl_package_package": { + "s:version": "1.0" }, - ], - } - ), - ( - # Test softwareVersion - { - "cwl_package_package": { - "s:softwareVersion": "1.0.0" + "wps_package_metadata": { + "metadata": [ + { + "type": "text/html", + "rel": "codeRepository", + "href": "https://gitlab.com/" + } + ] + } }, - "wps_package_metadata": {} - }, - { - "abstract": "", - "title": "", - "version": "1.0.0" - } - ), - ( - # Test contributor - { - "cwl_package_package": { - "s:contributor": [ - {"class": "s:Person", "s:name": "John Doe", "s:affiliation": "Example Inc."} + { + "abstract": "", + "title": "", + "version": "1.0", + "metadata": [ + { + "type": "text/html", + "rel": "codeRepository", + "href": "https://gitlab.com/" + }, ], + } + ), + ( + # Test softwareVersion + { + "cwl_package_package": { + "s:softwareVersion": "1.0.0" + }, + "wps_package_metadata": {} }, - "wps_package_metadata": {} - }, - { - "abstract": "", - "title": "", - "metadata": [ - { - "role": "contributor", - "value": { - "$schema": "https://schema.org/Person", - "name": "John Doe", - "affiliation": "Example Inc." - } - } - ] - } - ), - ( - # Test citation - { - "cwl_package_package": { - "s:citation": "https://dx.doi.org/10.6084/m9.figshare.3115156.v2" + { + "abstract": "", + "title": "", + "version": "1.0.0" + } + ), + ( + # Test contributor + { + "cwl_package_package": { + "s:contributor": [ + {"class": "s:Person", "s:name": "John Doe", "s:affiliation": "Example Inc."}, + {"class": "s:Person", "s:name": "Other Guy", "s:affiliation": "Elsewhere"}, + ], + }, + "wps_package_metadata": {} }, - "wps_package_metadata": {} - }, - { - "abstract": "", - "title": "", - "metadata": [ - { - "type": "text/plain", - "rel": "citation", - "href": "https://dx.doi.org/10.6084/m9.figshare.3115156.v2" + { + "abstract": "", + "title": "", + "metadata": [ + { + "role": "contributor", + "value": { + "$schema": "https://schema.org/Person", + "name": "John Doe", + "affiliation": "Example Inc." + } + }, + { + "role": "contributor", + "value": { + "$schema": "https://schema.org/Person", + "name": "Other Guy", + "affiliation": "Elsewhere" + } + } + ] + } + ), + ( + # Test citation + { + "cwl_package_package": { + "s:citation": "https://dx.doi.org/10.6084/m9.figshare.3115156.v2" }, - ], - } - ), - ( - # Test dateCreated with existing metadata - { - "cwl_package_package": { - "s:dateCreated": [ - {"class": "s:DateTime", "s:dateCreated": "2016-12-13"} - ], + "wps_package_metadata": {} }, - "wps_package_metadata": { + { "abstract": "", "title": "", "metadata": [ @@ -998,31 +990,48 @@ def test_format_extension_validator_basic(data_input, mode, expect): }, ], } - }, - { - "abstract": "", - "title": "", - "metadata": [ - { - "type": "text/plain", - "rel": "citation", - "href": "https://dx.doi.org/10.6084/m9.figshare.3115156.v2" + ), + ( + # Test dateCreated with existing metadata + { + "cwl_package_package": { + "s:dateCreated": "2016-12-13", }, - { - "role": "dateCreated", - "value": { - "$schema": "https://schema.org/DateTime", - "dateCreated": "2016-12-13", - } + "wps_package_metadata": { + "abstract": "", + "title": "", + "metadata": [ + { + "type": "text/plain", + "rel": "citation", + "href": "https://dx.doi.org/10.6084/m9.figshare.3115156.v2" + }, + ], } - ] - } - ), -]) -def test_process_metadata(original, expected): + }, + { + "abstract": "", + "title": "", + "metadata": [ + { + "type": "text/plain", + "rel": "citation", + "href": "https://dx.doi.org/10.6084/m9.figshare.3115156.v2" + }, + { + "role": "dateCreated", + "value": "2016-12-13", + } + ] + } + ), + ] +) +def test_process_metadata(cwl_package_original, cwl_package_expected): # type: (CWL, CWL) -> None - cwl_package_package = original["cwl_package_package"] - wps_package_metadata = original["wps_package_metadata"] + cwl_package_package = cwl_package_original["cwl_package_package"] + wps_package_metadata = cwl_package_original["wps_package_metadata"] + cwl_package_validated = sd.CWLMetadata().deserialize(cwl_package_package) # must not raise + assert cwl_package_validated == cwl_package_package # should be unchanged _update_package_metadata(wps_package_metadata, cwl_package_package) - # Assertions - assert wps_package_metadata == expected + assert wps_package_metadata == cwl_package_expected diff --git a/weaver/processes/constants.py b/weaver/processes/constants.py index 263a25da0..60e884c30 100644 --- a/weaver/processes/constants.py +++ b/weaver/processes/constants.py @@ -102,6 +102,35 @@ class OpenSearchField(Constants): """ Namespace used to reference :term:`CWL` definitions provided by ``schema.org`` typically used for additional metadata. """ +CWL_NAMESPACE_SCHEMA_METADATA_NAME = f"{CWL_NAMESPACE_SCHEMA_ID}:name" +CWL_NAMESPACE_SCHEMA_METADATA_EMAIL = f"{CWL_NAMESPACE_SCHEMA_ID}:email" +CWL_NAMESPACE_SCHEMA_METADATA_IDENTIFIER = f"{CWL_NAMESPACE_SCHEMA_ID}:identifier" +CWL_NAMESPACE_SCHEMA_METADATA_PERSON = f"{CWL_NAMESPACE_SCHEMA_ID}:Person" +CWL_NAMESPACE_SCHEMA_METADATA_AUTHOR = f"{CWL_NAMESPACE_SCHEMA_ID}:author" +CWL_NAMESPACE_SCHEMA_METADATA_CITATION = f"{CWL_NAMESPACE_SCHEMA_ID}:citation" +CWL_NAMESPACE_SCHEMA_METADATA_KEYWORDS = f"{CWL_NAMESPACE_SCHEMA_ID}:keywords" +CWL_NAMESPACE_SCHEMA_METADATA_CODE_REPOSITORY = f"{CWL_NAMESPACE_SCHEMA_ID}:codeRepository" +CWL_NAMESPACE_SCHEMA_METADATA_CONTRIBUTOR = f"{CWL_NAMESPACE_SCHEMA_ID}:contributor" +CWL_NAMESPACE_SCHEMA_METADATA_DATE_CREATED = f"{CWL_NAMESPACE_SCHEMA_ID}:dateCreated" +CWL_NAMESPACE_SCHEMA_METADATA_LICENSE = f"{CWL_NAMESPACE_SCHEMA_ID}:license" +CWL_NAMESPACE_SCHEMA_METADATA_RELEASE_NOTES = f"{CWL_NAMESPACE_SCHEMA_ID}:releaseNotes" +CWL_NAMESPACE_SCHEMA_METADATA_VERSION = f"{CWL_NAMESPACE_SCHEMA_ID}:version" +CWL_NAMESPACE_SCHEMA_METADATA_SOFTWARE_VERSION = f"{CWL_NAMESPACE_SCHEMA_ID}:softwareVersion" +CWL_NAMESPACE_SCHEMA_METADATA_SUPPORTED = [ + CWL_NAMESPACE_SCHEMA_METADATA_AUTHOR, + CWL_NAMESPACE_SCHEMA_METADATA_CITATION, + CWL_NAMESPACE_SCHEMA_METADATA_CODE_REPOSITORY, + CWL_NAMESPACE_SCHEMA_METADATA_CONTRIBUTOR, + CWL_NAMESPACE_SCHEMA_METADATA_DATE_CREATED, + CWL_NAMESPACE_SCHEMA_METADATA_LICENSE, + CWL_NAMESPACE_SCHEMA_METADATA_RELEASE_NOTES, + CWL_NAMESPACE_SCHEMA_METADATA_KEYWORDS, + CWL_NAMESPACE_SCHEMA_METADATA_VERSION, + CWL_NAMESPACE_SCHEMA_METADATA_SOFTWARE_VERSION, +] +""" +Fields that can be directly in the :term:`CWL` contents. +""" CWL_NAMESPACE_OGC_API_PROC_PART1_ID = "ogcapi-processes-1" CWL_NAMESPACE_OGC_API_PROC_PART1_URL = "https://schemas.opengis.net/ogcapi/processes/part1/1.0/openapi/" diff --git a/weaver/processes/wps_package.py b/weaver/processes/wps_package.py index e3ee684ca..a12fbb35f 100644 --- a/weaver/processes/wps_package.py +++ b/weaver/processes/wps_package.py @@ -74,6 +74,13 @@ from weaver.processes import opensearch from weaver.processes.constants import ( CWL_NAMESPACE_CWLTOOL_URL, + CWL_NAMESPACE_SCHEMA_ID, + CWL_NAMESPACE_SCHEMA_METADATA_CODE_REPOSITORY, + CWL_NAMESPACE_SCHEMA_METADATA_KEYWORDS, + CWL_NAMESPACE_SCHEMA_METADATA_SOFTWARE_VERSION, + CWL_NAMESPACE_SCHEMA_METADATA_SUPPORTED, + CWL_NAMESPACE_SCHEMA_METADATA_VERSION, + CWL_NAMESPACE_SCHEMA_URL, CWL_NAMESPACE_WEAVER_DEFINITION, CWL_NAMESPACE_WEAVER_ID, CWL_REQUIREMENT_APP_BUILTIN, @@ -227,16 +234,6 @@ PACKAGE_SCHEMA_CACHE = {} # type: Dict[str, Tuple[str, str]] -SUPPORTED_METADATA_MAPPING = [ - "s:author", - "s:citation", - "s:codeRepository", - "s:contributor", - "s:dateCreated", - "s:license", - "s:releaseNotes", -] - def get_status_location_log_path(status_location, out_dir=None): # type: (str, Optional[str]) -> str @@ -794,18 +791,25 @@ def _update_package_metadata(wps_package_metadata, cwl_package_package): metadata.append({"title": namespaces_inv[namespace_url], "href": schema}) wps_package_metadata["metadata"] = metadata - if "s:keywords" in cwl_package_package and isinstance(cwl_package_package["s:keywords"], list): + if ( + CWL_NAMESPACE_SCHEMA_METADATA_KEYWORDS in cwl_package_package and + isinstance(cwl_package_package[CWL_NAMESPACE_SCHEMA_METADATA_KEYWORDS], list) + ): wps_package_metadata["keywords"] = list( - set(wps_package_metadata.get("keywords", [])) | set(cwl_package_package.get("s:keywords", [])) + set(wps_package_metadata.get("keywords", [])) | + set(cwl_package_package.get(CWL_NAMESPACE_SCHEMA_METADATA_KEYWORDS, [])) ) # specific use case with a different mapping # https://docs.ogc.org/bp/20-089r1.html#toc31 - if "s:version" in cwl_package_package or "s:softwareVersion" in cwl_package_package: + if ( + CWL_NAMESPACE_SCHEMA_METADATA_VERSION in cwl_package_package or + CWL_NAMESPACE_SCHEMA_METADATA_SOFTWARE_VERSION in cwl_package_package + ): version_value = ( wps_package_metadata.get("version") - or cwl_package_package.get("s:version") - or cwl_package_package.get("s:softwareVersion") + or cwl_package_package.get(CWL_NAMESPACE_SCHEMA_METADATA_VERSION) + or cwl_package_package.get(CWL_NAMESPACE_SCHEMA_METADATA_SOFTWARE_VERSION) ) # Only set the key if version_value is not empty or null if version_value: @@ -815,34 +819,48 @@ def _update_package_metadata(wps_package_metadata, cwl_package_package): if version_value: wps_package_metadata["version"] = str(version_value) - for metadata_mapping in SUPPORTED_METADATA_MAPPING: + schema_ns = f"{CWL_NAMESPACE_SCHEMA_ID}:" + for metadata_mapping in CWL_NAMESPACE_SCHEMA_METADATA_SUPPORTED: + if metadata_mapping in [ # skip handled above + CWL_NAMESPACE_SCHEMA_METADATA_KEYWORDS, + CWL_NAMESPACE_SCHEMA_METADATA_VERSION, + CWL_NAMESPACE_SCHEMA_METADATA_SOFTWARE_VERSION, + ]: + continue + if metadata_mapping in cwl_package_package: metadata = wps_package_metadata.get("metadata", []) if ( - isinstance((cwl_package_package[metadata_mapping]), str) + isinstance(cwl_package_package[metadata_mapping], str) and urlparse(cwl_package_package[metadata_mapping]).scheme != "" ): url = cwl_package_package[metadata_mapping] - if metadata_mapping == "s:codeRepository": - type = "text/html" + if metadata_mapping == CWL_NAMESPACE_SCHEMA_METADATA_CODE_REPOSITORY: + ctype = ContentType.TEXT_HTML else: - type = get_content_type(os.path.splitext(url)[-1], default=ContentType.TEXT_PLAIN) + ctype = get_content_type(os.path.splitext(url)[-1], default=ContentType.TEXT_PLAIN) metadata.append({ - "type": type, - "rel": metadata_mapping.strip("s:"), + "type": ctype, + "rel": metadata_mapping.strip(schema_ns), "href": cwl_package_package[metadata_mapping] }) + elif isinstance(cwl_package_package[metadata_mapping], str): + class_name = metadata_mapping.strip(schema_ns) + metadata.append({ + "role": class_name, + "value": cwl_package_package[metadata_mapping] + }) else: for objects in cwl_package_package[metadata_mapping]: - class_name = objects["class"].strip("s:") + class_name = objects["class"].strip(schema_ns) value = { - "$schema": f"https://schema.org/{class_name}" + "$schema": f"{CWL_NAMESPACE_SCHEMA_URL}{class_name}" } for key, val in objects.items(): - if key.startswith("s:"): - value[key.strip("s:")] = val + if key.startswith(schema_ns): + value[key.strip(schema_ns)] = val metadata.append({ - "role": metadata_mapping.strip("s:"), + "role": metadata_mapping.strip(schema_ns), "value": value }) diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index 0d82b9f9f..fe3e6b81c 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -69,6 +69,18 @@ CWL_NAMESPACE_OGC_API_PROC_PART1_ID, CWL_NAMESPACE_OGC_API_PROC_PART1_URL, CWL_NAMESPACE_SCHEMA_ID, + CWL_NAMESPACE_SCHEMA_METADATA_AUTHOR, + CWL_NAMESPACE_SCHEMA_METADATA_CODE_REPOSITORY, + CWL_NAMESPACE_SCHEMA_METADATA_CONTRIBUTOR, + CWL_NAMESPACE_SCHEMA_METADATA_DATE_CREATED, + CWL_NAMESPACE_SCHEMA_METADATA_EMAIL, + CWL_NAMESPACE_SCHEMA_METADATA_IDENTIFIER, + CWL_NAMESPACE_SCHEMA_METADATA_KEYWORDS, + CWL_NAMESPACE_SCHEMA_METADATA_LICENSE, + CWL_NAMESPACE_SCHEMA_METADATA_NAME, + CWL_NAMESPACE_SCHEMA_METADATA_PERSON, + CWL_NAMESPACE_SCHEMA_METADATA_SOFTWARE_VERSION, + CWL_NAMESPACE_SCHEMA_METADATA_VERSION, CWL_NAMESPACE_SCHEMA_URL, CWL_NAMESPACE_WEAVER_ID, CWL_NAMESPACE_WEAVER_URL, @@ -5752,17 +5764,72 @@ class CWLSchemas(ExtendedSequenceSchema): url = URL(title="CWLSchemaURL", description="Schema reference for the CWL definition.") -class CWLMetadata(ExtendedMappingSchema): +class CWLPerson(PermissiveMappingSchema): + _sort_first = [ + "class", + CWL_NAMESPACE_SCHEMA_METADATA_IDENTIFIER, + CWL_NAMESPACE_SCHEMA_METADATA_EMAIL, + CWL_NAMESPACE_SCHEMA_METADATA_NAME, + ] + _class = ExtendedSchemaNode( + String(), + name="class", + validator=OneOf([CWL_NAMESPACE_SCHEMA_METADATA_PERSON]), + description="Author of the Application Package.", + ) + _id = URI( + name=CWL_NAMESPACE_SCHEMA_METADATA_IDENTIFIER, + description="Reference identifier of the person. Typically, an ORCID URI.", + missing=drop, + ) + name = ExtendedSchemaNode( + String(), + name=CWL_NAMESPACE_SCHEMA_METADATA_NAME, + description="Name of the person.", + missing=drop, + ) + email = ExtendedSchemaNode( + String(), + name=CWL_NAMESPACE_SCHEMA_METADATA_NAME, + description="Email of the person.", + missing=drop, + ) + + +class CWLAuthors(ExtendedSequenceSchema): + item = CWLPerson() + validator = Length(min=1) + + + +class CWLDateCreated(OneOfKeywordSchema): + _one_of = [ + ExtendedSchemaNode( + DateTime(), + name=CWL_NAMESPACE_SCHEMA_METADATA_DATE_CREATED, + format="date-time", + description="Date-time of creation in ISO-8601 format.", + ) + ] + + +class CWLMetadata(PermissiveMappingSchema): _sort_first = [ "cwlVersion", "class", "id", - "version", "label", "doc", "intent", - f"{CWL_NAMESPACE_SCHEMA_ID}:author", - f"{CWL_NAMESPACE_SCHEMA_ID}:keywords", + "version", + CWL_NAMESPACE_SCHEMA_METADATA_DATE_CREATED, + CWL_NAMESPACE_SCHEMA_METADATA_VERSION, + CWL_NAMESPACE_SCHEMA_METADATA_SOFTWARE_VERSION, + CWL_NAMESPACE_SCHEMA_METADATA_CODE_REPOSITORY, + CWL_NAMESPACE_SCHEMA_METADATA_LICENSE, + CWL_NAMESPACE_SCHEMA_METADATA_AUTHOR, + CWL_NAMESPACE_SCHEMA_METADATA_CONTRIBUTOR, + CWL_NAMESPACE_SCHEMA_METADATA_KEYWORDS, ] _sort_after = ["$namespaces", "$schemas"] @@ -5770,17 +5837,44 @@ class CWLMetadata(ExtendedMappingSchema): label = ExtendedSchemaNode(String(), missing=drop) doc = ExtendedSchemaNode(String(), missing=drop) intent = ExtendedSchemaNode(String(), missing=drop) - author = ExtendedSchemaNode( - String(), - name=f"{CWL_NAMESPACE_SCHEMA_ID}:author", + version = Version(missing=drop) + s_version = Version(missing=drop, name=CWL_NAMESPACE_SCHEMA_METADATA_VERSION) + + author = CWLAuthors( + name=CWL_NAMESPACE_SCHEMA_METADATA_AUTHOR, missing=drop, - description="Author of the Application Package.", + description="Author(s) of the Application Package.", + ) + contributor = CWLAuthors( + name=CWL_NAMESPACE_SCHEMA_METADATA_CONTRIBUTOR, + missing=drop, + description="Contributor(s) of the Application Package.", ) keywords = KeywordList( - name=f"{CWL_NAMESPACE_SCHEMA_ID}:keywords", + name=CWL_NAMESPACE_SCHEMA_METADATA_KEYWORDS, missing=drop, description="Keywords applied to the Application Package.", ) + license = URL( + name=CWL_NAMESPACE_SCHEMA_METADATA_LICENSE, + missing=drop, + description=( + "License related to the Application Package. " + "Preferably an URL to the specific license, but generic license URL is allowed." + ), + ) + code_repo = URL( + name=CWL_NAMESPACE_SCHEMA_METADATA_CODE_REPOSITORY, + missing=drop, + description="URL to the original code repository providing the Application Package.", + ) + date_created = ExtendedSchemaNode( + DateTime(), + name=CWL_NAMESPACE_SCHEMA_METADATA_DATE_CREATED, + format="date-time", + missing=drop, + description="Date-time of creation in ISO-8601 format.", + ) namespaces = CWLNamespaces(missing=drop) schemas = CWLSchemas(missing=drop) From 3bd7fbe599cbd0abf0c51c3355f179365cc64d1b Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 19 Nov 2024 14:06:13 -0500 Subject: [PATCH 02/24] fix linting --- weaver/wps_restapi/swagger_definitions.py | 1 - 1 file changed, 1 deletion(-) diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index fe3e6b81c..21475c1c2 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -5801,7 +5801,6 @@ class CWLAuthors(ExtendedSequenceSchema): validator = Length(min=1) - class CWLDateCreated(OneOfKeywordSchema): _one_of = [ ExtendedSchemaNode( From cb8464d701f3604ec6e74474456d86e3813049f4 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 19 Nov 2024 23:11:35 -0500 Subject: [PATCH 03/24] add junit codecov update for flaky test analysis (https://docs.codecov.com/docs/test-analytics) --- .github/workflows/tests.yml | 8 ++++++++ Makefile | 3 ++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 29c3de8dc..ee1b554d9 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -130,6 +130,14 @@ jobs: env | sort - name: Run Tests run: make stop ${{ matrix.test-case }} + + - name: Upload test results to Codecov + if: ${{ !cancelled() && success() && matrix.test-case == 'test-coverage-only' }} + uses: codecov/test-results-action@v1 + with: + files: ./junit.xml,!./cache + flags: ${{ matrix.python-version }} + token: ${{ secrets.CODECOV_TOKEN }} - name: Upload coverage report uses: codecov/codecov-action@v2 if: ${{ success() && matrix.test-case == 'test-coverage-only' }} diff --git a/Makefile b/Makefile index be0a523c7..a5e9c67f8 100644 --- a/Makefile +++ b/Makefile @@ -450,7 +450,8 @@ test-docker: docker-test ## alias to 'docker-test' execution smoke test of bu .PHONY: test-coverage-only test-coverage-only: mkdir-reports ## run all tests using coverage analysis @echo "Running coverage analysis..." - @bash -c '$(CONDA_CMD) coverage run --rcfile="$(APP_ROOT)/setup.cfg" "$$(which pytest)" "$(APP_ROOT)/tests" || true' + @bash -c '$(CONDA_CMD) coverage run --rcfile="$(APP_ROOT)/setup.cfg" \ + "$$(which pytest)" "$(APP_ROOT)/tests" --junitxml="$(REPORTS_DIR)/coverage-junit.xml" || true' @bash -c '$(CONDA_CMD) coverage xml --rcfile="$(APP_ROOT)/setup.cfg" -i -o "$(REPORTS_DIR)/coverage.xml"' @bash -c '$(CONDA_CMD) coverage report --rcfile="$(APP_ROOT)/setup.cfg" -i -m' @bash -c '$(CONDA_CMD) coverage html --rcfile="$(APP_ROOT)/setup.cfg" -d "$(REPORTS_DIR)/coverage"' From 90af73c52a58c6e5e12ec2ccdbfb41153a9de3fb Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 19 Nov 2024 23:49:49 -0500 Subject: [PATCH 04/24] add coverage tests to missing code paths --- tests/processes/test_wps_package.py | 154 +++++++++++++++++----------- weaver/processes/wps_package.py | 2 +- 2 files changed, 93 insertions(+), 63 deletions(-) diff --git a/tests/processes/test_wps_package.py b/tests/processes/test_wps_package.py index 4c81100aa..4eab31147 100644 --- a/tests/processes/test_wps_package.py +++ b/tests/processes/test_wps_package.py @@ -6,6 +6,7 @@ """ import contextlib import copy +import inspect import io import itertools import json @@ -16,7 +17,7 @@ import sys import tempfile import warnings -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, cast import cwltool.process import mock @@ -33,6 +34,9 @@ from weaver.exceptions import PackageExecutionError, PackageTypeError from weaver.formats import ContentType from weaver.processes.constants import ( + CWL_NAMESPACE_SCHEMA_DEFINITION, + CWL_NAMESPACE_SCHEMA_ID, + CWL_NAMESPACE_SCHEMA_URL, CWL_REQUIREMENT_APP_DOCKER, CWL_REQUIREMENT_APP_DOCKER_GPU, CWL_REQUIREMENT_CUDA, @@ -57,10 +61,10 @@ from weaver.wps_restapi import swagger_definitions as sd if TYPE_CHECKING: - from typing import Any, Dict, TypeVar + from typing import Any, Callable, Dict, TypeVar, Union from typing_extensions import Literal - from weaver.typedefs import CWL + from weaver.typedefs import CWL, CWL_AnyRequirements, ProcessOfferingMapping KT = TypeVar("KT") VT_co = TypeVar("VT_co", covariant=True) @@ -302,8 +306,8 @@ def _add_requirement(reqs1, reqs2): if field not in reqs2: continue reqs1.setdefault(field, {}) - defs1 = reqs1[field] - defs2 = reqs2[field] + defs1 = cast("CWL_AnyRequirements", reqs1[field]) # type: CWL_AnyRequirements + defs2 = cast("CWL_AnyRequirements", reqs2[field]) # type: CWL_AnyRequirements if isinstance(defs1, list): if isinstance(defs2, dict): defs2 = [_combine({"class": req}, val) for req, val in defs2.items()] @@ -847,18 +851,16 @@ def test_format_extension_validator_basic(data_input, mode, expect): @pytest.mark.parametrize( - ["cwl_package_original", "cwl_package_expected"], + ["cwl_package_package", "wps_package_metadata", "cwl_package_expected"], [ ( # Test author metadata with empty wps_package { - "cwl_package_package": { - "s:author": [ - {"class": "s:Person", "s:name": "John Doe", "s:affiliation": "Example Inc."} - ], - }, - "wps_package_metadata": {} + "s:author": [ + {"class": "s:Person", "s:name": "John Doe", "s:affiliation": "Example Inc."} + ], }, + {}, { "abstract": "", "title": "", @@ -877,11 +879,9 @@ def test_format_extension_validator_basic(data_input, mode, expect): ( # Test codeRepository { - "cwl_package_package": { - "s:codeRepository": "https://gitlab.com/", - }, - "wps_package_metadata": {} + "s:codeRepository": "https://gitlab.com/", }, + {}, { "abstract": "", "title": "", @@ -897,18 +897,16 @@ def test_format_extension_validator_basic(data_input, mode, expect): ( # Test Version with existing metadata { - "cwl_package_package": { - "s:version": "1.0" - }, - "wps_package_metadata": { - "metadata": [ - { - "type": "text/html", - "rel": "codeRepository", - "href": "https://gitlab.com/" - } - ] - } + "s:version": "1.0" + }, + { + "metadata": [ + { + "type": "text/html", + "rel": "codeRepository", + "href": "https://gitlab.com/" + } + ] }, { "abstract": "", @@ -926,11 +924,9 @@ def test_format_extension_validator_basic(data_input, mode, expect): ( # Test softwareVersion { - "cwl_package_package": { - "s:softwareVersion": "1.0.0" - }, - "wps_package_metadata": {} + "s:softwareVersion": "1.0.0" }, + {}, { "abstract": "", "title": "", @@ -940,14 +936,12 @@ def test_format_extension_validator_basic(data_input, mode, expect): ( # Test contributor { - "cwl_package_package": { - "s:contributor": [ - {"class": "s:Person", "s:name": "John Doe", "s:affiliation": "Example Inc."}, - {"class": "s:Person", "s:name": "Other Guy", "s:affiliation": "Elsewhere"}, - ], - }, - "wps_package_metadata": {} + "s:contributor": [ + {"class": "s:Person", "s:name": "John Doe", "s:affiliation": "Example Inc."}, + {"class": "s:Person", "s:name": "Other Guy", "s:affiliation": "Elsewhere"}, + ], }, + {}, { "abstract": "", "title": "", @@ -974,11 +968,9 @@ def test_format_extension_validator_basic(data_input, mode, expect): ( # Test citation { - "cwl_package_package": { - "s:citation": "https://dx.doi.org/10.6084/m9.figshare.3115156.v2" - }, - "wps_package_metadata": {} + "s:citation": "https://dx.doi.org/10.6084/m9.figshare.3115156.v2" }, + {}, { "abstract": "", "title": "", @@ -994,20 +986,18 @@ def test_format_extension_validator_basic(data_input, mode, expect): ( # Test dateCreated with existing metadata { - "cwl_package_package": { - "s:dateCreated": "2016-12-13", - }, - "wps_package_metadata": { - "abstract": "", - "title": "", - "metadata": [ - { - "type": "text/plain", - "rel": "citation", - "href": "https://dx.doi.org/10.6084/m9.figshare.3115156.v2" - }, - ], - } + "s:dateCreated": "2016-12-13", + }, + { + "abstract": "", + "title": "", + "metadata": [ + { + "type": "text/plain", + "rel": "citation", + "href": "https://dx.doi.org/10.6084/m9.figshare.3115156.v2" + }, + ], }, { "abstract": "", @@ -1025,13 +1015,53 @@ def test_format_extension_validator_basic(data_input, mode, expect): ] } ), + ( + # test CWL '$schemas' and '$namespace' mapping to alternate metadata references + { + "$schemas": [CWL_NAMESPACE_SCHEMA_URL], + "$namespaces": dict(CWL_NAMESPACE_SCHEMA_DEFINITION), + }, + { + "metadata": [ + { + "role": "test", + "value": "test", + } + ] + }, + { + "abstract": "", + "title": "", + "metadata": [ + { + "role": "test", + "value": "test", + }, + { + "title": CWL_NAMESPACE_SCHEMA_ID, + "href": CWL_NAMESPACE_SCHEMA_URL, + } + ] + } + ), + ( + # test CWL 's:keywords' vs WPS 'keywords' + { + "s:keywords": ["a", "b", "c"], + }, + { + "keywords": ["a", "x", "y", "d", "e", "f"], + }, + lambda src: set(src["keywords"]) == {"a", "b", "c", "x", "y", "d", "e", "f"}, + ), ] ) -def test_process_metadata(cwl_package_original, cwl_package_expected): - # type: (CWL, CWL) -> None - cwl_package_package = cwl_package_original["cwl_package_package"] - wps_package_metadata = cwl_package_original["wps_package_metadata"] +def test_process_metadata(cwl_package_package, wps_package_metadata, cwl_package_expected): + # type: (CWL, ProcessOfferingMapping, Union[CWL, Callable[[CWL], bool]]) -> None cwl_package_validated = sd.CWLMetadata().deserialize(cwl_package_package) # must not raise assert cwl_package_validated == cwl_package_package # should be unchanged _update_package_metadata(wps_package_metadata, cwl_package_package) - assert wps_package_metadata == cwl_package_expected + if inspect.isfunction(cwl_package_expected): + assert cwl_package_expected(wps_package_metadata) + else: + assert wps_package_metadata == cwl_package_expected diff --git a/weaver/processes/wps_package.py b/weaver/processes/wps_package.py index a12fbb35f..579603f76 100644 --- a/weaver/processes/wps_package.py +++ b/weaver/processes/wps_package.py @@ -784,7 +784,7 @@ def _update_package_metadata(wps_package_metadata, cwl_package_package): and isinstance(cwl_package_package["$namespaces"], dict) ): metadata = wps_package_metadata.get("metadata", []) - namespaces_inv = {v: k for k, v in cwl_package_package["$namespaces"]} + namespaces_inv = {v: k for k, v in cwl_package_package["$namespaces"].items()} for schema in cwl_package_package["$schemas"]: for namespace_url in namespaces_inv: if schema.startswith(namespace_url): From f6b1d8cba559c4233139edcc72bb2d97d9533529 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Wed, 20 Nov 2024 00:30:17 -0500 Subject: [PATCH 05/24] patch coverange junit location for codecov upload --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index ee1b554d9..aab149e47 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -135,7 +135,7 @@ jobs: if: ${{ !cancelled() && success() && matrix.test-case == 'test-coverage-only' }} uses: codecov/test-results-action@v1 with: - files: ./junit.xml,!./cache + files: reports/coverage-junit.xml,!./cache flags: ${{ matrix.python-version }} token: ${{ secrets.CODECOV_TOKEN }} - name: Upload coverage report From a590f6214de708d1c94c32e60e28b2d4d2ca4c50 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Thu, 21 Nov 2024 12:28:43 -0500 Subject: [PATCH 06/24] fix detection of executionUnit href using mapping representation + patch pytest markers with parametrized --- CHANGES.rst | 1 + docs/source/processes.rst | 12 +++++++----- tests/functional/test_wps_package.py | 2 +- tests/wps_restapi/test_jobs.py | 10 +++++----- tests/wps_restapi/test_processes.py | 13 ++++++++++--- weaver/processes/builtin/collection_processor.cwl | 1 - weaver/processes/utils.py | 2 +- 7 files changed, 25 insertions(+), 16 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index d515cfa39..aabf7e3f6 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -65,6 +65,7 @@ Changes: Fixes: ------ +- Fix ``href`` detection when provided directly as mapping within the ``executionUnit`` of the deployment body. - Fix definition of `CWL` ``schema.org`` namespaced fields (i.e.: ``s:author`` and ``s:dateCreated``) causing schema deserialization error when validation the submitted request body against typical examples provided in `CWL Metadata and Authorship `_. diff --git a/docs/source/processes.rst b/docs/source/processes.rst index 98d2037ba..2edc5c1fb 100644 --- a/docs/source/processes.rst +++ b/docs/source/processes.rst @@ -414,13 +414,15 @@ Deployment of a new process is accomplished through the ``POST {WEAVER_URL}/proc The request body requires mainly two components: - | ``processDescription``: - | Defines the process identifier, metadata, inputs, outputs, and some execution specifications. This mostly - corresponds to information that is provided by traditional :term:`WPS` definition. + | Defines the :term:`Process` identifier, metadata, inputs, outputs, and some execution specifications. + This mostly corresponds to information that is provided by traditional :term:`WPS` + or :term:`OGC API - Processes` definitions. - | ``executionUnit``: - | Defines the core details of the `Application Package`_. This corresponds to the explicit :term:`CWL` definition - that indicates how to execute the given application. + | Defines the core details of the |app_pkg|_. This corresponds to the explicit :term:`CWL` definition + or other :ref:`proc_types` references that indicates how to execute the underlying application. -.. _Application Package: docs/source/package.rst +.. |app_pkg| replace:: Application Package +.. _app_pkg: docs/source/package.rst Upon deploy request, `Weaver` will either respond with a successful result, or with the appropriate error message, whether caused by conflicting ID, invalid definitions or other parsing issues. A successful process deployment will diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index 9a34fc448..f3d59e207 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -4655,11 +4655,11 @@ def test_execute_single_output_response_document_default_format_json_special(sel }, } - @pytest.mark.oap_part1 @parameterized.expand([ ContentType.MULTIPART_ANY, ContentType.MULTIPART_MIXED, ]) + @pytest.mark.oap_part1 def test_execute_multi_output_multipart_accept(self, multipart_header): """ Requesting ``multipart`` explicitly should return it instead of default :term:`JSON` ``document`` response. diff --git a/tests/wps_restapi/test_jobs.py b/tests/wps_restapi/test_jobs.py index 004388df0..b8f606ff4 100644 --- a/tests/wps_restapi/test_jobs.py +++ b/tests/wps_restapi/test_jobs.py @@ -339,8 +339,6 @@ def test_get_jobs_detail_grouped(self): for job in grouped_jobs["jobs"]: self.check_job_format(job) - @pytest.mark.html - @pytest.mark.oap_part1 @parameterized.expand([ ({}, ), # detail omitted should apply it for HTML, unlike JSON that returns the simplified listing by default ({"detail": None}, ), @@ -353,6 +351,8 @@ def test_get_jobs_detail_grouped(self): ({"detail": "False"}, ), ({"detail": "no"}, ), ]) + @pytest.mark.html + @pytest.mark.oap_part1 def test_get_jobs_detail_html_enforced(self, params): """ Using :term:`HTML`, ``detail`` response is always enforced to allow rendering, regardless of the parameter. @@ -779,7 +779,6 @@ def test_get_jobs_process_unknown_in_query(self): assert resp.status_code == 404 assert resp.content_type == ContentType.APP_JSON - @pytest.mark.oap_part1 @parameterized.expand([ get_path_kvp( sd.jobs_service.path, @@ -818,6 +817,7 @@ def test_get_jobs_process_unknown_in_query(self): service="provider-2", ), ]) + @pytest.mark.oap_part1 def test_get_jobs_process_or_service_mismatch_in_path_or_query(self, path): # type: (str) -> None """ @@ -1820,13 +1820,13 @@ def test_job_outputs_response(self): assert resp.status_code == 200 assert resp.json["outputs"] == {"test": {"value": "data"}} - @pytest.mark.oap_part4 @pytest.mark.xfail(reason="CWL PROV not implemented (https://github.com/crim-ca/weaver/issues/673)") + @pytest.mark.oap_part4 def test_job_run_response(self): raise NotImplementedError # FIXME (https://github.com/crim-ca/weaver/issues/673) - @pytest.mark.oap_part4 @parameterized.expand([Status.ACCEPTED, Status.RUNNING, Status.FAILED, Status.SUCCEEDED]) + @pytest.mark.oap_part4 def test_job_update_locked(self, status): new_job = self.make_job( task_id=self.fully_qualified_test_name(), process=self.process_public.identifier, service=None, diff --git a/tests/wps_restapi/test_processes.py b/tests/wps_restapi/test_processes.py index 797f3957c..e8ff8dce7 100644 --- a/tests/wps_restapi/test_processes.py +++ b/tests/wps_restapi/test_processes.py @@ -1055,7 +1055,13 @@ def get_cwl_docker_python_version(cwl_version="v1.0", process_id=None): }) return cwl - def test_deploy_process_CWL_DockerRequirement_href(self): + @parameterized.expand([ + ("mapping", ), + ("listing", ), + ]) + @pytest.mark.oap_part2 + def test_deploy_process_CWL_DockerRequirement_href(self, exec_unit_style): + # type: (Literal["mapping", "listing"]) -> None with contextlib.ExitStack() as stack: stack.enter_context(mocked_wps_output(self.settings)) out_dir = self.settings["weaver.wps_output_dir"] @@ -1069,9 +1075,10 @@ def test_deploy_process_CWL_DockerRequirement_href(self): json.dump(cwl, cwl_file) p_id = "test-docker-python-version" + unit = [{"href": tmp_href}] if exec_unit_style == "listing" else {"href": tmp_href} body = { "processDescription": {"process": {"id": p_id}}, - "executionUnit": [{"href": tmp_href}], + "executionUnit": unit, "deploymentProfileName": "http://www.opengis.net/profiles/eoc/dockerizedApplication", } desc = self.deploy_process_make_visible_and_fetch_deployed(body, p_id, assert_io=False) @@ -2570,12 +2577,12 @@ def test_process_description_metadata_href_or_value_invalid(self): else: self.fail(f"Metadata is expected to be raised as invalid: (test: {i}, metadata: {meta})") - @pytest.mark.oap_part3 @parameterized.expand([ ({}, {}, True), # no outputs returned ({}, {"result1": "data", "result2": 123}, True), # too many outputs returned (not explicitly requested) ({"result1": {}, "result2": {}}, {"result1": "data", "result2": 123}, False), # too many outputs requested ]) + @pytest.mark.oap_part3 def test_execute_process_nested_invalid_results_amount(self, test_outputs, mock_result, expect_execute): proc_path = f"/processes/{self.process_public.identifier}" exec_path = f"{proc_path}/jobs" diff --git a/weaver/processes/builtin/collection_processor.cwl b/weaver/processes/builtin/collection_processor.cwl index 805671b6c..14bf8b194 100644 --- a/weaver/processes/builtin/collection_processor.cwl +++ b/weaver/processes/builtin/collection_processor.cwl @@ -1,7 +1,6 @@ #! /usr/bin/env cwl-runner cwlVersion: v1.0 class: CommandLineTool -id: collection_processor label: Collection Processor doc: | Retrieves relevant data or files resolved from a collection reference using its metadata, queries and desired outputs. diff --git a/weaver/processes/utils.py b/weaver/processes/utils.py index 13f7acaa8..d749cc130 100644 --- a/weaver/processes/utils.py +++ b/weaver/processes/utils.py @@ -467,7 +467,7 @@ def deploy_process_from_payload(payload, container, overwrite=False): # pylint: raise HTTPBadRequest("Invalid value for parameter 'deploymentProfileName'.") execution_units = payload.get("executionUnit") if isinstance(execution_units, dict): - if "unit" not in execution_units: + if "unit" not in execution_units and "href" not in execution_units: execution_units = {"unit": execution_units} execution_units = [execution_units] if not isinstance(execution_units, list) or not len(execution_units) == 1: From 8c69ed9bab8026ff5ce10d336117805181e36f34 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Thu, 21 Nov 2024 18:02:29 -0500 Subject: [PATCH 07/24] allow CLI override of url parameter on each operation --- CHANGES.rst | 1 + tests/test_cli.py | 34 ++++++++++++++++++++++++++-------- weaver/cli.py | 4 +++- 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index aabf7e3f6..1fba4be9d 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -65,6 +65,7 @@ Changes: Fixes: ------ +- Fix ``url`` parameter to override the `CLI` internal ``url`` when passed explicitly to the invoked operation. - Fix ``href`` detection when provided directly as mapping within the ``executionUnit`` of the deployment body. - Fix definition of `CWL` ``schema.org`` namespaced fields (i.e.: ``s:author`` and ``s:dateCreated``) causing schema deserialization error when validation the submitted request body against typical examples provided in diff --git a/tests/test_cli.py b/tests/test_cli.py index 8e69859d3..9cb245279 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -52,12 +52,30 @@ def test_cli_url_required_in_client_or_param(): WeaverClient().execute("") try: with mock.patch("weaver.cli.WeaverClient._parse_inputs", return_value=OperationResult()): - WeaverClient(url="http://fake.domain.com").execute("") - WeaverClient().execute("", url="http://fake.domain.com") + WeaverClient(url="https://fake.domain.com").execute("") + WeaverClient().execute("", url="https://fake.domain.com") except ValueError: pytest.fail() +@pytest.mark.cli +def test_cli_url_override_by_operation(): + with mock.patch("weaver.cli.WeaverClient._parse_inputs", return_value=OperationResult()): + client = WeaverClient(url="https://fake.domain.com") + real_get_url = WeaverClient._get_url + returned_url = [] + + def mock_get_url(_url): + _url = real_get_url(client, _url) + returned_url.append(_url) + return _url + + with mock.patch("weaver.cli.WeaverClient._get_url", side_effect=mock_get_url) as mocked: + client.execute(url="https://other.domain.com", process_id="random") + assert mocked.call_args.args[0] == "https://other.domain.com" + assert returned_url[0] == "https://other.domain.com" + + @pytest.mark.cli def test_parse_inputs_from_file(): inputs = [] @@ -73,7 +91,7 @@ def parsed_inputs(_inputs, *_, **__): json.dump({"inputs": {"input1": "data"}}, input_json) input_json.flush() input_json.seek(0) - result = WeaverClient().execute("fake_process", inputs=input_json.name, url="http://fake.domain.com") + result = WeaverClient().execute("fake_process", inputs=input_json.name, url="https://fake.domain.com") assert result is mock_result assert len(inputs) == 1 assert inputs[0] == {"input1": "data"} @@ -133,7 +151,7 @@ def parsed_inputs(_inputs, *_, **__): # use '_upload_files' to early-stop the operation, since it is the step right after parsing inputs with mock.patch("weaver.cli.WeaverClient._upload_files", side_effect=parsed_inputs): - result = WeaverClient().execute("fake_process", inputs=data_inputs, url="http://fake.domain.com") + result = WeaverClient().execute("fake_process", inputs=data_inputs, url="https://fake.domain.com") assert result is mock_result assert len(inputs) == 1 assert inputs[0] == expect_inputs @@ -167,7 +185,7 @@ def no_error_cli(*args): # different media-type than YAML on purpose to ensure parsing uses provided value, and not extension "-I", f"input:File={input_yaml.name}@mediaType={ContentType.APP_CWL}", "-p", "fake_process", - "-u", "http://fake.domain.com", + "-u", "https://fake.domain.com", "-q", # since CLI fails purposely, avoid logging errors which would be confusing if debugging logs ], entrypoint=no_error_cli) @@ -180,8 +198,8 @@ def no_error_cli(*args): } inputs.clear() - schema_json = "http://schema.org/random.json" - schema_xml = "http://schema.org/other.xml" + schema_json = "https://schema.org/random.json" + schema_xml = "https://schema.org/other.xml" with mock.patch("weaver.cli.WeaverClient._upload_files", side_effect=parsed_inputs): with ExitStack() as stack: input_yaml = stack.enter_context(tempfile.NamedTemporaryFile(mode="w", suffix=".yml")) @@ -213,7 +231,7 @@ def no_error_cli(*args): "-I", f"input:File={input_json.name}@type={ContentType.APP_JSON}@rel=schema", "-I", f"other:File={input_tif.name}@mediaType={ctype_tif_escaped}@encoding=base64@rel=image", "-p", "fake_process", - "-u", "http://fake.domain.com", + "-u", "https://fake.domain.com", "-q", # since CLI fails purposely, avoid logging errors which would be confusing if debugging logs ], entrypoint=no_error_cli) diff --git a/weaver/cli.py b/weaver/cli.py index 4ee736c26..c943a5d44 100644 --- a/weaver/cli.py +++ b/weaver/cli.py @@ -470,7 +470,9 @@ def _get_url(self, url): # type: (Optional[str]) -> str if not self._url and not url: raise ValueError("No URL available. Client was not created with an URL and operation did not receive one.") - return self._url or self._parse_url(url) + if url: + return self._parse_url(url) + return self._url @staticmethod def _parse_url(url): From dbe441d43c4fb0d4f0feb287d711a0dea7158170 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Thu, 21 Nov 2024 23:18:23 -0500 Subject: [PATCH 08/24] resolve OAS array with enum for OAP input directly specified --- CHANGES.rst | 2 + tests/processes/test_convert.py | 78 +++++++++++++++++++++++++++++++++ weaver/processes/convert.py | 28 ++++++++++-- 3 files changed, 105 insertions(+), 3 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index d515cfa39..f10127de3 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -65,6 +65,8 @@ Changes: Fixes: ------ +- Fix `CWL` conversion from a `OGC API - Processes` definition specifying an `I/O` with ``schema`` explicitly + indicating a ``type: array`` and nested ``enum``, even if ``minOccurs: 1`` is omitted or explicitly set. - Fix definition of `CWL` ``schema.org`` namespaced fields (i.e.: ``s:author`` and ``s:dateCreated``) causing schema deserialization error when validation the submitted request body against typical examples provided in `CWL Metadata and Authorship `_. diff --git a/tests/processes/test_convert.py b/tests/processes/test_convert.py index caa1a1be7..e61ed9cfd 100644 --- a/tests/processes/test_convert.py +++ b/tests/processes/test_convert.py @@ -532,6 +532,84 @@ def test_any2cwl_io_from_oas(): "inputBinding": {"valueFrom": _get_cwl_js_value_from([1, 2, 3], allow_unique=True, allow_array=True)}, }, ), + ( + IO_INPUT, + { + "id": "test", + "schema": { + "type": "array", + "items": { + "type": "string", + "enum": ["a", "b", "c"] + } + } + }, + { + "id": "test", + # note: "type" as array of single mapping with "type: array" would also be valid ([] around the {}) + "type": { + "type": "array", + "items": { + "type": "enum", + "symbols": ["a", "b", "c"], + }, + } + } + ), + ( + IO_INPUT, + { + "id": "test", + "minOccurs": 0, + "schema": { + "type": "array", + "items": { + "type": "string", + "enum": ["a", "b", "c"] + } + } + }, + { + "id": "test", + "type": [ + "null", + { + "type": "array", + "items": { + "type": "enum", + "symbols": ["a", "b", "c"], + }, + } + ] + } + ), + ( + IO_INPUT, + { + "id": "test", + "minOccurs": 0, + "schema": { + "type": "array", + "items": { + "type": "string", + "enum": ["a", "b", "c"] + } + } + }, + { + "id": "test", + "type": [ + "null", + { + "type": "array", + "items": { + "type": "enum", + "symbols": ["a", "b", "c"], + }, + } + ] + } + ) ] ) def test_any2cwl_io_enum_convert(io_select, test_io, expect): diff --git a/weaver/processes/convert.py b/weaver/processes/convert.py index ed0f8c297..1d769358e 100644 --- a/weaver/processes/convert.py +++ b/weaver/processes/convert.py @@ -928,24 +928,46 @@ def any2cwl_io(wps_io, io_select): # convert OAS format to JSON first to simplify following comparisons wps_io_type = get_field(wps_io, "type", search_variations=True) wps_io_schema = get_field(wps_io, "schema", search_variations=False) + + # check for min/max occurs before 'schema' conversion because they + # can be employed with either the WPS or the OGC API representations + wps_min_occ = get_field(wps_io, "min_occurs", search_variations=True) + wps_max_occ = get_field(wps_io, "max_occurs", search_variations=True) + + schema_array = False if wps_io_type is null and isinstance(wps_io_schema, dict): wps_io = oas2json_io(wps_io_schema) wps_io_cat = get_field(wps_io, "type", search_variations=False) wps_io_type = get_field(wps_io, "data_type", search_variations=False) + schema_array = get_field(wps_io_schema, "type", search_variations=False) == "array" + + # re-check for min/max occurs in case some details where inferred from the 'schema' + if wps_min_occ is null: + wps_min_occ = get_field(wps_io, "min_occurs", search_variations=True, default=1) + if wps_max_occ is null: + wps_max_occ = get_field(wps_io, "max_occurs", search_variations=True) wps_default = get_field(wps_io, "default", search_variations=True) - wps_min_occ = get_field(wps_io, "min_occurs", search_variations=True, default=1) - wps_max_occ = get_field(wps_io, "max_occurs", search_variations=True) is_min_null = wps_min_occ in [0, "0"] allow_unique = wps_min_occ in [0, "0", 1, "1"] allow_array = wps_max_occ != null and (wps_max_occ == "unbounded" or wps_max_occ > 1) + # If the OGC API 'schema' of the I/O explicitly requested for an array, + # min occurrence of 1 should be interpreted as needing at least 1 value *in the array*. + # This reflects the typical expectation since omitting the occurrence means 1 by default, + # but an implementation that desires a direct value wouldn't usually indicate 'type: array'. + # (relates to https://github.com/opengeospatial/ogcapi-processes/issues/414) + if schema_array: + allow_unique = False + allow_array = True + if wps_io_cat not in list(WPS_COMPLEX_TYPES): cwl_io_type = any2cwl_literal_datatype(wps_io_type) if cwl_io_type is null: LOGGER.warning("Could not identify a CWL literal data type with [%s].", wps_io_type) wps_allow = get_field(wps_io, "allowed_values", search_variations=True) if isinstance(wps_allow, list) and len(wps_allow) > 0: + cwl_io_enum = _convert_cwl_io_enum(cwl_io_type, wps_allow, io_select, allow_unique, allow_array) cwl_io.update(cwl_io_enum) else: @@ -971,7 +993,7 @@ def any2cwl_io(wps_io, io_select): "items": cwl_io["type"] } # if single value still allowed, or explicitly multi-value array if min greater than one - if wps_min_occ > 1: + if wps_min_occ > 1 or not allow_unique: cwl_io["type"] = cwl_array else: cwl_io["type"] = [cwl_io["type"], cwl_array] From 6ca40530ee8e785167902b94cee67b498672238d Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Thu, 21 Nov 2024 23:18:51 -0500 Subject: [PATCH 09/24] update docs with more I/O conversion examples --- docs/source/package.rst | 241 ++++++++++++++++++++++++++++------------ 1 file changed, 168 insertions(+), 73 deletions(-) diff --git a/docs/source/package.rst b/docs/source/package.rst index 77557094d..b3a84576a 100644 --- a/docs/source/package.rst +++ b/docs/source/package.rst @@ -554,18 +554,18 @@ Inputs/Outputs ID ----------------------- Inputs and outputs (:term:`I/O`) ``id`` from the :term:`CWL` context will be respectively matched against corresponding -``id`` or ``identifier`` field from I/O of :term:`WPS` context. In the :term:`CWL` definition, all of the allowed I/O -structures are supported, whether they are specified using an array list with explicit definitions, using "shortcut" -variant (i.e.: ``[]``), or using key-value pairs (see |cwl-io-map|_ for more details). Regardless of array or -mapping format, :term:`CWL` requires that all I/O have unique ``id``. -On the :term:`WPS` side, either a mapping or list of I/O are also expected with unique ``id``. +``id`` or ``identifier`` field from :term:`I/O` of :term:`WPS` context. In the :term:`CWL` definition, all of the +allowed :term:`I/O` structures are supported, whether they are specified using an array list with explicit definitions, +using "shortcut" variant (i.e.: ``[]``), or using key-value pairs (see |cwl-io-map|_ for more details). +Regardless of array or mapping format, :term:`CWL` requires that all :term:`I/O` have unique ``id``. +On the :term:`WPS` side, either a mapping or list of :term:`I/O` are also expected with unique ``id``. .. versionchanged:: 4.0 - Previous versions only supported :term:`WPS` I/O using the listing format. Both can be used interchangeably in - both :term:`CWL` and :term:`WPS` contexts as of this version. + Previous versions only supported :term:`WPS` :term:`I/O` using the listing format. + Both can be used interchangeably in both :term:`CWL` and :term:`WPS` contexts as of this version. -To summarize, the following :term:`CWL` and :term:`WPS` I/O definitions are all equivalent and will result into the -same process definition after deployment. For simplification purpose, below examples omit all but mandatory fields +To summarize, the following :term:`CWL` and :term:`WPS` :term:`I/O` definitions are all equivalent and will result into +the same process definition after deployment. For simplification purpose, below examples omit all but mandatory fields (only of the ``inputs`` and ``outputs`` portion of the full deployment body) to produce the same result. Other fields are discussed afterward in specific sections. @@ -616,11 +616,13 @@ the :ref:`Deploy ` request body with any of the following variat .. warning:: `Weaver` assumes that its main purpose is to eventually execute an :term:`Application Package` and will therefore prioritize specification in :term:`CWL` over :term:`WPS` to infer types. Because of this, any unmatched ``id`` from - the :term:`WPS` context against provided :term:`CWL` ``id``\s of the same I/O section **will be dropped**, as they - ultimately would have no purpose during :term:`CWL` execution. + the :term:`WPS` context against provided :term:`CWL` ``id``\s of the same :term:`I/O` section **will be dropped**, + as they ultimately would have no purpose during :term:`CWL` execution. This does not apply in the case of referenced :ref:`proc_wps_12` processes since no :term:`CWL` is available in the - first place. + first place. Similarly, when deploying a :ref:`Remote OGC API - Processes ` by :term:`URL` reference, + it is expected that only the :term:`OAS` context (see :ref:`oas_io_schema`) with ``schema`` are provided. Therefore, + these definitions could overrule the :term:`CWL` resolution that would normally occur. .. _cwl-io-types: @@ -650,7 +652,10 @@ The simplification of types can happen when converting in any direction It all depends on which definitions that were provided are the more specific. For example, a :term:`WPS` ``dateTime`` will be simplified to a generic :term:`CWL` ``string``, and into an :term:`OAS` ``string`` with ``format: "date-time"``. In this example, it would be important to provide the :term:`WPS` or :term:`OAS` definitions if the *date-time* portion -was critical, since it could not be inferred only from :term:`CWL` ``string``. +was critical, since it could not be inferred only from :term:`CWL` ``string`` (since it doesn't define this concept). + +.. seealso:: + Further details for the :term:`OAS` context are provided in :ref:`oas_io_schema`. Further details regarding handling methods or important considerations for specific types will be presented in :ref:`cwl-type` and :ref:`cwl-dir` sections. @@ -740,10 +745,10 @@ In the :term:`WPS` context, three data types exist, namely ``Literal``, ``Boundi As presented in previous examples, :term:`I/O` in the :term:`WPS` context does not require an explicit indication of which data type from one of ``Literal``, ``BoundingBox`` and ``Complex`` to apply. Instead, :term:`WPS` type can be -inferred using the matched API schema of the I/O. For instance, ``Complex`` I/O (e.g.: file reference) requires the -``formats`` field to distinguish it from a plain ``string``. Therefore, specifying either ``format`` in :term:`CWL` -or ``formats`` in :term:`WPS` immediately provides all needed information for `Weaver` to understand that this I/O is -expected to be a file reference. +inferred using the matched API schema of the :term:`I/O`. For instance, ``Complex`` :term:`I/O` +(e.g.: :ref:`File Reference `) requires the ``formats`` field to distinguish it from a plain ``string``. +Therefore, specifying either ``format`` in :term:`CWL` or ``formats`` in :term:`WPS` immediately provides all needed +information for `Weaver` to understand that this :term:`I/O` is expected to be a file reference. .. code-block:: json :caption: WPS Complex Data Type @@ -884,7 +889,7 @@ Following is an example where input definitions are equivalent in both :term:`CW +-----------------------------------------------+-----------------------------------------------------------------+ | .. code-block:: json | .. code-block:: json | - | :caption: WPS Format with MIME-type | :caption: CWL Format with Namespace | + | :caption: :term:`WPS` Format with MIME-type| :caption: :term:`CWL` Format with Namespace | | :linenos: | :linenos: | | | | | { | { | @@ -982,18 +987,18 @@ the following two variants are equivalent and completely interchangeable. :widths: 50,50 +---------------------------------------------------+-----------------------------------------------+ - | .. code-block:: json | .. code-block:: json | - | :caption: WPS AllowedValues Input | :caption: CWL Enum Values | - | :linenos: | :linenos: | - | | | - | { | { | - | "id": "input", | "id": "input", | - | "literalDataDomains": [ | "type": { | - | {"allowedValues": ["value-1", "value-2"]} | "type": "enum", | - | ] | "symbols": ["value-1", "value-2"] | - | } | } | - | | } | - +---------------------------------------------------+-----------------------------------------------+ + | .. code-block:: json | .. code-block:: json | + | :caption: :term:`WPS` AllowedValues Input | :caption: :term:`CWL` Enum Values | + | :linenos: | :linenos: | + | | | + | { | { | + | "id": "input", | "id": "input", | + | "literalDataDomains": [ | "type": { | + | {"allowedValues": ["value-1", "value-2"]} | "type": "enum", | + | ] | "symbols": ["value-1", "value-2"] | + | } | } | + | | } | + +---------------------------------------------------+-----------------------------------------------+ `Weaver` will ensure to propagate such definitions bidirectionally in order to update the :term:`CWL` or :term:`WPS` correspondingly with the provided information in the other context if missing. The primitive type to apply to a missing @@ -1026,8 +1031,8 @@ than :term:`CWL` because all details are contained within the same two parameter preferable to provide the ``minOccurs`` and ``maxOccurs`` in the :term:`WPS` context, and let `Weaver` infer the ``array`` and/or ``"null"`` type requirements automatically. Also, because of all implied parameters in this situation to specify the similar details, it is important to avoid providing contradicting specifications as `Weaver` will have -trouble guessing the intended result when merging specifications. If unambiguous guess can be made, :term:`CWL` will be -employed as the overruling definition to resolve erroneous mismatches (as for any other corresponding fields). +trouble guessing the intended result when merging specifications. If an ambiguous guess is made, :term:`CWL` will +be employed as the overruling definition to resolve erroneous mismatches (as for any other corresponding fields). .. warning:: Parameters ``minOccurs`` and ``maxOccurs`` are not permitted for outputs in the :term:`WPS` context. Native @@ -1048,20 +1053,20 @@ of *multiple* and *optional* information. :align: center :widths: 50,50 - +---------------------------------------------------+-----------------------------------------------------------+ - | .. code-block:: json | .. code-block:: json | - | :caption: WPS Multi-Value Input (required) | :caption: CWL Multi-Value Input (required) | - | :linenos: | :linenos: | - | | | - | { | { | - | "id": "input-multi-required", | "id": "input-multi-required", | - | "format": "application/json", | "format": "iana:application/json", | - | "minOccurs": 1, | "type": { | - | "maxOccurs": "unbounded" | "type": "array", "items": "File" | - | } | } | - | | } | - | | | - +---------------------------------------------------+-----------------------------------------------------------+ + +-------------------------------------------------------+-------------------------------------------------------+ + | .. code-block:: json | .. code-block:: json | + | :caption: :term:`WPS` Multi-Value Input (required) | :caption: :term:`CWL` Multi-Value Input (required) | + | :linenos: | :linenos: | + | | | + | { | { | + | "id": "input-multi-required", | "id": "input-multi-required", | + | "format": "application/json", | "format": "iana:application/json", | + | "minOccurs": 2, | "type": { | + | "maxOccurs": "unbounded" | "type": "array", "items": "File" | + | } | } | + | | } | + | | | + +-------------------------------------------------------+-------------------------------------------------------+ It can be noted from the examples that ``minOccurs`` and ``maxOccurs`` can be either an ``integer`` or a ``string`` @@ -1102,27 +1107,27 @@ parameters presented later in this section. Some definitions are also not comple :align: center :widths: 33,34,33 - +-------------------------------+------------------------------+-----------------------------+ - | .. code-block:: json | .. code-block:: json | .. code-block:: json | - | :caption: WPS Input | :caption: OAS Input | :caption: CWL Input | - | :linenos: | :linenos: | :linenos: | - | | | | - | { | { | { | - | "id": "input", | "id": "input", | "id": "input", | - | "literalDataDomains": [ | "schema": { | "type": { | - | { | "type": "array", | "type": "array", | - | "allowedValues": [ | "items": { | "items": { | - | "value-1", | "type": "string", | "type": "enum", | - | "value-2" | "enum": [ | "symbols": [ | - | ] | "value-1", | "value-1", | - | } | "value-2" | "value-2" | - | ], | ] | ] | - | "minOccurs": 2, | }, | } | - | "maxOccurs": 4 | "minItems": 2, | } | - | } | "maxItems": 4 | } | - | | } | | - | | } | | - +-------------------------------+------------------------------+-----------------------------+ + +--------------------------------+--------------------------------+--------------------------------+ + | .. code-block:: json | .. code-block:: json | .. code-block:: json | + | :caption: :term:`WPS` Input | :caption: :term:`OAS` Input | :caption: :term:`CWL` Input | + | :linenos: | :linenos: | :linenos: | + | | | | + | { | { | { | + | "id": "input", | "id": "input", | "id": "input", | + | "literalDataDomains": [ | "schema": { | "type": { | + | { | "type": "array", | "type": "array", | + | "allowedValues": [ | "items": { | "items": { | + | "value-1", | "type": "string", | "type": "enum", | + | "value-2" | "enum": [ | "symbols": [ | + | ] | "value-1", | "value-1", | + | } | "value-2" | "value-2" | + | ], | ] | ] | + | "minOccurs": 2, | }, | } | + | "maxOccurs": 4 | "minItems": 2, | } | + | } | "maxItems": 4 | } | + | | } | | + | | } | | + +--------------------------------+--------------------------------+--------------------------------+ .. seealso:: An example with extensive variations of supported :term:`I/O` definitions with :term:`OAS` is @@ -1136,13 +1141,103 @@ As per all previous parameters in :term:`CWL` and :term:`WPS` contexts, details complementary and `Weaver` will attempt to infer, combine and convert between the various representations as best as possible according to the level of details provided. -Furthermore, `Weaver` will *extend* (as needed) any provided ``schema`` during +Furthermore, `Weaver` will *extend* (as needed) any provided ``schema`` and the resulting :term:`CWL` ``type`` during :ref:`Process Deployment ` if it can identify that the specific :term:`OAS` definition is inconsistent -with other parameters. For example, if ``minOccurs``/``maxOccurs`` were provided by indicating that the :term:`I/O` must -have exactly between [2-4] elements, but only a single :term:`OAS` object was defined under ``schema``, that :term:`OAS` -definition would be converted to the corresponding array, as single values are not permitted in this case. Similarly, if -the range of items was instead [1-4], the :term:`OAS` definition would be adjusted with ``oneOf`` keyword, allowing both +with other parameters. It will also consider any supplementary details provided across all contexts (:term:`CWL`, +:term:`WPS` and :term:`OAS`) to resolve the definitions in the other unspecified contexts. + +For example (as shown below), if ``minOccurs``/``maxOccurs`` are provided to indicate that the :term:`I/O` must +have exactly between [2-4] elements, but only a single :term:`OAS` object is defined under ``schema``, that :term:`OAS` +definition would be converted internally (since single values would not make sense in this case), to generate the +corresponding array representation for the final :term:`CWL` ``type``. If the amount of allowed items was +instead in the [1-4] range, the :term:`OAS` definition would be adjusted with a ``oneOf`` keyword, allowing both single value and array representation of those values, when submitted for :ref:`Process Execution `. +The :term:`CWL` ``type`` would also be adjusted automatically to represent the same single/multi-value flexibility. +Finally, if ``minOccurs=0`` is detected along the :term:`OAS` definition, `Weaver` will consider that this :term:`I/O` +is optional (see also :ref:`cwl-array-null-values`), and will update the :term:`CWL` representation accordingly +with the ``"null"`` type. + +.. table:: + :class: table-code + :name: table-io-oas2cwl-single + :align: center + :widths: 50,50 + + + + | Submitted :term:`OAS` :term:`I/O` | Resolved :term:`CWL` :term:`I/O` | + +------------------------------+-----------------------------+ + | .. code-block:: json | .. code-block:: json | + | :caption: OAS Input | :caption: CWL Input | + | :linenos: | :linenos: | + | | | + | { | { | + | "id": "input", | "id": "input", | + | "schema": { | "type": { | + | "type": "string", | "type": "enum", | + | "enum": [ | "symbols": [ | + | "value-1", | "value-1", | + | "value-2" | "value-2" | + | ] | ] | + | }, | } | + | "minOccurs": 2, | } | + | "maxOccurs": 4 | | + | } | | + | | | + | | | + | | | + +------------------------------+-----------------------------+ + +.. table:: + :class: table-code + :name: table-io-oas2cwl-single-or-multi + :align: center + :widths: 50,50 + + + + | Submitted :term:`OAS` :term:`I/O` | Resolved :term:`CWL` :term:`I/O` | + +------------------------------+-----------------------------+ + | .. code-block:: json | .. code-block:: json | + | :caption: OAS Input | :caption: CWL Input | + | :linenos: | :linenos: | + | | | + | { | { | + | "id": "input", | "id": "input", | + | "schema": { | "type": { | + | "type": "array", | "type": "array", | + | "items": { | "items": { | + | "type": "string", | "type": "enum", | + | "enum": [ | "symbols": [ | + | "value-1", | "value-1", | + | "value-2" | "value-2" | + | ] | ] | + | }, | } | + | "minItems": 2, | } | + | "maxItems": 4 | } | + | } | | + | } | | + +------------------------------+-----------------------------+ + +.. table:: + :class: table-code + :name: table-io-oas2cwl-single-nullable + :align: center + :widths: 50,50 + + + + | Submitted :term:`OAS` :term:`I/O` | Resolved :term:`CWL` :term:`I/O` | + +------------------------------+-----------------------------+ + +.. _warn-single-occurs-multi: +.. versionchanged:: 6.0 + + One known ambiguous resolution can happen when deploying a :term:`OGC API - Processes` that explicitly + defines an :term:`I/O` ``schema`` with a ``type: array`` specification. + By default, :term:`OGC API - Processes` and :term:`WPS` contexts consider that omitting ``minOccurs``/``maxOccurs`` + is equivalent to setting both of them to ``1``. + However, given that an ``array`` implies multiple values, the resolution is in itself ambiguous. + In such case, `Weaver` will assume that users intend their :term:`I/O` to be an array, and will silently ignore + the ambiguity to generate a :term:`CWL` and :term:`WPS` representation as if ``minOccurs>1`` was specified. + Below is a summary of fields that are equivalent or considered to identify similar specifications (corresponding fields are aligned in the table). @@ -1215,7 +1310,7 @@ automatically expended using the ``oneOf`` structure with other missing componen +-----------------------------------------------------------+---------------------------------------------------+ | .. code-block:: json | .. code-block:: json | - | :caption: JSON Complex Input with schema reference | :caption: Generic JSON Complex Input | + | :caption: :term:`JSON` Complex Input with Reference | :caption: Generic :term:`JSON` Complex Input | | | | | { | { | | "id:" "input", | "id:" "input", | From 935de78de76aa83f41ac61a9039d56d1e71c8850 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 22 Nov 2024 01:42:11 -0500 Subject: [PATCH 10/24] further improve docs about I/O definition using schema/type/occurs combinations --- docs/source/package.rst | 338 +++++++++++++++++++++++--------- tests/processes/test_convert.py | 129 +++++++++++- 2 files changed, 365 insertions(+), 102 deletions(-) diff --git a/docs/source/package.rst b/docs/source/package.rst index b3a84576a..940bb7ee6 100644 --- a/docs/source/package.rst +++ b/docs/source/package.rst @@ -1098,7 +1098,12 @@ compliant with :term:`OGC API - Processes` specification that requires this deta the :ref:`Process Description `. The same kind of ``schema`` definitions can be used for the :ref:`Deploy ` operation. -For example, the below representations are equivalent between :term:`WPS`, :term:`OAS` and :term:`CWL` definitions. +.. note:: + This section will present multiple example to illustrate the conversion logic of various :term:`I/O` + representations using the different :term:`WPS`/:term:`OAS`/:term:`CWL` contexts. + See table :ref:`table-io-summary-context-mapping` for a quick overview. + +Below representations are equivalent between :term:`WPS`, :term:`OAS` and :term:`CWL` definitions. Obviously, corresponding definitions can become more or less complicated with multiple combinations of corresponding parameters presented later in this section. Some definitions are also not completely portable between contexts. @@ -1146,98 +1151,216 @@ Furthermore, `Weaver` will *extend* (as needed) any provided ``schema`` and the with other parameters. It will also consider any supplementary details provided across all contexts (:term:`CWL`, :term:`WPS` and :term:`OAS`) to resolve the definitions in the other unspecified contexts. -For example (as shown below), if ``minOccurs``/``maxOccurs`` are provided to indicate that the :term:`I/O` must -have exactly between [2-4] elements, but only a single :term:`OAS` object is defined under ``schema``, that :term:`OAS` -definition would be converted internally (since single values would not make sense in this case), to generate the -corresponding array representation for the final :term:`CWL` ``type``. If the amount of allowed items was -instead in the [1-4] range, the :term:`OAS` definition would be adjusted with a ``oneOf`` keyword, allowing both -single value and array representation of those values, when submitted for :ref:`Process Execution `. -The :term:`CWL` ``type`` would also be adjusted automatically to represent the same single/multi-value flexibility. -Finally, if ``minOccurs=0`` is detected along the :term:`OAS` definition, `Weaver` will consider that this :term:`I/O` -is optional (see also :ref:`cwl-array-null-values`), and will update the :term:`CWL` representation accordingly -with the ``"null"`` type. +For example, as shown below, if ``minOccurs``/``maxOccurs`` are provided to indicate that the :term:`I/O` must +have exactly between [2-4] elements, but only a single :term:`OAS` object instance is defined under ``schema``, +that :term:`OAS` definition will be converted internally (since single values would not make sense in this case), +to generate the corresponding array representation for the final :term:`CWL` ``type``. .. table:: :class: table-code - :name: table-io-oas2cwl-single + :name: table-io-oas2cwl-multi + :caption: Mutil-Value Array inferred from Occurrences :align: center :widths: 50,50 - + - | Submitted :term:`OAS` :term:`I/O` | Resolved :term:`CWL` :term:`I/O` | - +------------------------------+-----------------------------+ - | .. code-block:: json | .. code-block:: json | - | :caption: OAS Input | :caption: CWL Input | - | :linenos: | :linenos: | - | | | - | { | { | - | "id": "input", | "id": "input", | - | "schema": { | "type": { | - | "type": "string", | "type": "enum", | - | "enum": [ | "symbols": [ | - | "value-1", | "value-1", | - | "value-2" | "value-2" | - | ] | ] | - | }, | } | - | "minOccurs": 2, | } | - | "maxOccurs": 4 | | - | } | | - | | | - | | | - | | | - +------------------------------+-----------------------------+ + +---------------------------------------------------+-----------------------------------------------+ + | .. code-block:: json | .. code-block:: json | + | :caption: Submitted :term:`OAS` :term:`I/O` | :caption: Resolved :term:`CWL` :term:`I/O` | + | :linenos: | :linenos: | + | | | + | { | { | + | "id": "input", | "id": "input", | + | "schema": { | "type": { | + | "type": "string", | "type": "array", | + | "enum": [ | "items": { | + | "value-1", | "type": "enum", | + | "value-2" | "symbols": [ | + | ] | "value-1", | + | }, | "value-2" | + | "minOccurs": 2, | ] | + | "maxOccurs": 4 | } | + | } | } | + | | } | + +---------------------------------------------------+-----------------------------------------------+ + +If the amount of allowed items was instead in the [1-4] range (notice the modified ``minOccurs=1`` below), +while still providing only a single instance of those items under ``schema``, the :term:`OAS` definition +would be adjusted in the resulting :ref:`Process Description ` with a ``oneOf`` keyword +to conveniently allow submitting a :ref:`Process Execution ` with either single value or +an array of this value representation. +The :term:`CWL` ``type`` would also be adjusted automatically to represent the same single/multi-value flexibility. .. table:: :class: table-code :name: table-io-oas2cwl-single-or-multi + :caption: Single/Mutil-Value Array inferred from Occurrences :align: center :widths: 50,50 - + - | Submitted :term:`OAS` :term:`I/O` | Resolved :term:`CWL` :term:`I/O` | - +------------------------------+-----------------------------+ - | .. code-block:: json | .. code-block:: json | - | :caption: OAS Input | :caption: CWL Input | - | :linenos: | :linenos: | - | | | - | { | { | - | "id": "input", | "id": "input", | - | "schema": { | "type": { | - | "type": "array", | "type": "array", | - | "items": { | "items": { | - | "type": "string", | "type": "enum", | - | "enum": [ | "symbols": [ | - | "value-1", | "value-1", | - | "value-2" | "value-2" | - | ] | ] | - | }, | } | - | "minItems": 2, | } | - | "maxItems": 4 | } | - | } | | - | } | | - +------------------------------+-----------------------------+ + +---------------------------------------------------+-----------------------------------------------+ + | .. code-block:: json | .. code-block:: json | + | :caption: Submitted :term:`OAS` :term:`I/O` | :caption: Resolved :term:`CWL` :term:`I/O` | + | :linenos: | :linenos: | + | | | + | { | { | + | "id": "input", | "id": "input", | + | "schema": { | "type": [ | + | "type": "string", | { | + | "enum": [ | "type": "enum", | + | "value-1", | "symbols": [ | + | "value-2" | "value-1", | + | ] | "value-2" | + | }, | ] | + | "minOccurs": 1, | }, | + | "maxOccurs": 4 | { | + | } | "type": "array", | + | | "items": { | + | | "type": "enum", | + | | "symbols": [ | + | | "value-1", | + | | "value-2" | + | | ] | + | | } | + | | } | + | | ] | + | | } | + +---------------------------------------------------+-----------------------------------------------+ + +Alternatively, if ``minOccurs=0`` is detected along the :term:`OAS` definition, `Weaver` will consider +that this :term:`I/O` is optional , and will update the :term:`CWL` +representation accordingly with the ``"null"`` type, as shown below. + +.. seealso:: + :ref:`cwl-array-null-values` .. table:: :class: table-code - :name: table-io-oas2cwl-single-nullable + :name: table-io-oas2cwl-single-or-multi-nullable + :caption: Nullable Single/Multi-Value Array inferred from Occurrences :align: center :widths: 50,50 - + - | Submitted :term:`OAS` :term:`I/O` | Resolved :term:`CWL` :term:`I/O` | - +------------------------------+-----------------------------+ + +---------------------------------------------------+-----------------------------------------------+ + | .. code-block:: json | .. code-block:: json | + | :caption: Submitted :term:`OAS` :term:`I/O` | :caption: Resolved :term:`CWL` :term:`I/O` | + | :linenos: | :linenos: | + | | | + | { | { | + | "id": "input", | "id": "input", | + | "schema": { | "type": [ | + | "type": "string", | "null", | + | "enum": [ | { | + | "value-1", | "type": "enum", | + | "value-2" | "symbols": [ | + | ] | "value-1", | + | }, | "value-2" | + | "minOccurs": 0, | ] | + | "maxOccurs": 4 | }, | + | } | { | + | | "type": "array", | + | | "items": { | + | | "type": "enum", | + | | "symbols": [ | + | | "value-1", | + | | "value-2" | + | | ] | + | | } | + | | } | + | | ] | + | | } | + +---------------------------------------------------+-----------------------------------------------+ + +Contrary to previous examples where a "single-value" definition was always provided under the ``schema``, +it is also possible to provide a ``type: array`` directly. In such case, because of the requirement from +the ``schema``, single values cannot be submitted as they would not respect the array condition. +This change would be reflected in the resolved :term:`CWL` by omitting the single-value option in the ``type`` +definition, as shown below. + +.. note:: + In the below example, ``minOccurs`` and ``maxOccurs`` are not required, since the ``type: array`` + indication is sufficient in itself to identify that the :term:`CWL` should be only an array. + However, they can be provided to apply further restriction on the allowed :term:`I/O` dimensionality, + since :term:`CWL` will not directly enforce such restrictions. .. _warn-single-occurs-multi: -.. versionchanged:: 6.0 +.. warning:: - One known ambiguous resolution can happen when deploying a :term:`OGC API - Processes` that explicitly - defines an :term:`I/O` ``schema`` with a ``type: array`` specification. - By default, :term:`OGC API - Processes` and :term:`WPS` contexts consider that omitting ``minOccurs``/``maxOccurs`` - is equivalent to setting both of them to ``1``. - However, given that an ``array`` implies multiple values, the resolution is in itself ambiguous. - In such case, `Weaver` will assume that users intend their :term:`I/O` to be an array, and will silently ignore - the ambiguity to generate a :term:`CWL` and :term:`WPS` representation as if ``minOccurs>1`` was specified. + A seemingly ambiguous resolution can happen when an :term:`I/O` ``schema`` explicitly defines + a ``type: array`` specification by itself (without any ``minOccurs``/``maxOccurs`` indication). + By default, :term:`OGC API - Processes` and :term:`WPS` contexts consider that + omitting ``minOccurs``/``maxOccurs`` is equivalent to setting them both to ``1``. + However, given that an ``array`` implies multiple values, such resolution is in itself ambiguous and conflicting. + In such case, `Weaver` will assume that users intend their :term:`I/O` to be an ``array`` + (since it was specified explicitly in the submitted ``schema``), and will silently ignore the ambiguity + to generate a :term:`CWL` and :term:`WPS` representation as if ``minOccurs>1`` was specified. +.. table:: + :class: table-code + :name: table-io-oas2cwl-array + :caption: Multi-Value Array inferred from Occurrences + :align: center + :widths: 50,50 + + +---------------------------------------------------+-----------------------------------------------+ + | .. code-block:: json | .. code-block:: json | + | :caption: Submitted :term:`OAS` :term:`I/O` | :caption: Resolved :term:`CWL` :term:`I/O` | + | :linenos: | :linenos: | + | | | + | { | { | + | "id": "input", | "id": "input", | + | "schema": { | "type": { | + | "type": "array", | "type": "array", | + | "items": { | "items": { | + | "type": "string", | "type": "enum", | + | "enum": [ | "symbols": [ | + | "value-1", | "value-1", | + | "value-2" | "value-2" | + | ] | ] | + | } | } | + | }, | } | + | } | } | + +---------------------------------------------------+-----------------------------------------------+ + +As one might expect, mixing ``minOccurs=0`` in combination with the ``type: array`` within the ``schema`` +will also generate the relevant representation by combining it with ``"null"`` for the :term:`CWL` type definition. + +.. table:: + :class: table-code + :name: table-io-oas2cwl-array-nullable + :caption: Nullable Multi-Value Array inferred from Occurrences + :align: center + :widths: 50,50 + + +---------------------------------------------------+-----------------------------------------------+ + | .. code-block:: json | .. code-block:: json | + | :caption: Submitted :term:`OAS` :term:`I/O` | :caption: Resolved :term:`CWL` :term:`I/O` | + | :linenos: | :linenos: | + | | | + | { | { | + | "id": "input", | "id": "input", | + | "schema": { | "type": [ | + | "type": "array", | "null", | + | "items": { | { | + | "type": "string", | "type": "array", | + | "enum": [ | "items": { | + | "value-1", | "type": "enum", | + | "value-2" | "symbols": [ | + | ] | "value-1", | + | } | "value-2" | + | }, | ] | + | "minOccurs": 0 | } | + | } | } | + | | ] | + | | } | + +---------------------------------------------------+-----------------------------------------------+ + +For any more complicated nested representations of multi-dimensional arrays, +a specific :term:`Media-Type` is required, as presented in further details by +the :ref:`oas_json_types` and :ref:`oas_file_references` sections. This is done +on purpose, since the auto-resolution of :term:`CWL` types would lead to too much +ambiguity when considering all combinations of array dimensionality, value cardinality and nested properties. +Defining an explicit :term:`Media-Type` registered within a naming authority registry will ensure that all +parties interacting with the :term:`Processes` will have a common understanding of their :term:`I/O` definition. Below is a summary of fields that are equivalent or considered to identify similar specifications (corresponding fields are aligned in the table). @@ -1247,6 +1370,8 @@ are not explicitly handled to search for corresponding definitions in :term:`WPS .. table:: :align: center + :name: table-io-summary-context-mapping + :caption: Summary of :term:`I/O` Context Mappings +-------------------------------------+---------------------------------------+------------------------------------+ | Parameters in :term:`WPS` Context | Parameters in :term:`OAS` Context | Parameters in :term:`CWL` Context | @@ -1296,38 +1421,63 @@ When any of those three :term:`JSON` definition is detected, other equivalent re a ``oneOf`` keyword if they were not already explicitly provided in ``schema``. When analyzing and combining those definitions, any :term:`OAS` ``$ref`` or ``contentSchema`` specifications will be used to resolve the corresponding ``type: object`` with the most explicit ``schema`` definition available. If this cannot be achieved, a generic -``object`` allowing any ``additionalProperties`` (i.e.: no :term:`JSON` schema variation) will be used instead. +``object`` allowing any ``additionalProperties`` (i.e.: no :term:`JSON` schema validation) will be used instead. External URIs pointing to an :term:`OAS` schema formatted either as :term:`JSON` or :term:`YAML` are resolved and fetched inline as needed during :term:`I/O` merging strategy to interpret specified references. -Following is a sample representation of equivalent variants :term:`JSON` definitions, which would be -automatically expended using the ``oneOf`` structure with other missing components if applicable. +Following are sample representations of equivalent variants of :term:`JSON` definitions, which would be +automatically expended using the ``oneOf`` structure if the other representations where missing when +submitting the :term:`I/O` definition. + +.. table:: + :class: table-code + :align: center + + +-----------------------------------------------------------+ + | .. code-block:: json | + | :caption: :term:`JSON` Complex Input with Reference | + | | + | { | + | "id:" "input", | + | "schema": { | + | "oneOf": [ | + | { | + | "type": "string", | + | "contentMediaType": "application/json", | + | "contentSchema": "http://host.com/schema.json" | + | }, | + | { | + | "$ref": "http://host.com/schema.json" | + | } | + | ] | + | } | + | } | + +-----------------------------------------------------------+ .. table:: :class: table-code :align: center - :widths: 50,50 - +-----------------------------------------------------------+---------------------------------------------------+ - | .. code-block:: json | .. code-block:: json | - | :caption: :term:`JSON` Complex Input with Reference | :caption: Generic :term:`JSON` Complex Input | - | | | - | { | { | - | "id:" "input", | "id:" "input", | - | "schema": { | "schema": { | - | "oneOf": [ | "oneOf": [ | - | { | { | - | "type": "string", | "type": "string", | - | "contentMediaType": "application/json" | "contentMediaType": "application/json" | - | "contentSchema": "http://host.com/schema.json" | }, | - | }, | { | - | { | "type": "object", | - | "$ref": "http://host.com/schema.json" | "additionalProperties": true | - | } | } | - | ] | ] | - | } | } | - | } | } | - +-----------------------------------------------------------+---------------------------------------------------+ + +---------------------------------------------------+ + | .. code-block:: json | + | :caption: Generic :term:`JSON` Complex Input | + | | + | { | + | "id:" "input", | + | "schema": { | + | "oneOf": [ | + | { | + | "type": "string", | + | "contentMediaType": "application/json" | + | }, | + | { | + | "type": "object", | + | "additionalProperties": true | + | } | + | ] | + | } | + | } | + +---------------------------------------------------+ Special handling of well-known :term:`OAS` ``type: object`` structures is also performed to convert them to more diff --git a/tests/processes/test_convert.py b/tests/processes/test_convert.py index e61ed9cfd..94e1e3a0e 100644 --- a/tests/processes/test_convert.py +++ b/tests/processes/test_convert.py @@ -533,14 +533,99 @@ def test_any2cwl_io_from_oas(): }, ), ( + # documentation example IO_INPUT, { "id": "test", "schema": { + "type": "string", + "enum": ["value-1", "value-2"] + }, + "minOccurs": 2, + "maxOccurs": 4, + }, + { + "id": "test", + "type": { + "type": "array", + "items": { + "type": "enum", + "symbols": ["value-1", "value-2"], + }, + } + } + ), + ( + # documentation example + IO_INPUT, + { + "id": "test", + "schema": { + "type": "string", + "enum": ["value-1", "value-2"] + }, + "minOccurs": 1, + "maxOccurs": 4, + }, + { + "id": "test", + "type": [ + { + "type": "enum", + "symbols": ["value-1", "value-2"], + }, + { + "type": "array", + "items": { + "type": "enum", + "symbols": ["value-1", "value-2"], + }, + } + ] + } + ), + ( + # documentation example + IO_INPUT, + { + "id": "test", + "schema": { + "type": "string", + "enum": ["value-1", "value-2"] + }, + "minOccurs": 0, + "maxOccurs": 4, + }, + { + "id": "test", + "type": [ + "null", + { + "type": "enum", + "symbols": ["value-1", "value-2"], + }, + { + "type": "array", + "items": { + "type": "enum", + "symbols": ["value-1", "value-2"], + }, + } + ] + } + ), + ( + # documentation example + IO_INPUT, + { + "id": "test", + "schema": { + # explicitly array, MUST force CWL to be an array as well + # doesn't matter that default min/max occurs = 1 "type": "array", "items": { "type": "string", - "enum": ["a", "b", "c"] + "enum": ["value-1", "value-2"] } } }, @@ -551,21 +636,47 @@ def test_any2cwl_io_from_oas(): "type": "array", "items": { "type": "enum", - "symbols": ["a", "b", "c"], + "symbols": ["value-1", "value-2"], }, } } ), ( + # documentation example IO_INPUT, { "id": "test", - "minOccurs": 0, + "minOccurs": 1, # same as previous, but explicitly specified rather than default "schema": { "type": "array", "items": { "type": "string", - "enum": ["a", "b", "c"] + "enum": ["value-1", "value-2"] + } + } + }, + { + "id": "test", + "type": { + "type": "array", + "items": { + "type": "enum", + "symbols": ["value-1", "value-2"], + }, + } + } + ), + ( + # documentation example + IO_INPUT, + { + "id": "test", + "minOccurs": 0, # because optional, array must be combined with "null" + "schema": { + "type": "array", + "items": { + "type": "string", + "enum": ["value-1", "value-2"] } } }, @@ -577,22 +688,24 @@ def test_any2cwl_io_from_oas(): "type": "array", "items": { "type": "enum", - "symbols": ["a", "b", "c"], + "symbols": ["value-1", "value-2"], }, } ] } ), ( + # documentation example IO_INPUT, { "id": "test", - "minOccurs": 0, + "minOccurs": 0, # same as previous + "maxOccurs": 4, # explicitly specified should not change anything "schema": { "type": "array", "items": { "type": "string", - "enum": ["a", "b", "c"] + "enum": ["value-1", "value-2"] } } }, @@ -604,7 +717,7 @@ def test_any2cwl_io_from_oas(): "type": "array", "items": { "type": "enum", - "symbols": ["a", "b", "c"], + "symbols": ["value-1", "value-2"], }, } ] From 1fa6170c12d210b4ff25dcd4ccc24b78776e9427 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 22 Nov 2024 02:38:45 -0500 Subject: [PATCH 11/24] fix docs linting --- docs/source/package.rst | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/docs/source/package.rst b/docs/source/package.rst index 940bb7ee6..266a95276 100644 --- a/docs/source/package.rst +++ b/docs/source/package.rst @@ -986,7 +986,7 @@ the following two variants are equivalent and completely interchangeable. :align: center :widths: 50,50 - +---------------------------------------------------+-----------------------------------------------+ + +---------------------------------------------------+-----------------------------------------------+ | .. code-block:: json | .. code-block:: json | | :caption: :term:`WPS` AllowedValues Input | :caption: :term:`CWL` Enum Values | | :linenos: | :linenos: | @@ -1156,10 +1156,9 @@ have exactly between [2-4] elements, but only a single :term:`OAS` object instan that :term:`OAS` definition will be converted internally (since single values would not make sense in this case), to generate the corresponding array representation for the final :term:`CWL` ``type``. -.. table:: +.. table:: Mutil-Value Array inferred from Occurrences :class: table-code :name: table-io-oas2cwl-multi - :caption: Mutil-Value Array inferred from Occurrences :align: center :widths: 50,50 @@ -1190,10 +1189,9 @@ to conveniently allow submitting a :ref:`Process Execution ` wi an array of this value representation. The :term:`CWL` ``type`` would also be adjusted automatically to represent the same single/multi-value flexibility. -.. table:: +.. table:: Single/Mutil-Value Array inferred from Occurrences :class: table-code :name: table-io-oas2cwl-single-or-multi - :caption: Single/Mutil-Value Array inferred from Occurrences :align: center :widths: 50,50 @@ -1233,10 +1231,9 @@ representation accordingly with the ``"null"`` type, as shown below. .. seealso:: :ref:`cwl-array-null-values` -.. table:: +.. table:: Nullable Single/Multi-Value Array inferred from Occurrences :class: table-code :name: table-io-oas2cwl-single-or-multi-nullable - :caption: Nullable Single/Multi-Value Array inferred from Occurrences :align: center :widths: 50,50 @@ -1294,10 +1291,9 @@ definition, as shown below. (since it was specified explicitly in the submitted ``schema``), and will silently ignore the ambiguity to generate a :term:`CWL` and :term:`WPS` representation as if ``minOccurs>1`` was specified. -.. table:: +.. table:: Multi-Value Array inferred from Occurrences :class: table-code :name: table-io-oas2cwl-array - :caption: Multi-Value Array inferred from Occurrences :align: center :widths: 50,50 @@ -1324,10 +1320,9 @@ definition, as shown below. As one might expect, mixing ``minOccurs=0`` in combination with the ``type: array`` within the ``schema`` will also generate the relevant representation by combining it with ``"null"`` for the :term:`CWL` type definition. -.. table:: +.. table:: Nullable Multi-Value Array inferred from Occurrences :class: table-code :name: table-io-oas2cwl-array-nullable - :caption: Nullable Multi-Value Array inferred from Occurrences :align: center :widths: 50,50 @@ -1368,10 +1363,9 @@ Note that all :term:`OAS` elements are always nested under the ``schema`` field located where appropriate as per :term:`OpenAPI` specification. Other :term:`OAS` fields are still permitted, but are not explicitly handled to search for corresponding definitions in :term:`WPS` and :term:`CWL` contexts. -.. table:: +.. table:: Summary of :term:`I/O` Context Mappings :align: center :name: table-io-summary-context-mapping - :caption: Summary of :term:`I/O` Context Mappings +-------------------------------------+---------------------------------------+------------------------------------+ | Parameters in :term:`WPS` Context | Parameters in :term:`OAS` Context | Parameters in :term:`CWL` Context | @@ -1438,7 +1432,7 @@ submitting the :term:`I/O` definition. | :caption: :term:`JSON` Complex Input with Reference | | | | { | - | "id:" "input", | + | "id": "input", | | "schema": { | | "oneOf": [ | | { | @@ -1463,7 +1457,7 @@ submitting the :term:`I/O` definition. | :caption: Generic :term:`JSON` Complex Input | | | | { | - | "id:" "input", | + | "id": "input", | | "schema": { | | "oneOf": [ | | { | @@ -1514,8 +1508,8 @@ purposes. by reference. 2. Using a generic ``{"type": "string", "format": "uri"}`` :term:`OAS` schema does not convey the :term:`Media-Types` - requirements as well as inferring them "link-to" ``{"type": "string", "contentMediaType: }``. It is therefore - better to omit them entirely as they do not add any :term:`I/O` descriptive value. + requirements as well as inferring them from a ``{"type": "string", "contentMediaType: }`` representation. + It is therefore better to omit them entirely as they do not add any :term:`I/O` descriptive value. 3. Because the above string-formatted ``uri`` are left out from definitions, it can instead be used explicitly in an :term:`I/O` specification to indicate to `Weaver` that the :term:`Process` uses a ``Literal`` URI string, that must @@ -1539,7 +1533,7 @@ schema can be added as well, as presented in :ref:`oas_json_types` section. | :caption: Single Format Complex Input | :caption: Multiple Supported Format Complex Input | | | | | { | { | - | "id:" "input", | "id:" "input", | + | "id": "input", | "id": "input", | | "schema": { | "schema": { | | "type": "string", | "oneOf": [ | | "contentMediaType": "image/png", | { | From 103529b39f0e4d658491aa4cce4e4249d706579b Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 22 Nov 2024 02:42:55 -0500 Subject: [PATCH 12/24] fix docs logo invalid path --- docs/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 98e150e8b..361181356 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -245,7 +245,7 @@ def doc_redirect_include(file_path): # The name of an image file (relative to this directory) to place at the top # of the sidebar. -html_logo = "../_static/crim.png" +html_logo = "../../weaver/wps_restapi/templates/static/crim.png" # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 From 04f0d8546a8b91f5ca90e07531da4c7c87fbfa45 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 22 Nov 2024 03:34:27 -0500 Subject: [PATCH 13/24] fix default minOccurs=1 if unresolved by schema --- weaver/processes/convert.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/weaver/processes/convert.py b/weaver/processes/convert.py index 1d769358e..0d0caf370 100644 --- a/weaver/processes/convert.py +++ b/weaver/processes/convert.py @@ -947,6 +947,9 @@ def any2cwl_io(wps_io, io_select): if wps_max_occ is null: wps_max_occ = get_field(wps_io, "max_occurs", search_variations=True) + if wps_min_occ is null: + wps_min_occ = 1 + wps_default = get_field(wps_io, "default", search_variations=True) is_min_null = wps_min_occ in [0, "0"] allow_unique = wps_min_occ in [0, "0", 1, "1"] From 310f4b9b1a6864300aa22cd6342abd8688c39bc9 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 22 Nov 2024 10:29:55 -0500 Subject: [PATCH 14/24] attempt swap order of coverage actions --- .github/workflows/tests.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index aab149e47..cd7fbb022 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -131,13 +131,6 @@ jobs: - name: Run Tests run: make stop ${{ matrix.test-case }} - - name: Upload test results to Codecov - if: ${{ !cancelled() && success() && matrix.test-case == 'test-coverage-only' }} - uses: codecov/test-results-action@v1 - with: - files: reports/coverage-junit.xml,!./cache - flags: ${{ matrix.python-version }} - token: ${{ secrets.CODECOV_TOKEN }} - name: Upload coverage report uses: codecov/codecov-action@v2 if: ${{ success() && matrix.test-case == 'test-coverage-only' }} @@ -146,6 +139,13 @@ jobs: files: ./reports/coverage.xml fail_ci_if_error: true verbose: true + - name: Upload test results to Codecov + if: ${{ !cancelled() && success() && matrix.test-case == 'test-coverage-only' }} + uses: codecov/test-results-action@v1 + with: + files: reports/coverage-junit.xml,!./cache + flags: ${{ matrix.python-version }} + token: ${{ secrets.CODECOV_TOKEN }} deploy-docker: needs: tests From f24885728713b715b474bd15ecc6a5d3ccd5b11d Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 22 Nov 2024 18:41:28 -0500 Subject: [PATCH 15/24] disable codecov test analytics to bring back useful coverage reports (relates to https://github.com/codecov/feedback/issues/304#issuecomment-2492675117) --- .github/workflows/tests.yml | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index cd7fbb022..16997e631 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -131,6 +131,15 @@ jobs: - name: Run Tests run: make stop ${{ matrix.test-case }} + # disable due to confusing and unhelpful messages created by codecov + # (see )https://github.com/codecov/feedback/issues/304#issuecomment-2492675117) + # - name: Upload test results to Codecov + # if: ${{ !cancelled() && success() && matrix.test-case == 'test-coverage-only' }} + # uses: codecov/test-results-action@v1 + # with: + # files: reports/coverage-junit.xml,!./cache + # flags: ${{ matrix.python-version }} + # token: ${{ secrets.CODECOV_TOKEN }} - name: Upload coverage report uses: codecov/codecov-action@v2 if: ${{ success() && matrix.test-case == 'test-coverage-only' }} @@ -139,13 +148,6 @@ jobs: files: ./reports/coverage.xml fail_ci_if_error: true verbose: true - - name: Upload test results to Codecov - if: ${{ !cancelled() && success() && matrix.test-case == 'test-coverage-only' }} - uses: codecov/test-results-action@v1 - with: - files: reports/coverage-junit.xml,!./cache - flags: ${{ matrix.python-version }} - token: ${{ secrets.CODECOV_TOKEN }} deploy-docker: needs: tests From 39b95707aeb2f68259b709ccb01572c431748a1c Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 22 Nov 2024 19:36:05 -0500 Subject: [PATCH 16/24] add workaround patch of enum with colon failing CWL validation (relates to https://github.com/common-workflow-language/cwltool/issues/2071) --- CHANGES.rst | 5 ++ tests/processes/test_convert.py | 90 +++++++++++++++++++++++++++++++-- weaver/formats.py | 2 +- weaver/processes/convert.py | 48 +++++++++++++++--- 4 files changed, 134 insertions(+), 11 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index e2afd597a..b249e3174 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -65,6 +65,11 @@ Changes: Fixes: ------ +- Fix `CWL` ``enum`` type mishandling ``symbols`` containing a colon (``:``) character (e.g.: a list of allowed times) + leading to their invalid interpretation as namespaced strings (i.e.: ``:``), in turn failing validation + and breaking the resulting `CWL`. Such ``enum`` will be patched with updated ``symbols`` prefixed by ``#`` to respect + the expected URI representation of ``enum`` values by the `CWL` parser (relates to + `common-workflow-language/cwltool#2071 `_). - Fix `CWL` conversion from a `OGC API - Processes` definition specifying an `I/O` with ``schema`` explicitly indicating a ``type: array`` and nested ``enum``, even if ``minOccurs: 1`` is omitted or explicitly set. - Fix ``url`` parameter to override the `CLI` internal ``url`` when passed explicitly to the invoked operation. diff --git a/tests/processes/test_convert.py b/tests/processes/test_convert.py index 94e1e3a0e..f24bd2c1b 100644 --- a/tests/processes/test_convert.py +++ b/tests/processes/test_convert.py @@ -2236,29 +2236,34 @@ def test_ogcapi2cwl_process_without_extra(): @pytest.mark.parametrize( - ["input_str", "input_int", "input_float"], + ["input_str", "input_int", "input_float", "input_time"], [ # OpenAPI schema references ( {"schema": {"type": "string", "enum": ["a", "b", "c"]}}, {"schema": {"type": "integer", "enum": [1, 2, 3]}}, {"schema": {"type": "number", "format": "float", "enum": [1.2, 3.4]}}, + {"schema": {"type": "string", "format": "time", "enum": ["12:00", "24:00"]}}, ), # OGC-API input definitions ( {"data_type": "string", "allowed_values": ["a", "b", "c"]}, {"data_type": "integer", "allowed_values": [1, 2, 3]}, {"data_type": "float", "allowed_values": [1.2, 3.4]}, + {"data_type": "string", "allowed_values": ["12:00", "24:00"]}, ), ] ) -def test_ogcapi2cwl_process_cwl_enum_updated(input_str, input_int, input_float): +def test_ogcapi2cwl_process_cwl_enum_updated(input_str, input_int, input_float, input_time): """ - Test that a :term:`CWL` with pseudo-``Enum`` type has the necessary :term:`CWL` requirements to perform validation. + Test that a :term:`CWL` with pseudo-``enum`` type has the necessary :term:`CWL` requirements to perform validation. .. seealso:: - :func:`test_any2cwl_io_enum_convert` - :func:`test_any2cwl_io_enum_validate` + - :func:`weaver.processes.convert._convert_cwl_io_enum` + - :func:`weaver.processes.convert._get_cwl_js_value_from` + - :func:`weaver.processes.convert._patch_cwl_enum_js_requirement` """ href = "https://remote-server.com/processes/test-process" body = { @@ -2266,6 +2271,7 @@ def test_ogcapi2cwl_process_cwl_enum_updated(input_str, input_int, input_float): "enum-str": input_str, "enum-int": input_int, "enum-float": input_float, + "enum-time": input_time, }, "outputs": { "output": {"schema": {"type": "string", "contentMediaType": ContentType.TEXT_PLAIN}}, @@ -2295,6 +2301,84 @@ def test_ogcapi2cwl_process_cwl_enum_updated(input_str, input_int, input_float): assert "values.includes(self)" in cwl_value_from assert "self.every(item => values.includes(item))" not in cwl_value_from + assert cwl["inputs"]["enum-time"]["type"] == {"type": "enum", "symbols": ["#12:00", "#24:00"]} + assert "inputBinding" not in cwl["inputs"]["enum-time"] + + +@pytest.mark.parametrize( + ["cwl_io", "wps_io_expected"], + [ + ( + { + "name": "test", + "type": "enum", + "symbols": ["#12:00", "#24:00"], + }, + { + "id": "test", + "allowed_values": ["12:00", "24:00"], + } + ), + ( + { + "name": "test", + "type": { + "type": "enum", + "symbols": ["#12:00", "#24:00"], + } + }, + { + "id": "test", + "allowed_values": ["12:00", "24:00"], + } + ), + ( + { + "name": "test", + "type": { + "type": "array", + "items": { + "type": "enum", + "symbols": ["#12:00", "#24:00"], + } + } + }, + { + "id": "test", + "allowed_values": ["12:00", "24:00"], + "min_occurs": 1, + } + ), + ( + { + "name": "test", + "type": [ + "null", + { + "type": "array", + "items": { + "type": "enum", + "symbols": ["#12:00", "#24:00"], + } + } + ] + }, + { + "id": "test", + "allowed_values": ["12:00", "24:00"], + "min_occurs": 1, + } + ) + ] +) +def test_patched_cwl_enum_colon_back_conversion(cwl_io, wps_io_expected): + """ + Given a :term:`CWL` ``enum`` that contains ``:`` characters patched with prefixed ``#`` test the inverse conversion. + """ + wps_io = cwl2wps_io(cwl_io, IO_INPUT) + wps_allow = [allow.value for allow in wps_io.allowed_values] + assert wps_allow == ["12:00", "24:00"] + @mocked_remote_server_requests_wps1([ resources.TEST_REMOTE_SERVER_URL, diff --git a/weaver/formats.py b/weaver/formats.py index 133512880..cddc7dcf9 100644 --- a/weaver/formats.py +++ b/weaver/formats.py @@ -828,7 +828,7 @@ def get_cwl_file_format(media_type, make_reference=False, must_exist=True, allow ``must_exist=False`` before providing it to the `CWL` I/O definition. Setting ``must_exist=False`` should be used only for literal string comparison or pre-processing steps to evaluate formats. - :param media_type: Some reference, namespace'd or literal (possibly extended) media-type string. + :param media_type: Some reference, namespaced or literal (possibly extended) media-type string. :param make_reference: Construct the full URL reference to the resolved media-type. Otherwise, return tuple details. :param must_exist: Return result only if it can be resolved to an official media-type (or synonym if enabled), otherwise ``None``. diff --git a/weaver/processes/convert.py b/weaver/processes/convert.py index 0d0caf370..864c112f1 100644 --- a/weaver/processes/convert.py +++ b/weaver/processes/convert.py @@ -857,10 +857,12 @@ def _get_cwl_js_value_from(cwl_io_symbols, allow_unique, allow_array): def _convert_cwl_io_enum(cwl_io_type, cwl_io_symbols, io_select, allow_unique, allow_array): # type: (Union[str, Type[null]], List[AnyValueType], IO_Select_Type, bool, bool) -> CWL_IO_Type """ - Converts the :term:`I/O` definition to a :term:`CWL` :term:`I/O` that allows ``Enum``-like functionality. + Converts the :term:`I/O` definition to a :term:`CWL` :term:`I/O` that allows ``enum``-like functionality. In the event of an explicit ``string`` as base type, :term:`CWL` directly supports ``type: enum``. Other basic - types are not directly supported, and must instead perform manual validation against the set of allowed values. + types are not directly supported, and must instead perform manual validation against the set of allowed values, + using JavaScript evaluation applied by ``valueFrom`` against the the submitted values to replicate the automatic + behavior performed by ``enum`` type. .. seealso:: - https://github.com/common-workflow-language/cwl-v1.2/issues/267 @@ -871,15 +873,35 @@ def _convert_cwl_io_enum(cwl_io_type, cwl_io_symbols, io_select, allow_unique, a Because ``valueFrom`` can only be used with ``inputBinding``, any output providing a set of allowed values that are not ``string``-based will be ignored when converted to :term:`CWL` :term:`I/O`. + .. seealso:: + - :func:`_get_cwl_js_value_from` defines the ``enum``-like ``valueFrom`` checks + - :func:`_patch_cwl_enum_js_requirement` must be called on the entire :term:`CWL` to inject the + relevant :data:`CWL_REQUIREMENT_INLINE_JAVASCRIPT`, since it might not be defined in the original :term:`CWL`. + + Another edge-case that happens even if the base type is a ``string`` is when the enum ``symbols`` + contain an entry with a ``:`` character (e.g.: as in the case of a time ``HH:MM`` string). + In such case, :mod:`schema_salad` incorrectly parses it as if a namespaced :term:`URL` (i.e.: ``:``) + was specified. Because of this, the symbol strings get extended to an unresolvable namespace, which leads to a + partial and incorrect enum value (e.g.: ``abc:def`` becomes ``input-id#def``). This causes the resulting ``enum`` + to never accept the submitted values, or worst, causes the entire :term:`CWL` to fail validation if duplicate + values are obtained (e.g.: ``12:00`` and ``24:00`` both extend to ``input-id#00``, causing a duplicate ``00``). + To handle this case, the ``symbols`` can be updated with a prefixed ``#`` character, making :mod:`schema_salad` + interpret it as if it was already a :term:`URI` relative to the input, which is what it aims to generate, and + therefore interprets the rest of the string (including the ``:``) literally. + + .. seealso:: + - https://github.com/common-workflow-language/cwltool/issues/2071 + :param cwl_io_type: Basic type for which allowed values should apply. :param cwl_io_symbols: Allowed values to restrict the :term:`I/O` definition. :return: Converted definition as CWL Enum or with relevant value validation as applicable for the type. """ if cwl_io_type not in PACKAGE_BASIC_TYPES: return {} - if cwl_io_type == "string": + any_colon_symbol = any(":" in str(symbol) for symbol in cwl_io_symbols) + if cwl_io_type == "string" and not any_colon_symbol: return {"type": {"type": PACKAGE_ENUM_BASE, "symbols": cwl_io_symbols}} - if cwl_io_type not in PACKAGE_NUMERIC_TYPES: + if cwl_io_type not in PACKAGE_NUMERIC_TYPES and not (cwl_io_type == "string" and any_colon_symbol): LOGGER.warning( "Could not resolve conversion of CWL I/O as Enum for type '%s'. " "Ignoring value validation against specified allowed values: %s.", @@ -889,9 +911,10 @@ def _convert_cwl_io_enum(cwl_io_type, cwl_io_symbols, io_select, allow_unique, a return {"type": cwl_io_type} if not ( - (all(isinstance(value, bool) for value in cwl_io_symbols) and cwl_io_type == "boolean") or - (all(isinstance(value, int) for value in cwl_io_symbols) and cwl_io_type in PACKAGE_INTEGER_TYPES) or - (all(isinstance(value, float) for value in cwl_io_symbols) and cwl_io_type in PACKAGE_FLOATING_TYPES) + (cwl_io_type == "string" and all(isinstance(value, str) for value in cwl_io_symbols)) or + (cwl_io_type == "boolean" and all(isinstance(value, bool) for value in cwl_io_symbols)) or + (cwl_io_type in PACKAGE_INTEGER_TYPES and all(isinstance(value, int) for value in cwl_io_symbols)) or + (cwl_io_type in PACKAGE_FLOATING_TYPES and all(isinstance(value, float) for value in cwl_io_symbols)) ): LOGGER.warning( "Incompatible CWL I/O type '%s' detected for specified allowed values: %s. " @@ -901,6 +924,10 @@ def _convert_cwl_io_enum(cwl_io_type, cwl_io_symbols, io_select, allow_unique, a ) cwl_io_type = "Any" + if any_colon_symbol: + cwl_io_symbols = [f"#{symbol}" for symbol in cwl_io_symbols] + return {"type": cwl_io_type, "symbols": cwl_io_symbols} + if io_select != IO_INPUT: return {"type": cwl_io_type} @@ -1391,6 +1418,13 @@ def parse_cwl_enum_type(io_info): f"Unsupported I/O 'enum' base type: `{type(first_allow)!s}`, from definition: `{io_info!r}`." ) + if io_type == "string": + # un-patch enum causing CWL parsing errors caused by ':' (see _convert_cwl_io_enum) + io_allow = [ + allow[1:] if allow.startswith("#") and ":" in allow else allow + for allow in io_allow + ] + io_def = CWLIODefinition( type=io_type, # type: ignore enum=True, From deed73d007300c3bd56be74ea6452d53e86d5eab Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Sat, 23 Nov 2024 01:03:29 -0500 Subject: [PATCH 17/24] fix metadta role as URI + add WPS->CWL metadata inverse mapping --- CHANGES.rst | 10 +- docs/source/package.rst | 22 +-- tests/processes/test_wps_package.py | 256 ++++++++++++++++++++++++---- weaver/processes/wps_package.py | 119 ++++++++----- 4 files changed, 317 insertions(+), 90 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index e2afd597a..3435505e9 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -62,6 +62,10 @@ Changes: submitted ``response`` body parameter (*OGC API - Processes v1.0*), the ``Prefer: return`` header (*OGC API - Processes v2.0*), the requested ``Accept`` header, and any relevant ``transmissionMode`` request body overrides per filtered ``outputs``. +- Modify the mapping and generation of `WPS`/`OGC API` metadata against `CWL` corresponding fields using + the namespaced ``schema.org`` to *always* employ the full `URI` as ``rel`` or ``role`` according to the + provided metadata link or value to allow explicit identification of the ``schema.org`` concept origin. +- Add mapping of metadata from `CWL` to `WPS`/`OGC API` ``metadata`` field for additional ``schema.org`` concepts. Fixes: ------ @@ -70,8 +74,10 @@ Fixes: - Fix ``url`` parameter to override the `CLI` internal ``url`` when passed explicitly to the invoked operation. - Fix ``href`` detection when provided directly as mapping within the ``executionUnit`` of the deployment body. - Fix definition of `CWL` ``schema.org`` namespaced fields (i.e.: ``s:author`` and ``s:dateCreated``) causing - schema deserialization error when validation the submitted request body against typical examples provided in + schema deserialization error when validating the submitted request body against typical examples provided in `CWL Metadata and Authorship `_. +- Fix mapping of `CWL` ``schema.org`` metadata to `WPS`/`OGC API` equivalent metadata defining invalid ``role`` + not respecting the `URI` schema validation constraint. - Fix ``GET /jobs/{jobId}/inputs`` contents to correctly return the submitted ``outputs`` definition for `Process` execution (fixes `#715 `_). - Fix missing ``Link`` header with ``rel: monitor`` relationship in the created `Job` responses @@ -114,7 +120,7 @@ Changes: and a sample `crim-ca/ncml2stac `_ repository making use of it with the `Weaver` `CLI` to generate a deployed `OGC API - Processes` definition (fixes `#63 `_). -- Add parsing of additional metadata from schema.org in CWL document to convert into process fields +- Add parsing of additional metadata from ``schema.org`` in CWL document to convert into process fields (fixes `#463 `_). - Add more metadata mapping details in documentation (fixes `#613 `_). diff --git a/docs/source/package.rst b/docs/source/package.rst index 266a95276..ad1f0a764 100644 --- a/docs/source/package.rst +++ b/docs/source/package.rst @@ -1552,15 +1552,15 @@ schema can be added as well, as presented in :ref:`oas_json_types` section. Metadata ----------------------- -Metadata fields are transferred between :term:`WPS` (from :term:`Process` description) and :term:`CWL` +Metadata fields are transferred between :term:`WPS`/:term:`OAP` (from :term:`Process` description) and :term:`CWL` (from :term:`Application Package`) when match is possible. Per :term:`I/O` definition that support certain metadata fields (notably descriptions), are also transferred. .. note:: - Because the ``schema`` (:term:`OAS`) definitions are embedded within :term:`WPS` I/O definitions, corresponding - metadata fields **ARE NOT** transferred. This choice is made in order to keep ``schema`` succinct such that they - only describe the structure of the expected data type and format, and to avoid too much metadata duplication for - each :term:`I/O` in the resulting :term:`Process` description. + Metadata fields nested within the ``schema`` (:term:`OAS`) property employed to represent + an :term:`OAP` :term:`I/O` definition **ARE NOT** transferred. This choice is made in order to keep ``schema`` + succinct such that they only describe the structure of the expected data type and format, + and to avoid too much metadata duplication for each :term:`I/O` in the resulting :term:`Process` description. Below is a list of compatible elements. @@ -1572,9 +1572,9 @@ Below is a list of compatible elements. +-----------------------------------------+----------------------------------------------------------+ | Parameters in :term:`WPS` Context | Parameters in :term:`CWL` Context | +=========================================+==========================================================+ - | ``keywords`` | ``s:keywords`` [#cwl_schemaorg]_ | + | ``keywords`` | ``s:keywords`` [#CWLschemaorg]_ | +-----------------------------------------+----------------------------------------------------------+ - | ``metadata`` |br| | Supported fields [#cwl_schemaorg]_ : |br| | + | ``metadata`` |br| | Supported fields [#CWLschemaorg]_ : |br| | | (using ``title``, ``role``, ``value``, | - ``s:author`` |br| | | ``rel`` and ``href`` fields) | - ``s:citation`` |br| | | | - ``s:codeRepository`` |br| | @@ -1587,15 +1587,15 @@ Below is a list of compatible elements. +-----------------------------------------+----------------------------------------------------------+ | ``abstract``/``description`` | ``doc`` | +-----------------------------------------+----------------------------------------------------------+ - | ``version`` | ``s:version``/``s:softwareVersion`` [#cwl_schemaorg]_ | + | ``version`` | ``s:version``/``s:softwareVersion`` [#CWLschemaorg]_ | +-----------------------------------------+----------------------------------------------------------+ .. rubric:: Details -.. [#cwl_schemaorg] +.. [#CWLschemaorg] When using these properties, it is expected that the :term:`CWL` :term:`Application Package` resolves the ``$schemas`` with a reference to the |cwl-metadata-schema-org|_. Furthermore, the ``$namespaces`` - is expected to resolve the prefix ``s`` to the `http://schema.org `_ definitions + is expected to resolve the prefix ``s`` to the `https://schema.org `_ definitions corresponding to the RDF schema, as shown below. See |cwl-metadata|_ for a complete example using those fields and their expected contents. @@ -1605,7 +1605,7 @@ Below is a list of compatible elements. $schemas: - https://schema.org/version/latest/schemaorg-current-https.rdf $namespaces: - s: http://schema.org + s: https://schema.org .. _app_pkg_secret_parameters: diff --git a/tests/processes/test_wps_package.py b/tests/processes/test_wps_package.py index 4eab31147..2bb0b0e29 100644 --- a/tests/processes/test_wps_package.py +++ b/tests/processes/test_wps_package.py @@ -851,7 +851,7 @@ def test_format_extension_validator_basic(data_input, mode, expect): @pytest.mark.parametrize( - ["cwl_package_package", "wps_package_metadata", "cwl_package_expected"], + ["cwl_package", "wps_metadata", "process_metadata_expected", "cwl_metadata_expected"], [ ( # Test author metadata with empty wps_package @@ -866,7 +866,7 @@ def test_format_extension_validator_basic(data_input, mode, expect): "title": "", "metadata": [ { - "role": "author", + "role": "https://schema.org/author", "value": { "$schema": "https://schema.org/Person", "name": "John Doe", @@ -874,12 +874,17 @@ def test_format_extension_validator_basic(data_input, mode, expect): } } ] + }, + { + "s:author": [ + {"class": "s:Person", "s:name": "John Doe", "s:affiliation": "Example Inc."} + ], } ), ( # Test codeRepository { - "s:codeRepository": "https://gitlab.com/", + "s:codeRepository": "https://gitlab.com/some-org/some-repo", }, {}, { @@ -888,10 +893,13 @@ def test_format_extension_validator_basic(data_input, mode, expect): "metadata": [ { "type": "text/html", - "rel": "codeRepository", - "href": "https://gitlab.com/" + "rel": "https://schema.org/codeRepository", + "href": "https://gitlab.com/some-org/some-repo" } ] + }, + { + "s:codeRepository": "https://gitlab.com/some-org/some-repo" } ), ( @@ -903,8 +911,8 @@ def test_format_extension_validator_basic(data_input, mode, expect): "metadata": [ { "type": "text/html", - "rel": "codeRepository", - "href": "https://gitlab.com/" + "rel": "https://schema.org/codeRepository", + "href": "https://gitlab.com/some-org/some-repo" } ] }, @@ -915,10 +923,14 @@ def test_format_extension_validator_basic(data_input, mode, expect): "metadata": [ { "type": "text/html", - "rel": "codeRepository", - "href": "https://gitlab.com/" + "rel": "https://schema.org/codeRepository", + "href": "https://gitlab.com/some-org/some-repo" }, ], + }, + { + "s:version": "1.0", + "s:codeRepository": "https://gitlab.com/some-org/some-repo" } ), ( @@ -931,6 +943,9 @@ def test_format_extension_validator_basic(data_input, mode, expect): "abstract": "", "title": "", "version": "1.0.0" + }, + { + "s:softwareVersion": "1.0.0" } ), ( @@ -947,7 +962,7 @@ def test_format_extension_validator_basic(data_input, mode, expect): "title": "", "metadata": [ { - "role": "contributor", + "role": "https://schema.org/contributor", "value": { "$schema": "https://schema.org/Person", "name": "John Doe", @@ -955,7 +970,7 @@ def test_format_extension_validator_basic(data_input, mode, expect): } }, { - "role": "contributor", + "role": "https://schema.org/contributor", "value": { "$schema": "https://schema.org/Person", "name": "Other Guy", @@ -963,6 +978,12 @@ def test_format_extension_validator_basic(data_input, mode, expect): } } ] + }, + { + "s:contributor": [ + {"class": "s:Person", "s:name": "John Doe", "s:affiliation": "Example Inc."}, + {"class": "s:Person", "s:name": "Other Guy", "s:affiliation": "Elsewhere"}, + ], } ), ( @@ -977,10 +998,13 @@ def test_format_extension_validator_basic(data_input, mode, expect): "metadata": [ { "type": "text/plain", - "rel": "citation", + "rel": "https://schema.org/citation", "href": "https://dx.doi.org/10.6084/m9.figshare.3115156.v2" }, ], + }, + { + "s:citation": "https://dx.doi.org/10.6084/m9.figshare.3115156.v2", } ), ( @@ -994,7 +1018,7 @@ def test_format_extension_validator_basic(data_input, mode, expect): "metadata": [ { "type": "text/plain", - "rel": "citation", + "rel": "https://schema.org/citation", "href": "https://dx.doi.org/10.6084/m9.figshare.3115156.v2" }, ], @@ -1005,14 +1029,18 @@ def test_format_extension_validator_basic(data_input, mode, expect): "metadata": [ { "type": "text/plain", - "rel": "citation", + "rel": "https://schema.org/citation", "href": "https://dx.doi.org/10.6084/m9.figshare.3115156.v2" }, { - "role": "dateCreated", + "role": "https://schema.org/dateCreated", "value": "2016-12-13", } ] + }, + { + "s:citation": "https://dx.doi.org/10.6084/m9.figshare.3115156.v2", + "s:dateCreated": "2016-12-13", } ), ( @@ -1024,7 +1052,7 @@ def test_format_extension_validator_basic(data_input, mode, expect): { "metadata": [ { - "role": "test", + "role": "https://example.com/test", "value": "test", } ] @@ -1034,15 +1062,15 @@ def test_format_extension_validator_basic(data_input, mode, expect): "title": "", "metadata": [ { - "role": "test", + "role": "https://example.com/test", "value": "test", - }, - { - "title": CWL_NAMESPACE_SCHEMA_ID, - "href": CWL_NAMESPACE_SCHEMA_URL, } ] - } + }, + { + "$schemas": [CWL_NAMESPACE_SCHEMA_URL], + "$namespaces": dict(CWL_NAMESPACE_SCHEMA_DEFINITION), + }, ), ( # test CWL 's:keywords' vs WPS 'keywords' @@ -1053,15 +1081,183 @@ def test_format_extension_validator_basic(data_input, mode, expect): "keywords": ["a", "x", "y", "d", "e", "f"], }, lambda src: set(src["keywords"]) == {"a", "b", "c", "x", "y", "d", "e", "f"}, + { + "s:keywords": ["a", "b", "c"], + }, ), + ( + # test that uses multiple combinations, some info on one side or the other, and some mixed + { + "s:version": "1.2.3", + "s:author": [ + {"class": "s:Person", "s:name": "Another Guy", "s:affiliation": "Super Industry"} + ], + }, + { + "metadata": [ + { + "type": "text/plain", + "rel": "https://schema.org/citation", + "href": "https://dx.doi.org/10.6084/m9.figshare.3115156.v2" + }, + { + "role": "https://schema.org/dateCreated", + "value": "2016-12-13", + }, + { + "role": "https://schema.org/author", + "value": { + "$schema": "https://schema.org/Person", + "name": "Main Guy", + "affiliation": "Some Company" + } + }, + { + "role": "https://schema.org/contributor", + "value": { + "$schema": "https://schema.org/Person", + "name": "John Doe", + "affiliation": "Example Inc." + } + }, + { + "role": "https://schema.org/contributor", + "value": { + "$schema": "https://schema.org/Person", + "name": "Other Guy", + "affiliation": "Elsewhere" + } + }, + ] + }, + { + "title": "", + "abstract": "", + "version": "1.2.3", + "metadata": [ + { + "type": "text/plain", + "rel": "https://schema.org/citation", + "href": "https://dx.doi.org/10.6084/m9.figshare.3115156.v2" + }, + { + "role": "https://schema.org/dateCreated", + "value": "2016-12-13", + }, + { + "role": "https://schema.org/author", + "value": { + "$schema": "https://schema.org/Person", + "name": "Main Guy", + "affiliation": "Some Company" + } + }, + { + "role": "https://schema.org/contributor", + "value": { + "$schema": "https://schema.org/Person", + "name": "John Doe", + "affiliation": "Example Inc." + } + }, + { + "role": "https://schema.org/contributor", + "value": { + "$schema": "https://schema.org/Person", + "name": "Other Guy", + "affiliation": "Elsewhere" + } + }, + { + "role": "https://schema.org/author", + "value": { + "$schema": "https://schema.org/Person", + "name": "Another Guy", + "affiliation": "Super Industry" + } + }, + ] + }, + { + "s:version": "1.2.3", + "s:citation": "https://dx.doi.org/10.6084/m9.figshare.3115156.v2", + "s:dateCreated": "2016-12-13", + "s:author": [ + # must not be replaced by the other author defined in the process metadata! + {"class": "s:Person", "s:name": "Another Guy", "s:affiliation": "Super Industry"} + ], + "s:contributor": [ + {"class": "s:Person", "s:name": "John Doe", "s:affiliation": "Example Inc."}, + {"class": "s:Person", "s:name": "Other Guy", "s:affiliation": "Elsewhere"}, + ], + } + ), + ( + # Duplicate WPS "role/rel" that must map to a single field on CWL side is ignored (cannot disambiguate). + # At the same, test that both "role/rel" are considered, with "role" prioritized since another "rel" can + # be used to represent the link by another commonly known relation-type. + {}, + { + "metadata": [ + { + "type": "text/html", + "rel": "https://schema.org/codeRepository", + "href": "https://gitlab.com/some-org/some-repo" + }, + { + "type": "text/html", + "rel": "alt-source", + "role": "https://schema.org/codeRepository", + "href": "https://github.com/alt-org/other-repo" + } + ] + }, + { + "title": "", + "abstract": "", + "metadata": [ + { + "type": "text/html", + "rel": "https://schema.org/codeRepository", + "href": "https://gitlab.com/some-org/some-repo" + }, + { + "type": "text/html", + "rel": "alt-source", + "role": "https://schema.org/codeRepository", + "href": "https://github.com/alt-org/other-repo" + } + ] + }, + {}, # should not be updated with any of the links + ) ] ) -def test_process_metadata(cwl_package_package, wps_package_metadata, cwl_package_expected): - # type: (CWL, ProcessOfferingMapping, Union[CWL, Callable[[CWL], bool]]) -> None - cwl_package_validated = sd.CWLMetadata().deserialize(cwl_package_package) # must not raise - assert cwl_package_validated == cwl_package_package # should be unchanged - _update_package_metadata(wps_package_metadata, cwl_package_package) - if inspect.isfunction(cwl_package_expected): - assert cwl_package_expected(wps_package_metadata) +def test_process_metadata(cwl_package, wps_metadata, process_metadata_expected, cwl_metadata_expected): + # type: (CWL, ProcessOfferingMapping, Union[CWL, Callable[[CWL], bool]], CWL) -> None + + # submitted CWL metadata must not raise and must be unmodified after validation + cwl_package_validated = sd.CWLMetadata().deserialize(cwl_package) + assert cwl_package_validated == cwl_package + + # submitted WPS metadata must not raise and must be unmodified after validation + if "metadata" in wps_metadata: + wps_metadata_validated = sd.DescriptionMeta().deserialize(wps_metadata) + assert wps_metadata_validated["metadata"] == wps_metadata["metadata"] + + _update_package_metadata(wps_metadata, cwl_package) + + # resolved result should be as expected + if inspect.isfunction(process_metadata_expected): + assert process_metadata_expected(wps_metadata) else: - assert wps_package_metadata == cwl_package_expected + assert wps_metadata == process_metadata_expected + + # resolved metadata must not raise and must be unmodified after validation + if isinstance(process_metadata_expected, dict) and "metadata" in process_metadata_expected: + process_metadata_validated = sd.DescriptionMeta().deserialize(process_metadata_expected) + assert process_metadata_validated["metadata"] == process_metadata_expected["metadata"] + + # resolved CWL metadata must not raise and must be unmodified after validation + cwl_package_validated = sd.CWLMetadata().deserialize(cwl_package) + assert cwl_package_validated == cwl_metadata_expected diff --git a/weaver/processes/wps_package.py b/weaver/processes/wps_package.py index 579603f76..e4ef73bf7 100644 --- a/weaver/processes/wps_package.py +++ b/weaver/processes/wps_package.py @@ -75,8 +75,11 @@ from weaver.processes.constants import ( CWL_NAMESPACE_CWLTOOL_URL, CWL_NAMESPACE_SCHEMA_ID, + CWL_NAMESPACE_SCHEMA_METADATA_AUTHOR, CWL_NAMESPACE_SCHEMA_METADATA_CODE_REPOSITORY, + CWL_NAMESPACE_SCHEMA_METADATA_CONTRIBUTOR, CWL_NAMESPACE_SCHEMA_METADATA_KEYWORDS, + CWL_NAMESPACE_SCHEMA_METADATA_PERSON, CWL_NAMESPACE_SCHEMA_METADATA_SOFTWARE_VERSION, CWL_NAMESPACE_SCHEMA_METADATA_SUPPORTED, CWL_NAMESPACE_SCHEMA_METADATA_VERSION, @@ -769,89 +772,77 @@ def _get_package_inputs_outputs(package_factory, # type: CWLFactoryCallable _get_package_io(package_factory, io_select=IO_OUTPUT, as_json=as_json)) -def _update_package_metadata(wps_package_metadata, cwl_package_package): +def _update_package_metadata(wps_metadata, cwl_package): # type: (JSON, CWL) -> None """ Updates the package :term:`WPS` metadata dictionary from extractable `CWL` package definition. """ - wps_package_metadata["title"] = wps_package_metadata.get("title", cwl_package_package.get("label", "")) - wps_package_metadata["abstract"] = wps_package_metadata.get("abstract", cwl_package_package.get("doc", "")) + wps_metadata["title"] = wps_metadata.get("title", cwl_package.get("label", "")) + wps_metadata["abstract"] = wps_metadata.get("abstract", cwl_package.get("doc", "")) if ( - "$schemas" in cwl_package_package - and isinstance(cwl_package_package["$schemas"], list) - and "$namespaces" in cwl_package_package - and isinstance(cwl_package_package["$namespaces"], dict) + CWL_NAMESPACE_SCHEMA_METADATA_KEYWORDS in cwl_package and + isinstance(cwl_package[CWL_NAMESPACE_SCHEMA_METADATA_KEYWORDS], list) ): - metadata = wps_package_metadata.get("metadata", []) - namespaces_inv = {v: k for k, v in cwl_package_package["$namespaces"].items()} - for schema in cwl_package_package["$schemas"]: - for namespace_url in namespaces_inv: - if schema.startswith(namespace_url): - metadata.append({"title": namespaces_inv[namespace_url], "href": schema}) - wps_package_metadata["metadata"] = metadata - - if ( - CWL_NAMESPACE_SCHEMA_METADATA_KEYWORDS in cwl_package_package and - isinstance(cwl_package_package[CWL_NAMESPACE_SCHEMA_METADATA_KEYWORDS], list) - ): - wps_package_metadata["keywords"] = list( - set(wps_package_metadata.get("keywords", [])) | - set(cwl_package_package.get(CWL_NAMESPACE_SCHEMA_METADATA_KEYWORDS, [])) + wps_metadata["keywords"] = list( + set(wps_metadata.get("keywords", [])) | + set(cwl_package.get(CWL_NAMESPACE_SCHEMA_METADATA_KEYWORDS, [])) ) # specific use case with a different mapping # https://docs.ogc.org/bp/20-089r1.html#toc31 if ( - CWL_NAMESPACE_SCHEMA_METADATA_VERSION in cwl_package_package or - CWL_NAMESPACE_SCHEMA_METADATA_SOFTWARE_VERSION in cwl_package_package + CWL_NAMESPACE_SCHEMA_METADATA_VERSION in cwl_package or + CWL_NAMESPACE_SCHEMA_METADATA_SOFTWARE_VERSION in cwl_package ): version_value = ( - wps_package_metadata.get("version") - or cwl_package_package.get(CWL_NAMESPACE_SCHEMA_METADATA_VERSION) - or cwl_package_package.get(CWL_NAMESPACE_SCHEMA_METADATA_SOFTWARE_VERSION) + wps_metadata.get("version") + or cwl_package.get(CWL_NAMESPACE_SCHEMA_METADATA_VERSION) + or cwl_package.get(CWL_NAMESPACE_SCHEMA_METADATA_SOFTWARE_VERSION) ) # Only set the key if version_value is not empty or null if version_value: - wps_package_metadata["version"] = str(version_value) + wps_metadata["version"] = str(version_value) else: - version_value = wps_package_metadata.get("version") + version_value = wps_metadata.get("version") if version_value: - wps_package_metadata["version"] = str(version_value) + wps_metadata["version"] = str(version_value) schema_ns = f"{CWL_NAMESPACE_SCHEMA_ID}:" - for metadata_mapping in CWL_NAMESPACE_SCHEMA_METADATA_SUPPORTED: - if metadata_mapping in [ # skip handled above + metadata = wps_metadata.get("metadata", []) + for meta_name in CWL_NAMESPACE_SCHEMA_METADATA_SUPPORTED: + if meta_name in [ # skip handled above CWL_NAMESPACE_SCHEMA_METADATA_KEYWORDS, CWL_NAMESPACE_SCHEMA_METADATA_VERSION, CWL_NAMESPACE_SCHEMA_METADATA_SOFTWARE_VERSION, ]: continue - if metadata_mapping in cwl_package_package: - metadata = wps_package_metadata.get("metadata", []) + meta_uri = meta_name.replace(schema_ns, CWL_NAMESPACE_SCHEMA_URL) + + # CWL package => WPS context + if meta_name in cwl_package: if ( - isinstance(cwl_package_package[metadata_mapping], str) - and urlparse(cwl_package_package[metadata_mapping]).scheme != "" + isinstance(cwl_package[meta_name], str) + and urlparse(cwl_package[meta_name]).scheme != "" ): - url = cwl_package_package[metadata_mapping] - if metadata_mapping == CWL_NAMESPACE_SCHEMA_METADATA_CODE_REPOSITORY: + url = cwl_package[meta_name] + if meta_name == CWL_NAMESPACE_SCHEMA_METADATA_CODE_REPOSITORY: ctype = ContentType.TEXT_HTML else: ctype = get_content_type(os.path.splitext(url)[-1], default=ContentType.TEXT_PLAIN) metadata.append({ "type": ctype, - "rel": metadata_mapping.strip(schema_ns), - "href": cwl_package_package[metadata_mapping] + "rel": meta_uri, + "href": cwl_package[meta_name] }) - elif isinstance(cwl_package_package[metadata_mapping], str): - class_name = metadata_mapping.strip(schema_ns) + elif isinstance(cwl_package[meta_name], str): metadata.append({ - "role": class_name, - "value": cwl_package_package[metadata_mapping] + "role": meta_uri, + "value": cwl_package[meta_name] }) else: - for objects in cwl_package_package[metadata_mapping]: + for objects in cwl_package[meta_name]: class_name = objects["class"].strip(schema_ns) value = { "$schema": f"{CWL_NAMESPACE_SCHEMA_URL}{class_name}" @@ -860,11 +851,45 @@ def _update_package_metadata(wps_package_metadata, cwl_package_package): if key.startswith(schema_ns): value[key.strip(schema_ns)] = val metadata.append({ - "role": metadata_mapping.strip(schema_ns), + "role": meta_uri, "value": value }) + wps_metadata["metadata"] = metadata - wps_package_metadata["metadata"] = metadata + # CWL package <= WPS context + # note: + # this mapping is accomplished only if the CWL did not already result in creating the other mapping + # purposely avoid overriding a field already provided in CWL (the "truth"), even if they mismatch + # this could be done on purpose, such as attributing different authors for the process vs package + else: + metadata_found = [meta for meta in metadata if (meta.get("role") or meta.get("rel")) == meta_uri] + if not metadata_found: + continue + metadata_found = copy.deepcopy(metadata_found) + if meta_name in [CWL_NAMESPACE_SCHEMA_METADATA_AUTHOR, CWL_NAMESPACE_SCHEMA_METADATA_CONTRIBUTOR]: + for meta in metadata_found: + if "value" not in meta or not isinstance(meta["value"], dict): + continue # pragma: no cover # sanity check, should not happen (fails validation) + meta_schema = meta.get("$schema", "").replace(CWL_NAMESPACE_SCHEMA_URL, schema_ns) + meta_schema = meta_schema or CWL_NAMESPACE_SCHEMA_METADATA_PERSON + meta["class"] = meta_schema + meta.pop("role", None) + meta.pop("$schema", None) + meta["value"].pop("$schema", None) + meta_value = meta.pop("value") + for field in list(meta_value): + field_ns = f"{CWL_NAMESPACE_SCHEMA_ID}:{field}" + meta[field_ns] = meta_value.pop(field) + cwl_package[meta_name] = metadata_found + + # all other fields must be a single string + # ignore others to avoid injecting unknown structures that could break the CWL + elif len(metadata_found) == 1: + meta_key = get_any_value(metadata_found[0], key=True) + meta_value = metadata_found[0][meta_key] + if not isinstance(meta_value, str): + continue # pragma: no cover # sanity check, should not happen (fails validation) + cwl_package[meta_name] = meta_value def _patch_wps_process_description_url(reference, process_hint): From c12bc6347df37c3f2d38e91e32127b5b29638ca8 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Sat, 23 Nov 2024 01:07:43 -0500 Subject: [PATCH 18/24] fix CWL enum type invalid --- tests/processes/test_convert.py | 2 +- weaver/processes/convert.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/processes/test_convert.py b/tests/processes/test_convert.py index f24bd2c1b..839876404 100644 --- a/tests/processes/test_convert.py +++ b/tests/processes/test_convert.py @@ -2301,7 +2301,7 @@ def test_ogcapi2cwl_process_cwl_enum_updated(input_str, input_int, input_float, assert "values.includes(self)" in cwl_value_from assert "self.every(item => values.includes(item))" not in cwl_value_from - assert cwl["inputs"]["enum-time"]["type"] == {"type": "enum", "symbols": ["#12:00", "#24:00"]} + assert cwl["inputs"]["enum-time"] == {"type": "enum", "symbols": ["#12:00", "#24:00"]} assert "inputBinding" not in cwl["inputs"]["enum-time"] diff --git a/weaver/processes/convert.py b/weaver/processes/convert.py index 864c112f1..967be596a 100644 --- a/weaver/processes/convert.py +++ b/weaver/processes/convert.py @@ -926,7 +926,7 @@ def _convert_cwl_io_enum(cwl_io_type, cwl_io_symbols, io_select, allow_unique, a if any_colon_symbol: cwl_io_symbols = [f"#{symbol}" for symbol in cwl_io_symbols] - return {"type": cwl_io_type, "symbols": cwl_io_symbols} + return {"type": PACKAGE_ENUM_BASE, "symbols": cwl_io_symbols} if io_select != IO_INPUT: return {"type": cwl_io_type} From f27b50cf01a7e6fd37c32ef6987de34dfc3d16d2 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Sat, 23 Nov 2024 01:13:11 -0500 Subject: [PATCH 19/24] remove unused import --- tests/processes/test_wps_package.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/processes/test_wps_package.py b/tests/processes/test_wps_package.py index 2bb0b0e29..2679a3443 100644 --- a/tests/processes/test_wps_package.py +++ b/tests/processes/test_wps_package.py @@ -35,7 +35,6 @@ from weaver.formats import ContentType from weaver.processes.constants import ( CWL_NAMESPACE_SCHEMA_DEFINITION, - CWL_NAMESPACE_SCHEMA_ID, CWL_NAMESPACE_SCHEMA_URL, CWL_REQUIREMENT_APP_DOCKER, CWL_REQUIREMENT_APP_DOCKER_GPU, From dc423ff33ea19818614866b5b83b3bfc73890f10 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Mon, 25 Nov 2024 13:37:19 -0500 Subject: [PATCH 20/24] fix convert of enum --- weaver/processes/convert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weaver/processes/convert.py b/weaver/processes/convert.py index 967be596a..7c3a14a51 100644 --- a/weaver/processes/convert.py +++ b/weaver/processes/convert.py @@ -926,7 +926,7 @@ def _convert_cwl_io_enum(cwl_io_type, cwl_io_symbols, io_select, allow_unique, a if any_colon_symbol: cwl_io_symbols = [f"#{symbol}" for symbol in cwl_io_symbols] - return {"type": PACKAGE_ENUM_BASE, "symbols": cwl_io_symbols} + return {"type": {"type": PACKAGE_ENUM_BASE, "symbols": cwl_io_symbols}} if io_select != IO_INPUT: return {"type": cwl_io_type} From 221929a19d3d417989b3228ea4e571c5dc0d7fd1 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 26 Nov 2024 03:15:50 -0500 Subject: [PATCH 21/24] fix enum test --- tests/processes/test_convert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/processes/test_convert.py b/tests/processes/test_convert.py index 839876404..68293da5d 100644 --- a/tests/processes/test_convert.py +++ b/tests/processes/test_convert.py @@ -2301,7 +2301,7 @@ def test_ogcapi2cwl_process_cwl_enum_updated(input_str, input_int, input_float, assert "values.includes(self)" in cwl_value_from assert "self.every(item => values.includes(item))" not in cwl_value_from - assert cwl["inputs"]["enum-time"] == {"type": "enum", "symbols": ["#12:00", "#24:00"]} + assert cwl["inputs"]["enum-time"] == {"type": {"type": "enum", "symbols": ["#12:00", "#24:00"]}} assert "inputBinding" not in cwl["inputs"]["enum-time"] From 31ab82ba5f48eaa0a45315dc2d8e2d75d417bd13 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 26 Nov 2024 03:21:55 -0500 Subject: [PATCH 22/24] fix workflow step status/cleanup race condition --- CHANGES.rst | 5 +++++ weaver/processes/wps_package.py | 12 +++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 378d94499..cb0f7d1d3 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -69,6 +69,11 @@ Changes: Fixes: ------ +- Fix race condition between workflow step early input staging cleanup on successful step status update. + Due to the ``_update_status`` method of ``pywps`` performing cleanup when propagating a successful completion of + a step within a workflow, the parent workflow was marked as succeeded (`XML` status document), and any step executed + after the successful one that were depending on the workflow inputs could result in not-found file references if it + was staged by the previous step. - Fix `CWL` ``enum`` type mishandling ``symbols`` containing a colon (``:``) character (e.g.: a list of allowed times) leading to their invalid interpretation as namespaced strings (i.e.: ``:``), in turn failing validation and breaking the resulting `CWL`. Such ``enum`` will be patched with updated ``symbols`` prefixed by ``#`` to respect diff --git a/weaver/processes/wps_package.py b/weaver/processes/wps_package.py index e4ef73bf7..29a2488f6 100644 --- a/weaver/processes/wps_package.py +++ b/weaver/processes/wps_package.py @@ -1939,8 +1939,8 @@ def update_effective_user(self): level=logging.WARNING if (app_euid == "0" or app_egid == "0") else logging.INFO, ) - def update_status(self, message, progress, status, error=None): - # type: (str, Number, AnyStatusType, Optional[Exception]) -> None + def update_status(self, message, progress, status, error=None, step=False): + # type: (str, Number, AnyStatusType, Optional[Exception], bool) -> None """ Updates the :mod:`pywps` real job status from a specified parameters. """ @@ -1961,7 +1961,7 @@ def update_status(self, message, progress, status, error=None): # therefore, use the '_update_status' to enforce the status # using protected method also avoids weird overrides of progress # percent on failure and final 'success' status - self.response._update_status(pywps_status_id, message, self.percent) # noqa: W0212 + self.response._update_status(pywps_status_id, message, self.percent, clean=not step) # noqa: W0212 if isinstance(error, Exception): self.exception_message(exception_type=type(error), exception=error, @@ -1979,11 +1979,17 @@ def step_update_status(self, status, # type: AnyStatusType error=None, # type: Optional[Exception] ): # type: (...) -> None + # ensure the status of the current workflow is not changed to 'success' if up a step-update + # setting to 'success' will report the wrong value in the status XML document + # this would also trigger cleanup, which would remove an staged file needed by a future step + if status == Status.SUCCEEDED: + status = Status.RUNNING self.update_status( message=f"[provider: {target_host}, step: {step_name}] - {str(message).strip()}", progress=map_progress(progress, start_step_progress, end_step_progress), status=status, error=error, + step=True, ) def log(self, level, message, *args, **kwargs): From c5f41080896a9e80db348e60606f5054429c7c34 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 26 Nov 2024 03:32:37 -0500 Subject: [PATCH 23/24] fix html process description without metadata title --- CHANGES.rst | 1 + weaver/wps_restapi/templates/responses/util.mako | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index cb0f7d1d3..85018c400 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -74,6 +74,7 @@ Fixes: a step within a workflow, the parent workflow was marked as succeeded (`XML` status document), and any step executed after the successful one that were depending on the workflow inputs could result in not-found file references if it was staged by the previous step. +- Fix optional ``title`` in metadata causing failing HTML rendering of the `Process` description if omitted. - Fix `CWL` ``enum`` type mishandling ``symbols`` containing a colon (``:``) character (e.g.: a list of allowed times) leading to their invalid interpretation as namespaced strings (i.e.: ``:``), in turn failing validation and breaking the resulting `CWL`. Such ``enum`` will be patched with updated ``symbols`` prefixed by ``#`` to respect diff --git a/weaver/wps_restapi/templates/responses/util.mako b/weaver/wps_restapi/templates/responses/util.mako index b18c46eea..372311f6b 100644 --- a/weaver/wps_restapi/templates/responses/util.mako +++ b/weaver/wps_restapi/templates/responses/util.mako @@ -116,7 +116,15 @@ NOTE: class 'language-json' used by the 'ajax/libs/highlight.js' library inserte
%for meta in metadata:
- ${meta.title} +
+ %if "title" in meta: + Title: ${meta.title} + %elif "role" in meta: + Role: ${meta.role} + %else: + Rel: ${meta.rel} + %endif +
%if "href" in meta: From 9a44ab6b758c5ed296f6cde506e8573859fe1f0e Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 26 Nov 2024 03:51:58 -0500 Subject: [PATCH 24/24] ui fix job status response --- CHANGES.rst | 1 + weaver/wps_restapi/jobs/utils.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 85018c400..2fe047066 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -75,6 +75,7 @@ Fixes: after the successful one that were depending on the workflow inputs could result in not-found file references if it was staged by the previous step. - Fix optional ``title`` in metadata causing failing HTML rendering of the `Process` description if omitted. +- Fix HTML ``Content-Type`` header erroneously set for JSON-only (for now) ``GET /jobs/{jobId}`` as similar endpoints. - Fix `CWL` ``enum`` type mishandling ``symbols`` containing a colon (``:``) character (e.g.: a list of allowed times) leading to their invalid interpretation as namespaced strings (i.e.: ``:``), in turn failing validation and breaking the resulting `CWL`. Such ``enum`` will be patched with updated ``symbols`` prefixed by ``#`` to respect diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index 17aa3035d..3f4ee4673 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -324,7 +324,11 @@ def get_job_status_schema(request): def make_headers(resolved_schema): # type: (JobStatusSchemaType) -> HeadersType - content_type = clean_media_type_format(content_accept, strip_parameters=True) + content_type = clean_media_type_format(content_accept.split(",")[0], strip_parameters=True) + # FIXME: support HTML or XML + # (allow transparently for browsers types since Accept did not raise earlier, and no other supported yet) + if content_type in ContentType.ANY_XML | {ContentType.TEXT_HTML}: + content_type = ContentType.APP_JSON content_profile = f"{content_type}; profile={resolved_schema}" content_headers = {"Content-Type": content_profile} if resolved_schema == JobStatusSchema.OGC: