diff --git a/CHANGES.rst b/CHANGES.rst index 263abe3c8..a7dae625a 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -14,10 +14,32 @@ Changes: -------- - Add support of Python 3.13. - Drop support of Python 3.8. - -Fixes: ------- -- No change. +- Add support of *OGC API - Processes - Part 4: Job Management* related to ``PROV`` requirement and conformance classes. +- Add support of `W3C PROV `_ to provide ``GET /jobs/{jobId}/prov`` endpoints + and all underlying paths (``/info``, ``/who``, ``/run``, ``/inputs``, ``/outputs``, and ``../{runId}`` variants) + to retrieve provenance metadata from a `Job` execution and its corresponding `Process` and `Workflow` definitions, + as processed by ``cwltool``/``cwlprov`` and extended by `Weaver`-specific server metadata. + Supported ``PROV`` representations are ``PROV-N``, ``PROV-NT``, ``PROV-JSON``, ``PROV-JSONLD``, ``PROV-XML`` + and ``PROV-TURTLE``, each of which can be obtained by providing the corresponding ``Accept`` headers. +- Add ``weaver.cwl_prov`` configuration option to control the new ``PROV`` metadata collection feature. +- Add ``prov`` and ``provenance`` CLI and ``WeaverClient`` operations. +- Extend ``weaver.cli.WeaverArgumentParser`` "*rules*" to allow returning an error message providing better + case-by-case details about the specific cause of failure handled by the *rule* callable. +- Update certain ``cornice`` service definitions that were using "``prov``" to refer to `Providers` to avoid + confusion with the multiple ``PROV``/`Provenance` related terminology and services added for the new feature. +- Pin ``cwltool==3.1.20241217163858`` to employ the official release including + ``PROV`` configuration provided to easily configure `Weaver` + (relates to `common-workflow-language/cwltool#2082 _) + and integrate previously provided fixes + (relates to `common-workflow-language/cwltool#2082 _) + that were applied by a forked backport ``https://github.com/fmigneault/cwltool`` repository. 
+ +Fixes: +------ +- Fix missing documentation about certain ``WeaverClient`` operations. +- Fix ``weaver.cli.OperationResult`` not setting its ``text`` property when a valid non-`JSON` response is obtained. +- Fix the `API` frontpage `HTML` rendering to return enabled features and corresponding ``doc``/``url``/``api`` + endpoints for quickly referencing the capabilities activated for a `Weaver` instance. .. _changes_6.0.0: diff --git a/config/weaver.ini.example b/config/weaver.ini.example index 03aa221bb..b8a682ee9 100644 --- a/config/weaver.ini.example +++ b/config/weaver.ini.example @@ -100,6 +100,11 @@ weaver.cwl_egid = weaver.cwl_processes_dir = weaver.cwl_processes_register_error = false +# provenance functionality +# if disabled, provenance details will not be collected when running Application Packages and Workflows +# if disabled, the '/jobs/{jobId}/prov' endpoint will always report missing information since unavailable +weaver.cwl_prov = true + # --- Weaver WPS settings --- weaver.wps = true weaver.wps_url = @@ -128,7 +133,7 @@ weaver.wps_metadata_identification_keywords=Weaver,WPS,OGC # access constraints can be comma-separated weaver.wps_metadata_identification_accessconstraints=NONE weaver.wps_metadata_identification_fees=NONE -weaver.wps_metadata_provider_name=CRIM +weaver.wps_metadata_provider_name=Computer Research Institute of Montréal (CRIM) weaver.wps_metadata_provider_url=http://pavics-weaver.readthedocs.org/en/latest/ weaver.wps_metadata_contact_name=Francis Charette-Migneault weaver.wps_metadata_contact_position=Research Software Developer diff --git a/docs/source/appendix.rst b/docs/source/appendix.rst index de18c19da..ea0f76d58 100644 --- a/docs/source/appendix.rst +++ b/docs/source/appendix.rst @@ -250,6 +250,15 @@ Glossary Entity that describes the required inputs, produced outputs, and any applicable metadata for the execution of the defined script, calculation, or operation. 
+ PROV + Provenance + Metadata using the :term:`W3C` |PROV|_ standard that is applied to a submitted :term:`Job` execution to allow + retrieving its origin, the related :term:`Application Package`, its :term:`I/O` sources and results, as well as + additional details about the server host and runtime user as applicable to replicate the experiment. + + .. seealso:: + :ref:`proc_op_job_prov` + Provider Entity that offers an ensemble of :term:`Process` under it. It is typically a reference to a remote service, where any :term:`Process` it provides is fetched dynamically on demand. @@ -331,6 +340,9 @@ Glossary Since |ogc-api-standards|_ are based on HTTP and web communications, this consortium establishes the common foundation definitions used by the :term:`API` specifications. + .. seealso:: + |w3c|_ + WKT Well-Known Text geometry representation. diff --git a/docs/source/cli.rst b/docs/source/cli.rst index 179a230c1..0603ddc74 100644 --- a/docs/source/cli.rst +++ b/docs/source/cli.rst @@ -33,14 +33,29 @@ Python Client Commands For details about using the Python :py:class:`weaver.cli.WeaverClient`, please refer directly to its class documentation and its underlying methods. 
+* :py:meth:`weaver.cli.WeaverClient.info` +* :py:meth:`weaver.cli.WeaverClient.version` +* :py:meth:`weaver.cli.WeaverClient.conformance` +* :py:meth:`weaver.cli.WeaverClient.register` +* :py:meth:`weaver.cli.WeaverClient.unregister` * :py:meth:`weaver.cli.WeaverClient.deploy` * :py:meth:`weaver.cli.WeaverClient.undeploy` * :py:meth:`weaver.cli.WeaverClient.capabilities` * :py:meth:`weaver.cli.WeaverClient.describe` +* :py:meth:`weaver.cli.WeaverClient.package` +* :py:meth:`weaver.cli.WeaverClient.jobs` +* :py:meth:`weaver.cli.WeaverClient.trigger_job` +* :py:meth:`weaver.cli.WeaverClient.update_job` * :py:meth:`weaver.cli.WeaverClient.execute` * :py:meth:`weaver.cli.WeaverClient.monitor` -* :py:meth:`weaver.cli.WeaverClient.dismiss` * :py:meth:`weaver.cli.WeaverClient.status` +* :py:meth:`weaver.cli.WeaverClient.inputs` +* :py:meth:`weaver.cli.WeaverClient.outputs` +* :py:meth:`weaver.cli.WeaverClient.logs` +* :py:meth:`weaver.cli.WeaverClient.statistics` +* :py:meth:`weaver.cli.WeaverClient.exceptions` +* :py:meth:`weaver.cli.WeaverClient.provenance` +* :py:meth:`weaver.cli.WeaverClient.dismiss` * :py:meth:`weaver.cli.WeaverClient.results` * :py:meth:`weaver.cli.WeaverClient.upload` @@ -479,6 +494,59 @@ Sample Output: .. literalinclude:: ../../weaver/wps_restapi/examples/job_results.json :language: json +.. _cli_example_job_prov: + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Job Provenance Example +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Accomplishes the :term:`Job` |PROV|_ request to obtain :term:`Provenance` metadata. + +The examples below employ the ``Echo`` :term:`Process` available in |weaver-func-test-apps|_ +and assume the referenced :term:`Job` was completed successfully. + +.. note:: + There are multiple alternative format representations offered by this operation. + Not all of them are presented below. See the various ``prov_type`` and ``prov_format`` + parameters for the combinations. + +.. 
seealso:: + - :ref:`proc_op_job_prov` provides more details about available endpoints, operations and metadata returned. + +.. code-block:: shell + :caption: Command Line + + weaver prov -u ${WEAVER_URL} -j "1c49f085-bbd7-410d-a801-81fd42469e8a" --pT run + +.. code-block:: python + :caption: Python + + from weaver.provenance import ProvenancePathType + + client.prov("1c49f085-bbd7-410d-a801-81fd42469e8a", prov_type=ProvenancePathType.PROV_RUN) + +Sample Output: + +.. literalinclude:: ../../weaver/wps_restapi/examples/job_prov_run.txt + :language: text + +.. code-block:: shell + :caption: Command Line + + weaver prov -u ${WEAVER_URL} -j "1c49f085-bbd7-410d-a801-81fd42469e8a" -nL --pF "PROV-N" + +.. code-block:: python + :caption: Python + + from weaver.provenance import ProvenanceFormat + + client.prov("1c49f085-bbd7-410d-a801-81fd42469e8a", prov_format=ProvenanceFormat.PROV_N) + +Sample Output: + +.. literalinclude:: ../../weaver/wps_restapi/examples/job_prov.txt + :language: text + .. _cli_example_upload: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst index cd3c97944..d7aa1c398 100644 --- a/docs/source/configuration.rst +++ b/docs/source/configuration.rst @@ -101,6 +101,23 @@ they are optional and which default value or operation is applied in each situat .. versionadded:: 1.9 +.. _weaver-cwl-prov: + +- | ``weaver.cwl_prov = true|false`` [:class:`bool`-like] + | (default: ``true``) + | + | Configure whether :term:`W3C` |PROV|_ functionality using the :ref:`proc_op_job_prov` endpoints should be enabled + to collect :term:`Provenance` metadata when executing the underlying :term:`CWL` of a given :term:`Process` + or :term:`Workflow`. + + .. note:: + + Any pre-existing :term:`Job` that was created when this option did not yet exist or that was executed while + it was disabled will not offer :term:`Provenance` metadata. 
This is intrinsic to the functionality that must obtain + timely metadata *while* executing to properly represent operational steps and :term:`Job` updates as they occur. + + .. versionadded:: 6.1 + .. _weaver-wps: - | ``weaver.wps = true|false`` [:class:`bool`-like] diff --git a/docs/source/processes.rst b/docs/source/processes.rst index 1bedb0d49..e415e28d1 100644 --- a/docs/source/processes.rst +++ b/docs/source/processes.rst @@ -173,7 +173,7 @@ through some parsing (e.g.: :ref:`proc_wps_12`) or with some requirement indicat special handling. The represented :term:`Process` is aligned with |ogc-api-proc|_ specifications. When deploying one such :term:`Process` directly, it is expected to have a definition specified -with a :term:`CWL` `Application Package`_, which provides resources about one of the described :ref:`app_pkg_types`. +with a :term:`CWL` :ref:`application-package`, which provides resources about one of the described :ref:`app_pkg_types`. This is most of the time employed to wrap operations packaged in a reference :term:`Docker` image, but it can also wrap :ref:`app_pkg_remote` to be executed on another server (i.e.: :term:`ADES`). When the :term:`Process` should be @@ -490,6 +490,8 @@ the |getcap-req|_ request. Modify an Existing Process (Update, Replace, Undeploy) ----------------------------------------------------------------------------- +.. versionadded:: 4.20 + Since `Weaver` supports |ogc-api-proc-part2|_, it is able to remove a previously registered :term:`Process` using the :ref:`Deployment ` request. The undeploy operation consist of a ``DELETE`` request targeting the specific ``{WEAVER_URL}/processes/{processID}`` to be removed. @@ -498,8 +500,6 @@ specific ``{WEAVER_URL}/processes/{processID}`` to be removed. The :term:`Process` must be accessible by the user considering any visibility configuration to perform this step. See :ref:`proc_op_deploy` section for details. -.. 
versionadded:: 4.20 - Starting from version `4.20 `_, a :term:`Process` can be replaced or updated using respectively the ``PUT`` and ``PATCH`` requests onto the specific ``{WEAVER_URL}/processes/{processID}`` location of the reference to modify. @@ -1989,7 +1989,7 @@ the configured :term:`WPS` output directory. Header ``X-WPS-Output-Context`` is ignored when using `S3` buckets for output location since they are stored individually per :term:`Job` UUID, and hold no relevant *context* location. See also :ref:`conf_s3_buckets`. -.. versionadded:: 4.3 +.. versionchanged:: 4.3 Addition of the ``X-WPS-Output-Context`` header. .. _proc_op_execute_subscribers: @@ -2419,10 +2419,118 @@ Note again that the more the :term:`Process` is verbose, the more tracking will Job Provenance ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. fixme: CWL and Job Prov (https://github.com/crim-ca/weaver/issues/673) -.. todo:: - implement ``GET /jobs/{jobID}/run`` and/or ``GET /jobs/{jobID}/prov`` - (see https://github.com/crim-ca/weaver/issues/673) +.. versionadded:: 6.1 + +The provenance endpoints allow to obtain :term:`W3C` |PROV|_ metadata from a successfully completed :term:`Job` +using various representations. This provenance information can help identify traceability information such as the input +data sources, validate output checksums, and understand all internal :term:`Process` data transformations that were +involved within an executed :term:`Workflow`. + +The |PROV|_ metadata consists of information records about entities, activities, and people involved in producing a +piece of data or thing |PROV-dfn|_, which can be used to form assessments about its quality, reliability or +trustworthiness. + +.. |PROV-dfn| replace:: :sup:`[^]` +.. _PROV-dfn: https://www.w3.org/TR/2013/REC-prov-dm-20130430/#dfn-provenance + +.. seealso:: + - |PROV-overview|_ + - |cwltool-cwlprov|_ + +.. 
figure:: https://www.w3.org/TR/2013/REC-prov-o-20130430/diagrams/starting-points.svg + :alt: PROV-O Resources + :target: `PROV-O`_ + :align: center + :width: 500px + + Provenance Resource Relationships [|PROV-O|_] + + +The provenance endpoints are provided in alignment with the |ogc-api-proc-part4|_ provenance class requirement. +However, `Weaver` also provides additional functionalities in comparison to the minimal requirements from the +:term:`OGC` specification. + +Following is a table of available formats and corresponding endpoints offered by `Weaver`. + +.. list-table:: Job Provenance Endpoints + :name: table-job-prov + :align: center + :header-rows: 1 + :widths: 25,10,20,45 + + * - Endpoint + - |PROV|_ Format + - :term:`Media-Type` + - Description + * - ``/jobs/{jobID}/prov`` + - |PROV-JSON|_ + - ``application/json`` + - :term:`Provenance` metadata using :term:`JSON` representation. + * - ``/jobs/{jobID}/prov`` + - |PROV-JSONLD|_ + - ``application/ld+json`` + - :term:`Provenance` metadata using |JSON-LD|_ representation. + * - ``/jobs/{jobID}/prov`` + - |PROV-XML|_ + - ``text/xml`` or ``application/xml`` + - :term:`Provenance` metadata using :term:`XML` representation. + * - ``/jobs/{jobID}/prov`` + - |PROV-N|_ + - ``text/provenance-notation`` + - :term:`Provenance` metadata using the main |PROV|_ notation representation. + * - ``/jobs/{jobID}/prov`` + - PROV-NT + - ``application/n-triples`` + - :term:`Provenance` metadata using |rdf-n-triples|_ (NT) representation. + * - ``/jobs/{jobID}/prov`` + - PROV-TURTLE + - ``text/turtle`` + - :term:`Provenance` metadata using |rdf-turtle|_ (TTL) representation. + * - ``/jobs/{jobID}/prov/info`` + - |na| + - ``text/plain`` + - Metadata about the *Research Object* packaging information. + * - ``/jobs/{jobID}/prov/who`` + - |na| + - ``text/plain`` + - Metadata of who ran the :term:`Job`. 
+ * - ``/jobs/{jobID}/prov/runs`` + - |na| + - ``text/plain`` + - Obtain the list of ``runID`` steps of the :term:`Workflow` within the :term:`Job`. + * - ``/jobs/{jobID}/prov/run`` + - |na| + - ``text/plain`` + - Metadata of the main :term:`Job` and any nested step runs in the case of a :term:`Workflow`. + * - ``/jobs/{jobID}/prov/inputs`` + - |na| + - ``text/plain`` + - Metadata about the :term:`Job` input IDs. + * - ``/jobs/{jobID}/prov/outputs`` + - |na| + - ``text/plain`` + - Metadata about the :term:`Job` output IDs. + * - ``/jobs/{jobID}/prov/[run|inputs|outputs]/{runID}`` + - |na| + - ``text/plain`` + - Same as their respective definitions above, but for a specific step of a :term:`Workflow`. + +.. seealso:: + This feature is enabled by default. Its functionality and the corresponding :term:`API` endpoints + can be controlled using :ref:`Configuration Option ` ``weaver.cwl_prov``. + +Resulting metadata that is collected from :term:`Job` :term:`Provenance` will be stored under a similar endpoint +as the :ref:`exec_output_location`, except with an additional ``-prov`` suffix applied after the :term:`Job` UUID, +as shown below. +This location is selected to conveniently offer the ``PROV`` metadata with a different parent directory than +the :term:`Job` outputs, therefore allowing different endpoint access control schemes between the ``PROV`` metadata +and actual output data, while also reusing the configured :ref:`exec_output_location` that can be used to quickly +serve :term:`Provenance` contents without any additional configuration. + +.. code-block:: + + {WPS_OUTPUT_URL}[/{WPS_OUTPUT_CONTEXT}]/{JOB_UUID}-prov + .. _proc_op_job_stats: diff --git a/docs/source/references.rst b/docs/source/references.rst index 8bbf7831b..36a617dc3 100644 --- a/docs/source/references.rst +++ b/docs/source/references.rst @@ -62,6 +62,8 @@ .. |cwl-metadata-schema-org| replace:: RDF Schema Definitions .. 
_cwl-metadata-schema-org: https://schema.org/version/latest/schemaorg-current-https.rdf .. _docker: https://docs.docker.com/develop/ +.. |cwltool-cwlprov| replace:: CWLProv - Provenance Capture with :mod:`cwltool` +.. _cwltool-cwlprov: https://cwltool.readthedocs.io/en/latest/CWLProv.html .. |docker| replace:: Docker .. |ems| replace:: Execution Management Service .. |esgf| replace:: Earth System Grid Federation @@ -172,6 +174,26 @@ .. _openeo-api: https://openeo.org/documentation/1.0/developers/api/reference.html .. |OpenAPI-spec| replace:: OpenAPI Specification .. _OpenAPI-spec: https://spec.openapis.org/oas/v3.1.0 +.. |JSON-LD| replace:: JSON Linked Data +.. _JSON-LD: https://json-ld.org/ +.. |PROV| replace:: PROV +.. _PROV: https://www.w3.org/TR/prov-overview/ +.. |PROV-JSON| replace:: PROV-JSON +.. _PROV-JSON: https://www.w3.org/submissions/prov-json/ +.. |PROV-JSONLD| replace:: PROV-JSONLD +.. _PROV-JSONLD: https://www.w3.org/submissions/prov-jsonld/ +.. |PROV-N| replace:: PROV-N +.. _PROV-N: https://www.w3.org/TR/prov-n/ +.. |PROV-overview| replace:: PROV Overview +.. _PROV-overview: https://www.w3.org/TR/prov-overview/ +.. |PROV-O| replace:: PROV-O: The PROV Ontology +.. _PROV-O: https://www.w3.org/TR/2013/REC-prov-o-20130430/ +.. |PROV-XML| replace:: PROV-XML +.. _PROV-XML: https://www.w3.org/TR/2013/NOTE-prov-xml-20130430/ +.. |rdf-n-triples| replace:: RDF N-Triples +.. _rdf-n-triples: https://www.w3.org/TR/n-triples/ +.. |rdf-turtle| replace:: RDF Turtle +.. _rdf-turtle: https://www.w3.org/TR/rdf12-turtle/ .. |pywps| replace:: PyWPS .. _pywps: https://github.com/geopython/pywps/ .. |pywps-status| replace:: Progress and Status Report @@ -184,6 +206,8 @@ .. _weaver-issues: https://github.com/crim-ca/weaver/issues .. |submit-issue| replace:: submit a new issue .. _submit-issue: https://github.com/crim-ca/weaver/issues/new/choose +.. |w3c| replace:: W3C +.. _w3c: https://www.w3.org/ .. STAC .. |stac-spec| replace:: STAC Specification @@ -202,7 +226,7 @@ .. 
Example references .. |examples| replace:: Examples .. _examples: examples.rst -.. |weaver-func-test-apps| replace:: Weaver functional tests +.. |weaver-func-test-apps| replace:: Weaver functional tests Application Packages .. _weaver-func-test-apps: https://github.com/crim-ca/weaver/tree/master/tests/functional/application-packages .. |ogc-testbeds-apps| replace:: OGC-Testbeds Applications .. _ogc-testbeds-apps: https://github.com/crim-ca/application-packages diff --git a/requirements.txt b/requirements.txt index 1702261d4..7c8643e91 100644 --- a/requirements.txt +++ b/requirements.txt @@ -42,8 +42,10 @@ cryptography # use cwltool gpu-enabled support until integrated within the original tool # (https://github.com/common-workflow-language/common-workflow-language/issues/587) ### git+https://github.com/crim-ca/cwltool@docker-gpu#egg=cwltool -##cwltool==3.1.20230906142556 -cwltool @ git+https://github.com/fmigneault/cwltool.git@fix-load-contents-array +cwltool==3.1.20241217163858 +# for some reason, not the same release number, +# but same code as https://github.com/common-workflow-language/cwlprov/tree/0.6.0 +cwlprov==0.1.1 dnspython>=2.6.1 # not directly required, pinned by Snyk to avoid a vulnerability; via pymongo dependency docker>=7.1 duration diff --git a/setup.cfg b/setup.cfg index 5125bd37e..b4cca5ff1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -69,6 +69,7 @@ markers = remote: mark test with remote Weaver instance requirement vault: mark test with Vault file feature validation html: mark test as related to HTML rendering + prov: mark test as related to PROV operations oap_part1: mark test as 'OGC API - Processes - Part 1: Core' functionalities oap_part2: mark test as 'OGC API - Processes - Part 2: Deploy, Replace, Undeploy (DRU)' functionalities oap_part3: mark test as 'OGC API - Processes - Part 3: Workflows and Chaining' functionalities diff --git a/tests/functional/test_cli.py b/tests/functional/test_cli.py index 028b66679..316742523 100644 --- 
a/tests/functional/test_cli.py +++ b/tests/functional/test_cli.py @@ -15,6 +15,7 @@ import mock import pytest +import yaml from owslib.ows import DEFAULT_OWS_NAMESPACE from owslib.wps import WPSException from parameterized import parameterized @@ -22,6 +23,7 @@ from webtest import TestApp as WebTestApp from tests import resources +from tests.functional.test_job_provenance import TestJobProvenanceBase from tests.functional.utils import JobUtils, ResourcesUtil, WpsConfigBase from tests.utils import ( get_weaver_url, @@ -45,6 +47,7 @@ from weaver.notify import decrypt_email from weaver.processes.constants import CWL_REQUIREMENT_APP_DOCKER, ProcessSchema from weaver.processes.types import ProcessType +from weaver.provenance import ProvenanceFormat, ProvenancePathType from weaver.status import JOB_STATUS_CATEGORIES, Status, StatusCategory from weaver.utils import fully_qualified_name, get_registry from weaver.visibility import Visibility @@ -2566,3 +2569,82 @@ def test_describe_auth(self): assert any(f"\"id\": \"{proc}\"" in line for line in lines) assert any("\"inputs\": {" in line for line in lines) assert any("\"outputs\": {" in line for line in lines) + + +@pytest.mark.prov +class TestWeaverClientProv(TestWeaverClientBase, TestJobProvenanceBase): + def setUp(self): + # purposely omit 'TestWeaverClientBase' setup to + # avoid clearing the generated job with PROV metadata + TestJobProvenanceBase.setUp(self) + + def test_prov(self): + result = mocked_sub_requests(self.app, self.client.prov, self.job_url) + assert result.success + assert result.headers["Content-Type"] == ContentType.APP_JSON + assert isinstance(result.body, dict), "body should be the PROV-JSON" + assert "actedOnBehalfOf" in result.body + assert "agent" in result.body + assert "crim-ca/weaver" in str(result.body["agent"]) + assert "cwltool" in str(result.body["agent"]) + + def test_prov_yaml_by_output_format(self): + result = mocked_sub_requests(self.app, self.client.prov, self.job_url, 
output_format=OutputFormat.YAML) + assert result.success + assert result.headers["Content-Type"] == ContentType.APP_JSON, "original type should still be JSON (from API)" + assert isinstance(result.body, dict), "response body should still be the original PROV-JSON" + assert isinstance(result.text, str), "text property should be the PROV-JSON represented as YAML string" + assert yaml.safe_load(result.text) == result.body, "PROV-JSON contents should be identical in YAML format" + assert "actedOnBehalfOf" in result.text + assert "agent" in result.text + assert "crim-ca/weaver" in str(result.text) + assert "cwltool" in str(result.text) + + def test_prov_xml_by_prov_format(self): + result = mocked_sub_requests(self.app, self.client.prov, self.job_url, prov_format=ProvenanceFormat.PROV_XML) + assert result.success + assert result.headers["Content-Type"] == ContentType.APP_XML, "original type should still be XML (from API)" + assert isinstance(result.body, str), "body should be the PROV-XML representation" + assert "actedOnBehalfOf" in result.body + assert "agent" in result.body + assert "crim-ca/weaver" in str(result.body) + assert "cwltool" in str(result.body) + + def test_prov_info(self): + result = mocked_sub_requests(self.app, self.client.prov, self.job_url, prov=ProvenancePathType.PROV_INFO) + assert result.success + assert result.headers["Content-Type"] == ContentType.TEXT_PLAIN + assert "Research Object of CWL workflow run" in result.text + assert self.job_id in result.text + + def test_prov_run(self): + result = mocked_sub_requests(self.app, self.client.prov, self.job_url, prov=ProvenancePathType.PROV_RUN) + assert result.success + assert result.headers["Content-Type"] == ContentType.TEXT_PLAIN + assert self.proc_id in result.text + assert self.job_id in result.text + assert "< wf:main/message" in result.text, ( + "Indication of inward input 'message' ID should be present" + ) + assert f"> wf:main/{self.proc_id}/output" in result.text, ( + "Indication of outward 
result 'output' ID should be present" + ) + + def test_prov_run_with_id(self): + result = mocked_sub_requests( + self.app, + self.client.prov, + self.job_url, + prov=ProvenancePathType.PROV_RUN, + prov_run_id=self.job_id, # redundant in this case, but test that parameter is parsed and resolves + ) + assert result.success + assert result.headers["Content-Type"] == ContentType.TEXT_PLAIN + assert self.proc_id in result.text + assert self.job_id in result.text + assert "< wf:main/message" in result.text, ( + "Indication of inward input 'message' ID should be present" + ) + assert f"> wf:main/{self.proc_id}/output" in result.text, ( + "Indication of outward result 'output' ID should be present" + ) diff --git a/tests/functional/test_job_provenance.py b/tests/functional/test_job_provenance.py new file mode 100644 index 000000000..eafb7d980 --- /dev/null +++ b/tests/functional/test_job_provenance.py @@ -0,0 +1,298 @@ +import contextlib +import copy +import itertools +import os +import uuid +from typing import TYPE_CHECKING + +import pytest +from parameterized import parameterized + +from tests.functional.utils import ResourcesUtil, WpsConfigBase +from tests.utils import mocked_execute_celery, mocked_sub_requests, mocked_wps_output +from weaver.formats import ContentType, OutputFormat +from weaver.provenance import ProvenanceFormat, ProvenancePathType +from weaver.status import Status + +if TYPE_CHECKING: + from typing import Optional + + from weaver.typedefs import AnyUUID + + +@pytest.mark.prov +class TestJobProvenanceBase(WpsConfigBase, ResourcesUtil): + job_id = None # type: Optional[AnyUUID] + job_url = None # type: Optional[str] + proc_id = None # type: Optional[str] + + @classmethod + def setUpClass(cls) -> None: + cls.settings = copy.deepcopy(cls.settings or {}) + settings = { + "weaver.cwl_prov": True, + "weaver.wps_metadata_provider_name": "TestJobProvenanceBase", # metadata employed by PROV + "weaver.wps_metadata_provider_url": "http://localhost/", # metadata 
employed by PROV + "weaver.wps": True, + "weaver.wps_path": "/ows/wps", + "weaver.wps_restapi_path": "/", + "weaver.wps_output_path": "/wpsoutputs", + "weaver.wps_output_url": "http://localhost/wpsoutputs", + "weaver.wps_output_dir": "/tmp/weaver-test/wps-outputs", # nosec: B108 # don't care hardcoded for test + } + cls.settings.update(settings) + super(TestJobProvenanceBase, cls).setUpClass() + cls.setup_test_job() + + @classmethod + def tearDownClass(cls): + cls.process_store.clear_processes() + cls.job_store.clear_jobs() + super(TestJobProvenanceBase, cls).tearDownClass() + + @classmethod + def setup_test_job(cls): + cls.proc_id = cls.fully_qualified_test_name(cls, "Echo") + cwl = cls.retrieve_payload("Echo", "package", local=True) + body = { + "processDescription": { + "id": cls.proc_id, + }, + "executionUnit": [{"unit": cwl}], + } + cls.deploy_process(body) + data = { + "inputs": {"message": "0123456789"}, + } + with contextlib.ExitStack() as stack_exec: + for mock_exec in mocked_execute_celery(): + stack_exec.enter_context(mock_exec) + stack_exec.enter_context(mocked_wps_output(cls.settings)) + proc_url = f"/processes/{cls.proc_id}/execution" + headers = {"Prefer": "respond-async"} + headers.update(cls.json_headers) + resp = mocked_sub_requests( + cls.app, "post_json", proc_url, + data=data, headers=headers, + timeout=5, only_local=True + ) + assert resp.status_code == 201, resp.text + status_url = resp.headers.get("location") + cls.monitor_job(status_url, return_status=True) + cls.job_url = status_url + cls.job_id = status_url.rsplit("/", 1)[-1] + + +@pytest.mark.prov +@pytest.mark.oap_part4 +@pytest.mark.functional +class TestJobProvenance(TestJobProvenanceBase): + """ + Tests to evaluate the various endpoints for :term:`Job` :term:`Provenance`. 
+ """ + @parameterized.expand([ + ({}, {}), # default is JSON + ({"f": OutputFormat.JSON}, {}), + ({}, {"Accept": ContentType.APP_JSON}), + ]) + def test_job_prov_json(self, queries, headers): + prov_url = f"{self.job_url}/prov" + resp = self.app.get(prov_url, params=queries, headers=headers) + assert resp.status_code == 200 + assert resp.content_type == ContentType.APP_JSON + prov = resp.json + assert "prefix" in prov + assert "wfprov" in prov["prefix"] + + @parameterized.expand([ + ({"f": OutputFormat.XML}, {}), + ({}, {"Accept": ContentType.TEXT_XML}), + ({}, {"Accept": ContentType.APP_XML}), + ]) + def test_job_prov_xml(self, queries, headers): + prov_url = f"{self.job_url}/prov" + resp = self.app.get(prov_url, params=queries, headers=headers) + assert resp.status_code == 200 + assert resp.content_type in ContentType.ANY_XML + prov = resp.text + assert " None: + cls.settings = copy.deepcopy(cls.settings or {}) + settings = { + "weaver.cwl_prov": False, # NOTE: this is the test + "weaver.wps": True, + "weaver.wps_path": "/ows/wps", + "weaver.wps_restapi_path": "/", + "weaver.wps_output_path": "/wpsoutputs", + "weaver.wps_output_url": "http://localhost/wpsoutputs", + "weaver.wps_output_dir": "/tmp/weaver-test/wps-outputs", # nosec: B108 # don't care hardcoded for test + } + cls.settings.update(settings) + + # don't call 'TestJobProvenanceBase.setUpClass', but it's parents 'setUpClass' instead + # to configure the web test application the same way with above settings, + # while making sure to avoid re-enabling 'weaver.cwl_prov = true' + super(TestJobProvenanceBase, cls).setUpClass() + + # NOTE: + # by doing the execution embedded in job setup + # most of the code paths without provenance will already be validated + # only need to validate the remaining results to match expectations + cls.setup_test_job() + + @parameterized.expand( + itertools.product( + [None, ProvenancePathType.PROV], + ProvenanceFormat.formats(), + ) + ) + def test_prov_not_created(self, 
prov_endpoint, prov_fmt): + """ + Validate that disabled :term:`Provenance` feature works and that none is generated from an execution. + """ + job = self.job_store.fetch_by_id(self.job_id) + prov_path = job.prov_path(extra_path=prov_endpoint, prov_format=prov_fmt, container=self.settings) + if prov_path is None: + pytest.skip("Ignore invalid combination of PROV path/format.") + assert not os.path.exists(prov_path) + + @parameterized.expand(ProvenancePathType.values()) + def test_prov_not_found(self, prov_endpoint): + """ + Validate that disabled :term:`Provenance` feature works and that endpoints are not available. + """ + prov_url = f"/jobs/{self.job_id}{prov_endpoint}" + resp = self.app.get(prov_url, expect_errors=True) + assert resp.status_code == 404 diff --git a/tests/functional/utils.py b/tests/functional/utils.py index b995d919e..4dd2f4022 100644 --- a/tests/functional/utils.py +++ b/tests/functional/utils.py @@ -65,6 +65,7 @@ class GenericUtils(unittest.TestCase): def fully_qualified_test_name(self, name=""): + # type: (str) -> str """ Generates a unique name using the current test method full context name and the provided name, if any. 
@@ -72,7 +73,10 @@ def fully_qualified_test_name(self, name=""): """ extra_name = f"-{name}" if name else "" class_name = fully_qualified_name(self) - test_name = f"{class_name}.{self._testMethodName}{extra_name}" + if hasattr(self, "_testMethodName"): + test_name = f"{class_name}.{self._testMethodName}{extra_name}" + else: + test_name = f"{class_name}{extra_name}" # called from class method test_name = test_name.replace(".", "-").replace("-_", "_").replace("_-", "-") return test_name @@ -449,24 +453,28 @@ def deploy_process(cls, info.append(deepcopy(resp.json)) return info # type: ignore - def _try_get_logs(self, status_url): - _resp = self.app.get(f"{status_url}/logs", headers=dict(self.json_headers)) + @classmethod + def _try_get_logs(cls, status_url): + _resp = cls.app.get(f"{status_url}/logs", headers=dict(cls.json_headers)) if _resp.status_code == 200: _text = "\n".join(_resp.json) return f"Error logs:\n{_text}" return "" @overload - def monitor_job(self, status_url, **__): + @classmethod + def monitor_job(cls, status_url, **__): # type: (str, **Any) -> ExecutionResults ... @overload - def monitor_job(self, status_url, return_status=False, **__): + @classmethod + def monitor_job(cls, status_url, return_status=False, **__): # type: (str, Literal[True], **Any) -> JobStatusResponse ... 
- def monitor_job(self, + @classmethod + def monitor_job(cls, status_url, # type: str timeout=None, # type: Optional[int] interval=None, # type: Optional[int] @@ -501,17 +509,17 @@ def check_job_status(_resp, running=False): body = _resp.json pretty = json.dumps(body, indent=2, ensure_ascii=False) statuses = [Status.ACCEPTED, Status.RUNNING, final_status] if running else [final_status] - assert _resp.status_code == 200, f"Execution failed:\n{pretty}\n{self._try_get_logs(status_url)}" - assert body["status"] in statuses, f"Error job info:\n{pretty}\n{self._try_get_logs(status_url)}" + assert _resp.status_code == 200, f"Execution failed:\n{pretty}\n{cls._try_get_logs(status_url)}" + assert body["status"] in statuses, f"Error job info:\n{pretty}\n{cls._try_get_logs(status_url)}" return body["status"] in {final_status, Status.SUCCEEDED, Status.FAILED} # break condition time.sleep(1) # small delay to ensure process execution had a chance to start before monitoring - left = timeout or self.monitor_timeout - delta = interval or self.monitor_interval + left = timeout or cls.monitor_timeout + delta = interval or cls.monitor_interval once = True resp = None while left >= 0 or once: - resp = self.app.get(status_url, headers=self.json_headers) + resp = cls.app.get(status_url, headers=cls.json_headers) if check_job_status(resp, running=True): break time.sleep(delta) @@ -521,7 +529,7 @@ def check_job_status(_resp, running=False): if return_status or expect_failed: return resp.json params = {"schema": JobInputsOutputsSchema.OGC} # not strict to preserve old 'format' field - resp = self.app.get(f"{status_url}/results", params=params, headers=self.json_headers) + resp = cls.app.get(f"{status_url}/results", params=params, headers=cls.json_headers) assert resp.status_code == 200, f"Error job info:\n{resp.text}" return resp.json diff --git a/tests/processes/test_wps_package.py b/tests/processes/test_wps_package.py index 2679a3443..b6fff68a0 100644 --- 
a/tests/processes/test_wps_package.py +++ b/tests/processes/test_wps_package.py @@ -30,7 +30,7 @@ from pywps.validator.mode import MODE from tests.utils import assert_equal_any_order -from weaver.datatype import Process +from weaver.datatype import Job, Process from weaver.exceptions import PackageExecutionError, PackageTypeError from weaver.formats import ContentType from weaver.processes.constants import ( @@ -80,6 +80,10 @@ def __init__(self, *_, **__): super(MockWpsPackage, self).__init__(*_, **__) self.mock_status_location = None + @property + def job(self): + return Job(task_id="MockWpsPackage") + @property def status_location(self): return self.mock_status_location @@ -198,17 +202,22 @@ def test_stdout_stderr_logging_for_commandline_tool_success(caplog): r".*", log_data, re.MULTILINE | re.DOTALL - ) + ), f"Captured Log Information expected in:\n{log_data}" # cwltool call with reference to the command and stdout/stderr redirects assert re.match( r".*" - rf"cwltool:job.* \[job {process.id}\].*echo \\\n" + rf"(\[cwltool\]|cwltool:job.*) \[job {process.id}(_[0-9]+)?\].*echo \\\n" r"\s+'Dummy message' \> [\w\-/\.]+/stdout\.log 2\> [\w\-/\.]+/stderr\.log\n" r".*", log_data, re.MULTILINE | re.DOTALL - ), f"Information expected in:\n{log_data}" - assert f"[cwltool] [job {process.id}] completed success" in log_data + ), f"Command Information with Log redirects expected in:\n{log_data}" + assert re.match( + r".*" + rf"(\[cwltool\]|cwltool:job.*) \[job {process.id}(_[0-9]+)?\] completed success", + log_data, + re.MULTILINE | re.DOTALL + ), f"Information about successful job expected in:\n{log_data}" def test_stdout_stderr_logging_for_commandline_tool_failure(caplog): diff --git a/tests/test_provenance.py b/tests/test_provenance.py new file mode 100644 index 000000000..0a8480864 --- /dev/null +++ b/tests/test_provenance.py @@ -0,0 +1,187 @@ +import itertools + +import pytest + +from weaver.formats import ContentType, OutputFormat +from weaver.provenance import 
ProvenanceFormat, ProvenancePathType + + +@pytest.mark.prov +@pytest.mark.parametrize( + ["prov_method", "kwargs", "expected"], + [ + (ProvenancePathType.as_type, {}, None), + (ProvenancePathType.get, {}, None), + (ProvenancePathType.get, {"default": None}, None), + (ProvenancePathType.get, {"default": "default"}, "default"), + (ProvenancePathType.get, {"run_id": "1234"}, None), + (ProvenancePathType.get, {"run_id": "1234", "default": "default"}, "default"), + ] +) +def test_provenance_path_type_unresolved(prov_method, kwargs, expected): + result = prov_method("random", **kwargs) + assert result == expected + + +@pytest.mark.prov +@pytest.mark.parametrize( + ["provenance", "prov_run_id", "expect_path", "expect_type"], + [ + ("prov", None, ProvenancePathType.PROV, "prov"), + ("/prov", None, ProvenancePathType.PROV, "prov"), + ("info", None, ProvenancePathType.PROV_INFO, "info"), + ("/info", None, ProvenancePathType.PROV_INFO, "info"), + ("/prov/info", None, ProvenancePathType.PROV_INFO, "info"), + ("run", None, ProvenancePathType.PROV_RUN, "run"), + ("/run", None, ProvenancePathType.PROV_RUN, "run"), + ("/prov/run", None, ProvenancePathType.PROV_RUN, "run"), + ("run", "run-id", f"{ProvenancePathType.PROV_RUN}/run-id", "run"), + ("/run", "run-id", f"{ProvenancePathType.PROV_RUN}/run-id", "run"), + ("/prov/run", "run-id", f"{ProvenancePathType.PROV_RUN}/run-id", "run"), + ] +) +def test_provenance_path_type_resolution(provenance, prov_run_id, expect_path, expect_type): + result = ProvenancePathType.get(provenance, run_id=prov_run_id) + assert result == expect_path + result = ProvenancePathType.as_type(provenance) + assert result == expect_type + + +@pytest.mark.prov +def test_provenance_formats(): + result = ProvenanceFormat.formats() + expect = [ + ProvenanceFormat.PROV_JSON, + ProvenanceFormat.PROV_JSONLD, + ProvenanceFormat.PROV_TURTLE, + ProvenanceFormat.PROV_N, + ProvenanceFormat.PROV_XML, + ProvenanceFormat.PROV_XML, + ProvenanceFormat.PROV_NT, + ] + assert 
set(result) == set(expect) + + +@pytest.mark.prov +def test_provenance_media_types(): + result = ProvenanceFormat.media_types() + expect = [ + ContentType.APP_JSON, + ContentType.APP_JSONLD, + ContentType.TEXT_TURTLE, + ContentType.TEXT_PROVN, + ContentType.TEXT_XML, + ContentType.APP_XML, + ContentType.APP_NT, + ] + assert set(result) == set(expect) + + +@pytest.mark.prov +@pytest.mark.parametrize( + ["provenance", "expect"], + [ + (None, None), + ("prov-json", ProvenanceFormat.PROV_JSON), + ("PROV-JSON", ProvenanceFormat.PROV_JSON), + ("PROV-JSONLD", ProvenanceFormat.PROV_JSONLD), + ] +) +def test_provenance_format(provenance, expect): + result = ProvenanceFormat.get(provenance) + assert result == expect + + +@pytest.mark.prov +@pytest.mark.parametrize( + ["provenance", "expect"], + [ + (None, None), + (ProvenanceFormat.PROV_JSON, ContentType.APP_JSON), + (ProvenanceFormat.PROV_JSONLD, ContentType.APP_JSONLD), + (ProvenanceFormat.PROV_XML, ContentType.APP_XML), + (ProvenanceFormat.PROV_NT, ContentType.APP_NT), + (ProvenanceFormat.PROV_N, ContentType.TEXT_PROVN), + (ProvenanceFormat.PROV_TURTLE, ContentType.TEXT_TURTLE), + ] +) +def test_provenance_as_media_type(provenance, expect): + result = ProvenanceFormat.as_media_type(provenance) + assert result == expect + + +@pytest.mark.prov +@pytest.mark.parametrize( + ["prov", "prov_format", "output_format", "expect", "is_error"], + [ + (None, None, None, ProvenanceFormat.PROV_JSON, False), + # only main PROV path allow format variants + (ProvenancePathType.PROV, None, None, ProvenanceFormat.PROV_JSON, False), + (ProvenancePathType.PROV, ProvenanceFormat.PROV_JSON, None, ProvenanceFormat.PROV_JSON, False), + (ProvenancePathType.PROV, ProvenanceFormat.PROV_JSONLD, None, ProvenanceFormat.PROV_JSONLD, False), + (ProvenancePathType.PROV, ProvenanceFormat.PROV_XML, None, ProvenanceFormat.PROV_XML, False), + (ProvenancePathType.PROV, ProvenanceFormat.PROV_NT, None, ProvenanceFormat.PROV_NT, False), + (ProvenancePathType.PROV, 
ProvenanceFormat.PROV_N, None, ProvenanceFormat.PROV_N, False), + (ProvenancePathType.PROV, ProvenanceFormat.PROV_TURTLE, None, ProvenanceFormat.PROV_TURTLE, False), + # validate implicit mapping via output format + (ProvenancePathType.PROV, None, OutputFormat.JSON, ProvenanceFormat.PROV_JSON, False), + (ProvenancePathType.PROV, None, OutputFormat.JSON_RAW, ProvenanceFormat.PROV_JSON, False), + (ProvenancePathType.PROV, None, OutputFormat.JSON_STR, ProvenanceFormat.PROV_JSON, False), + (ProvenancePathType.PROV, None, OutputFormat.YAML, ProvenanceFormat.PROV_JSON, False), + (ProvenancePathType.PROV, None, OutputFormat.YML, ProvenanceFormat.PROV_JSON, False), + (ProvenancePathType.PROV, None, OutputFormat.XML, ProvenanceFormat.PROV_XML, False), + (ProvenancePathType.PROV, None, OutputFormat.TEXT, ProvenanceFormat.PROV_N, False), + (ProvenancePathType.PROV, None, OutputFormat.TXT, ProvenanceFormat.PROV_N, False), + # check some combinations considered invalid + (ProvenancePathType.PROV, ProvenanceFormat.PROV_N, OutputFormat.JSON, None, True), + (ProvenancePathType.PROV, ProvenanceFormat.PROV_N, OutputFormat.XML, None, True), + (ProvenancePathType.PROV, ProvenanceFormat.PROV_NT, OutputFormat.JSON, None, True), + (ProvenancePathType.PROV, ProvenanceFormat.PROV_NT, OutputFormat.XML, None, True), + (ProvenancePathType.PROV, ProvenanceFormat.PROV_XML, OutputFormat.JSON_RAW, None, True), + (ProvenancePathType.PROV, ProvenanceFormat.PROV_JSON, OutputFormat.XML, None, True), + (ProvenancePathType.PROV, ProvenanceFormat.PROV_TURTLE, OutputFormat.JSON, None, True), + (ProvenancePathType.PROV, None, OutputFormat.HTML, None, True), + (ProvenancePathType.PROV, ProvenanceFormat.PROV_JSON, OutputFormat.TEXT, None, True), + (ProvenancePathType.PROV_INFO, None, OutputFormat.JSON, None, True), + (ProvenancePathType.PROV_INFO, ProvenanceFormat.PROV_JSON, OutputFormat.JSON, None, True), + ] + + + [ + # all but the main PROV paths are text-only + # no output format, so it default to None 
resolved, and no error + (_prov, _prov_fmt, None, None, False) + for _prov, _prov_fmt + in itertools.product( + set(ProvenancePathType.types()) - {ProvenancePathType.as_type(ProvenancePathType.PROV)}, + ProvenanceFormat.values(), + ) + ] + + + [ + # all but the main PROV paths are text-only + # if anything is specified other than text, it's an error + (_prov, _prov_fmt, _out_fmt, None, True) + for _prov, _prov_fmt, _out_fmt + in itertools.product( + set(ProvenancePathType.types()) - {ProvenancePathType.as_type(ProvenancePathType.PROV)}, + ProvenanceFormat.values(), + set(OutputFormat.values()) - {OutputFormat.TEXT, OutputFormat.TXT}, + ) + ] + + + [ + # all but the main PROV paths are text-only + # valid if the output format is text + (_prov, _prov_fmt, _out_fmt, None, False) + for _prov, _prov_fmt, _out_fmt + in itertools.product( + set(ProvenancePathType.types()) - {ProvenancePathType.as_type(ProvenancePathType.PROV)}, + ProvenanceFormat.values(), + [OutputFormat.TEXT, OutputFormat.TXT], + ) + ] +) +def test_provenance_format_compatible(prov, prov_format, output_format, expect, is_error): + result, error = ProvenanceFormat.resolve_compatible_formats(prov, prov_format, output_format) + assert result == expect + assert error if is_error else error is None, "When an error is expected, a string detailing it should be returned." 
diff --git a/weaver/base.py b/weaver/base.py index 4631c51ce..0e44965d1 100644 --- a/weaver/base.py +++ b/weaver/base.py @@ -39,7 +39,11 @@ class Constants(object, metaclass=_Const): @classmethod def __members__(cls): members = set(cls.__dict__) - set(object.__dict__) - members = [member for member in members if not inspect.ismethod(getattr(cls, member))] + members = [ + member for member in members + if not isinstance(object.__getattribute__(cls, member), classmethod) + and not inspect.ismethod(getattr(cls, member)) + ] return [member for member in members if not isinstance(member, str) or not member.startswith("_")] @classmethod @@ -109,19 +113,19 @@ class classproperty(property): # pylint: disable=C0103,invalid-name .. seealso:: https://stackoverflow.com/a/5191224 """ - - def __init__(self, - fget=None, # type: Optional[Callable[[object], PropertyDataTypeT]] - fset=None, # type: Optional[Callable[[object, PropertyDataTypeT], None]] - fdel=None, # type: Optional[Callable[[object], None]] - doc="", # type: str - ): # type: (...) -> None + def __init__( + self, + fget=None, # type: Optional[Callable[[object], PropertyDataTypeT]] + fset=None, # type: Optional[Callable[[object, PropertyDataTypeT], None]] + fdel=None, # type: Optional[Callable[[object], None]] + doc="", # type: str + ): # type: (...) 
-> None super(classproperty, self).__init__(fget=fget, fset=fset, fdel=fdel, doc=doc) self.__doc__ = inspect.cleandoc(doc) - def __get__(self, cls, owner): # noqa - # type: (Type[object], Any) -> PropertyDataTypeT - return classmethod(self.fget).__get__(None, owner)() + def __get__(self, instance, owner=None): + # type: (Any, Optional[Type[object]]) -> PropertyDataTypeT + return self.fget.__get__(None, owner)(instance or owner) # pylint: disable=E1101,no-member # false-positive class _EnumMeta(enum.EnumMeta): diff --git a/weaver/cli.py b/weaver/cli.py index e194a98e3..dbfcec97f 100644 --- a/weaver/cli.py +++ b/weaver/cli.py @@ -35,6 +35,7 @@ ) from weaver.processes.utils import get_process_information from weaver.processes.wps_package import get_process_definition +from weaver.provenance import ProvenanceFormat, ProvenancePathType from weaver.sort import Sort, SortMethods from weaver.status import JOB_STATUS_CATEGORIES, Status, StatusCategory, map_status from weaver.utils import ( @@ -68,10 +69,12 @@ # https://github.com/ashb/sphinx-argparse/issues/7 try: from weaver.typedefs import ( + URL, AnyHeadersContainer, AnyRequestMethod, AnyRequestType, AnyResponseType, + AnyUUID, CookiesType, CWL, CWL_IO_ValueMap, @@ -97,6 +100,8 @@ ExecutionResultValue = Union[ExecutionResultObjectRef, List[ExecutionResultObjectRef]] JobSubscribers = Dict[str, Any] HeadersType = Dict[str, str] + URL = str + AnyUUID = str try: from weaver.formats import AnyOutputFormat from weaver.processes.constants import ProcessSchemaType @@ -110,7 +115,8 @@ ConditionalGroup = Tuple[argparse._ActionsContainer, bool, bool] # noqa PostHelpFormatter = Callable[[str], str] - ArgumentParserRule = Tuple[argparse._ActionsContainer, Callable[[argparse.Namespace], Optional[bool]], str] # noqa + ArgumentParserRuleCheck = Callable[[argparse.Namespace], Optional[Union[bool, str]]] + ArgumentParserRule = Tuple[argparse._ActionsContainer, ArgumentParserRuleCheck, str] # noqa LOGGER = logging.getLogger("weaver.cli") 
# do not use '__name__' since it becomes '__main__' from CLI call @@ -149,7 +155,7 @@ def __init__( self.success = success self.message = message self.headers = ResponseHeaders(headers) if headers is not None else None - self.body = body + self.body = body or text self.text = text self.code = code @@ -633,8 +639,8 @@ def _parse_deploy_package( return OperationResult(True, p_id, body) def _parse_job_ref(self, job_reference, url=None): - # type: (str, Optional[str]) -> Tuple[Optional[str], Optional[str]] - if job_reference.startswith("http"): + # type: (Union[URL, AnyUUID], Optional[str]) -> Tuple[Optional[str], Optional[str]] + if str(job_reference).startswith("http"): job_url = job_reference job_parts = [part for part in job_url.split("/") if part.strip()] job_id = job_parts[-1] @@ -1526,7 +1532,7 @@ def execute( def trigger_job( self, - job_reference, # type: str + job_reference, # type: Union[URL, AnyUUID] url=None, # type: Optional[str] auth=None, # type: Optional[AuthBase] headers=None, # type: Optional[AnyHeadersContainer] @@ -1570,7 +1576,7 @@ def trigger_job( def update_job( self, - job_reference, # type: str + job_reference, # type: Union[URL, AnyUUID] title=null, # type: Union[Type[null], Optional[str]] inputs=None, # type: Optional[Union[str, ExecutionInputs, CWL_IO_ValueMap]] subscribers=None, # type: Optional[JobSubscribers] @@ -1857,7 +1863,7 @@ def jobs( def status( self, - job_reference, # type: str + job_reference, # type: Union[URL, AnyUUID] url=None, # type: Optional[str] auth=None, # type: Optional[AuthBase] headers=None, # type: Optional[AnyHeadersContainer] @@ -1898,7 +1904,7 @@ def status( def _job_info( self, x_path, # type: str - job_reference, # type: str + job_reference, # type: Union[URL, AnyUUID] url=None, # type: Optional[str] auth=None, # type: Optional[AuthBase] headers=None, # type: Optional[AnyHeadersContainer] @@ -1914,7 +1920,7 @@ def _job_info( The :term:`Job` must be in the expected status to retrieve relevant information. .. 
seealso:: - :ref:`proc_op_result` + :ref:`proc_op_status` :param job_reference: Either the full :term:`Job` status URL or only its UUID. :param url: Instance URL if not already provided during client creation. @@ -1959,6 +1965,85 @@ def statistics(self, *args, **kwargs): stats = statistics # alias + def provenance( + self, + job_reference, # type: Union[URL, AnyUUID] + prov=None, # type: Optional[ProvenancePathType] + prov_run_id=None, # type: Optional[AnyUUID] + prov_format=None, # type: Optional[ProvenanceFormat] + output_format=None, # type: Optional[AnyOutputFormat] + url=None, # type: Optional[str] + auth=None, # type: Optional[AuthBase] + headers=None, # type: Optional[AnyHeadersContainer] + **kwargs, # type: Any + ): # type: (...) -> OperationResult + """ + Obtain the :term:`Provenance` metadata from a successful :term:`Job` execution. + + The :term:`Job` must be in the expected status to retrieve relevant information. + + .. seealso:: + - :ref:`proc_op_status` + - :ref:`proc_op_job_prov` + + :param job_reference: Either the full :term:`Job` status URL or only its UUID. + :param prov: + Type of :term:`Provenance` metadata to retrieve, as expressed by relative path. + For example, retrieving an execution run metadata can be requested with ``run``, ``/run`` or ``/prov/run``. + Available relative paths are as defined by the :term:`API` endpoints (see :class:`ProvenancePathType`). + Can be combined in certain cases with a :paramref:`prov_run_id` to obtain only the metadata of a nested step + within a :term:`Workflow` execution. If omitted, returns the main :term:`Provenance` metadata + representation as per the requested :paramref:`prov_format` (see :class:`ProvenanceFormat`). + :param prov_run_id: + Specific run (i.e.: a nested :term:`Workflow` step) for which to retrieve :term:`Provenance` metadata. 
+ Applicable IDs will typically correspond to the underlying :term:`Job` ID that would have been created + for the corresponding steps, but could differ in particular situations. + To make sure, the top-most ``PROV`` metadata should be inspected to extract relevant run IDs. + :param prov_format: + Desired :term:`Provenance` metadata representation (see :class:`ProvenanceFormat`). + Applicable only when retrieving the ``PROV`` details (i.e.: :paramref:`prov` must be ``None`` or ``/prov``). + Ignored otherwise. + Can be combined with :paramref:`output_format` to convert the representation into semantically equivalent + representations. For example, :attr:`ProvenanceFormat.PROV_JSON` could be converted into the corresponding + :term:`YAML` representation using :attr:`OutputFormat.YAML`. However, this is limited only to directly + mappable representations (i.e.: :term:`JSON`, :term:`YAML`, :term:`XML`). + :param output_format: + Select an alternate output representation of the result body contents. + See also :paramref:`prov_format` for even more format combinations specific to :term:`Provenance` metadata. + :param url: Instance URL if not already provided during client creation. + :param auth: + Instance authentication handler if not already created during client creation. + Should perform required adjustments to request to allow access control of protected contents. + :param headers: + Additional headers to employ when sending request. + Note that this can break functionalities if expected headers are overridden. Use with care. + :returns: Retrieved information from the :term:`Job`. 
+ """ + prov_path = ProvenancePathType.get(prov, run_id=prov_run_id, default=ProvenancePathType.PROV) + prov_format, err_msg = ProvenanceFormat.resolve_compatible_formats(prov, prov_format, output_format) + if err_msg: + return OperationResult(False, message=err_msg) + if prov_format: + prov_ctype = ProvenanceFormat.as_media_type(prov_format) + if prov_ctype: + headers = CaseInsensitiveDict(headers or {}) + headers["Accept"] = prov_ctype + if prov_path != ProvenancePathType.PROV: + headers = CaseInsensitiveDict(headers or {}) + headers["Accept"] = ContentType.TEXT_PLAIN + result = self._job_info( + prov_path, + job_reference, + url=url, + auth=auth, + headers=headers, + output_format=output_format, + **kwargs, + ) + return result + + prov = provenance # alias + def monitor( self, job_reference, # type: str @@ -2598,6 +2683,55 @@ def add_timeout_param(parser): ) +def add_provenance_params(parser): + # type: (argparse.ArgumentParser) -> None + parser.add_argument( + "-pT", "--prov", "--prov-type", dest="prov", + choices=ProvenancePathType.types(), + help=( + "Desired PROV metadata contents. " + "The main PROV metadata supports multiple representations. " + "All others are only available as plain text." + ) + ) + parser.add_argument( + "-pF", "--prov-format", dest="prov_format", + choices=ProvenanceFormat.formats(), + help=( + "Desired PROV metadata schema representation. " + "Applicable formats depend on the PROV metadata type being requested. " + "Can be combined with -F/--format to transform the result to an alternate representation if compatible. " + "Note that certain request headers will be overridden to obtain the requested format even if they are " + "explicitly specified by the corresponding -H/--header option." + ) + ) + parser.add_argument( + "-pR", "--run", "--prov-run", dest="prov_run_id", + choices=ProvenancePathType.types(), + help=( + "Specific run (i.e.: a nested Workflow step) for which to retrieve Provenance metadata. 
" + "Applicable IDs will typically correspond to the underlying Job ID that would have been " + "created for the corresponding steps, but could differ in particular situations. " + "To make sure, the top-most PROV metadata should be inspected to extract relevant run IDs." + ) + ) + + +def check_compatible_prov_formats(ns): + # type: (argparse.Namespace) -> Optional[str] + """ + Check multiple output format and PROV format for valid combinations. + + If valid, update the arguments to make them work during invocation. + Otherwise, return the relevant error to fail argument validation and print the error message. + """ + prov_format, err_msg = ProvenanceFormat.resolve_compatible_formats(ns.prov, ns.prov_format, ns.output_format) + if err_msg: + return err_msg + if prov_format: + ns.prov_format = prov_format + + class SubscriberAction(argparse.Action): """ Action that will validate that the input argument references a valid subscriber argument. @@ -2996,8 +3130,8 @@ def format_help(self): self.help_mode = False return text - def add_rule(self, rule, failure): - # type: (Callable[[argparse.Namespace], Optional[bool]], str) -> None + def add_rule(self, rule, failure=None): + # type: (ArgumentParserRuleCheck, Optional[str]) -> None self._rules.add((self, rule, failure)) def parse_known_args(self, args=None, namespace=None): @@ -3011,7 +3145,10 @@ def parse_known_args(self, args=None, namespace=None): """ ns, args = super(WeaverArgumentParser, self).parse_known_args(args=args, namespace=namespace) for container, rule, failure in self._rules: - if rule(ns) not in [None, True]: + result = rule(ns) + if result not in [None, True]: + if isinstance(result, str): + failure = f"{failure} because {result}" container.error(failure) return ns, args @@ -3398,6 +3535,25 @@ def make_parser(): add_job_ref_param(op_statistics) add_shared_options(op_statistics) + op_provenance = WeaverArgumentParser( + "provenance", + description=( + "Obtain the provenance metadata of a job using a 
reference UUID or URL. " + "Different W3C PROV representations can be retrieved according to specified format options. " + "Furthermore, different parts of the provenance metadata can be extracted." + ), + formatter_class=ParagraphFormatter, + ) + set_parser_sections(op_provenance) + add_url_param(op_provenance, required=False) + add_job_ref_param(op_provenance) + add_shared_options(op_provenance) + add_provenance_params(op_provenance) + op_provenance.add_rule( + check_compatible_prov_formats, + "specified options for -pF/--prov-format and -F/--format are not compatible", + ) + op_results = WeaverArgumentParser( "results", description=( @@ -3472,6 +3628,7 @@ def make_parser(): op_logs, op_exceptions, op_statistics, + op_provenance, op_results, op_upload, ] @@ -3479,6 +3636,7 @@ def make_parser(): "processes": op_capabilities, "errors": op_exceptions, "stats": op_statistics, + "prov": op_provenance, } for op_parser in operations: op_aliases = [alias for alias, op_alias in aliases.items() if op_alias is op_parser] diff --git a/weaver/datatype.py b/weaver/datatype.py index 5b79d8f2e..d10949581 100644 --- a/weaver/datatype.py +++ b/weaver/datatype.py @@ -6,6 +6,7 @@ import copy import enum import inspect +import io import json import os import re @@ -26,6 +27,7 @@ import pyramid.httpexceptions import requests.exceptions from cryptography.fernet import Fernet +from cwlprov.tool import Tool as CWLProvTool from dateutil.parser import parse as dt_parse from docker.auth import decode_auth # pylint: disable=E0611 from owslib.util import ServiceException as OWSServiceException @@ -55,6 +57,7 @@ ) from weaver.processes.convert import get_field, json2oas_io, normalize_ordered_io, null, ows2json, wps2json_io from weaver.processes.types import ProcessType +from weaver.provenance import ProvenanceFormat from weaver.quotation.status import QuoteStatus from weaver.status import JOB_STATUS_CATEGORIES, Status, StatusCategory, map_status from weaver.store.base import StoreProcesses @@ 
-76,7 +79,7 @@ ) from weaver.visibility import Visibility from weaver.warning import NonBreakingExceptionWarning, UnsupportedOperationWarning -from weaver.wps.utils import get_wps_client, get_wps_url +from weaver.wps.utils import get_wps_client, get_wps_output_dir, get_wps_url from weaver.wps_restapi import swagger_definitions as sd from weaver.wps_restapi.utils import get_wps_restapi_base_url @@ -94,8 +97,10 @@ AnyExecuteReturnPreference, AnyExecuteTransmissionMode ) + from weaver.formats import AnyContentType from weaver.processes.constants import ProcessSchemaType from weaver.processes.types import AnyProcessType + from weaver.provenance import AnyProvenanceFormat, ProvenancePathType from weaver.quotation.status import AnyQuoteStatus from weaver.status import AnyStatusType, StatusType from weaver.typedefs import ( @@ -1403,18 +1408,20 @@ def response(self, response): response = xml_util.tostring(response) self["response"] = response - def _job_url(self, base_url): - # type: (str) -> str + def process_url(self, container=None): + # type: (Optional[AnySettingsContainer], Optional[str]) -> str + settings = get_settings(container) + base_url = get_wps_restapi_base_url(settings) if self.service is not None: base_url += sd.provider_service.path.format(provider_id=self.service) - job_path = sd.process_job_service.path.format(process_id=self.process, job_id=self.id) - return base_url + job_path + proc_url = sd.process_service.path.format(process_id=self.process) + return base_url + proc_url def job_url(self, container=None, extra_path=None): # type: (Optional[AnySettingsContainer], Optional[str]) -> str - settings = get_settings(container) - base_url = get_wps_restapi_base_url(settings) - return self._job_url(base_url) + (extra_path or "") + proc_url = self.process_url(container) + job_url = sd.job_service.path.format(job_id=self.id) + return proc_url + job_url + (extra_path or "") def status_url(self, container=None): # type: (Optional[AnySettingsContainer]) -> str @@ 
-1466,6 +1473,74 @@ def result_path(self, job_id=None, output_id=None, file_name=None): result_job_path = os.path.join(result_job_path, file_name) return result_job_path + def prov_url(self, container=None, extra_path=None): + # type: (Optional[AnySettingsContainer], Optional[ProvenancePathType]) -> str + extra_path = str(extra_path or "") + prov_path = f"/prov{extra_path}" + return self.job_url(container=container, extra_path=prov_path) + + def prov_path(self, container=None, extra_path=None, prov_format=None): + # type: (Optional[AnySettingsContainer], Optional[ProvenancePathType], Optional[AnyProvenanceFormat]) -> str + """ + Obtain the relative path of the ``PROV`` contents. + """ + job_path = self.result_path() + prov_path = f"{job_path}-prov" + prov_format = ProvenanceFormat.get(prov_format, allow_media_type=True) + _prov_path_mapping = { + (None, None): prov_path, # the directory itself with all metadata + ("/prov", None): f"{prov_path}/metadata/provenance/primary.cwlprov.json", + ("/prov", ProvenanceFormat.PROV_JSON): f"{prov_path}/metadata/provenance/primary.cwlprov.json", + ("/prov", ProvenanceFormat.PROV_JSONLD): f"{prov_path}/metadata/provenance/primary.cwlprov.jsonld", + ("/prov", ProvenanceFormat.PROV_TURTLE): f"{prov_path}/metadata/provenance/primary.cwlprov.ttl", + ("/prov", ProvenanceFormat.PROV_XML): f"{prov_path}/metadata/provenance/primary.cwlprov.xml", + ("/prov", ProvenanceFormat.PROV_N): f"{prov_path}/metadata/provenance/primary.cwlprov.provn", + ("/prov", ProvenanceFormat.PROV_NT): f"{prov_path}/metadata/provenance/primary.cwlprov.nt", + } # type: Dict[Tuple[Optional[ProvenancePathType], ProvenanceFormat], str] + key = (extra_path, prov_format) + resolved_path = _prov_path_mapping.get(key) + if resolved_path: + out_dir = get_wps_output_dir(container) + return os.path.join(out_dir, resolved_path) + return resolved_path + + def prov_data( + self, + container=None, # type: Optional[AnySettingsContainer] + extra_path=None, # type: 
Optional[ProvenancePathType] + prov_format=None, # type: AnyContentType + ): # type: (...) -> Tuple[Optional[str], Optional[AnyContentType]] + """ + Read or retrieve data from the packaged provenance directory contents associated to the :term:`Job`. + """ + prov_path = self.prov_path(container=container, extra_path=extra_path, prov_format=prov_format) + if prov_path and os.path.isfile(prov_path): + with open(prov_path, mode="r", encoding="utf-8") as prov_f: + data = prov_f.read() + fmt = prov_format + else: + prov_path = self.prov_path(container=container) + if not prov_path or not os.path.isdir(prov_path): + return None, None + path = str(extra_path).split("/prov/", 1)[-1] + frag = path.strip("/").split("/") + oper, params = frag[0], frag[1:] + args = ["-d", prov_path, oper] + if oper == "run": + args.extend(["--steps", "--start", "--end", "--duration", "--labels", "--inputs", "--outputs"]) + elif oper in ["inputs", "outputs"]: + args.extend(["--parameters", "--format", "uris"]) + args.extend(params) + tool = CWLProvTool(args) + tool.output = io.StringIO() # override the buffer argument to "print" + result = tool.main() # noqa # function annotated to return nothing, but sometimes returns an error code + if result not in [0, None]: + return None, None + tool.output.seek(0) + data = tool.output.read() + fmt = ContentType.TEXT_PLAIN + return data, fmt + def links(self, container=None, self_link=None): # type: (Optional[AnySettingsContainer], Optional[str]) -> List[Link] """ @@ -1480,7 +1555,7 @@ def links(self, container=None, self_link=None): settings = get_settings(container) html_on = settings.get("weaver.wps_restapi_html", True) base_url = get_wps_restapi_base_url(settings) - job_url = self._job_url(base_url) # full URL + job_url = self.job_url(settings) # full URL job_path = base_url + sd.job_service.path.format(job_id=self.id) job_exec = f"{job_url.rsplit('/', 1)[0]}/execution" job_list = base_url + sd.jobs_service.path @@ -1506,7 +1581,6 @@ def links(self, 
container=None, self_link=None): if self_link in ["status", None]: job_links.extend([ {"href": job_list, "rel": "collection", "title": "List of submitted jobs."}, # IANA - ]) if self.status in JOB_STATUS_CATEGORIES[StatusCategory.FINISHED]: @@ -1519,6 +1593,10 @@ def links(self, container=None, self_link=None): "title": "Job results of successful process execution (direct output values mapping)."}, {"href": f"{job_url}/statistics", "rel": "statistics", # unofficial "title": "Job statistics collected following process execution."}, + {"href": f"{job_url}/prov", "rel": "provenance", # unofficial + "title": "Job provenance collected following process execution."}, + {"href": f"{job_url}/prov", "rel": "https://www.w3.org/ns/prov", # unofficial + "title": "Job provenance collected following process execution."}, ]) else: job_links.append({ @@ -1529,7 +1607,7 @@ def links(self, container=None, self_link=None): "href": f"{job_url}/logs", "rel": "logs", # unofficial "title": "List of collected job logs during process execution." 
}) - if self_link in ["status", "inputs", "outputs", "results", "logs", "exceptions"]: + if self_link in ["status", "inputs", "outputs", "results", "logs", "exceptions", "provenance"]: self_link_body = list(filter(lambda _link: _link["rel"].endswith(self_link), job_links))[-1] self_link_body = copy.deepcopy(self_link_body) # back to specific job if we are in one of its sub-endpoints @@ -1542,8 +1620,8 @@ def links(self, container=None, self_link=None): job_links.extend([self_link_body, self_link_up]) link_meta = {"type": ContentType.APP_JSON, "hreflang": AcceptLanguage.EN_CA} for link in job_links: - for meta, parma in link_meta.items(): - link.setdefault(meta, parma) + for meta, param in link_meta.items(): + link.setdefault(meta, param) return job_links def json(self, container=None): # pylint: disable=W0221,arguments-differ @@ -1956,7 +2034,7 @@ def authorized(cls, file, token): return compare_digest(str(access), str(token)) def encrypt(self, file): - # type: (IO[bytes|str]) -> BytesIO + # type: (IO[Union[bytes, str]]) -> BytesIO """ Encrypt file data using a secret to avoid plain text contents during temporary :term:`Vault` storage. @@ -1972,7 +2050,7 @@ def encrypt(self, file): return BytesIO(digest) def decrypt(self, file): - # type: (IO[bytes|str]) -> BytesIO + # type: (IO[Union[bytes, str]]) -> BytesIO """ Decrypt file contents using secret. 
""" diff --git a/weaver/formats.py b/weaver/formats.py index cddc7dcf9..449429f7c 100644 --- a/weaver/formats.py +++ b/weaver/formats.py @@ -96,11 +96,13 @@ class ContentType(Constants): APP_GZIP = "application/gzip" APP_HDF5 = "application/x-hdf5" APP_JSON = "application/json" + APP_JSONLD = "application/ld+json" APP_RAW_JSON = "application/raw+json" APP_OAS_JSON = "application/vnd.oai.openapi+json; version=3.0" APP_OGC_PKG_JSON = "application/ogcapppkg+json" APP_OGC_PKG_YAML = "application/ogcapppkg+yaml" APP_NETCDF = "application/x-netcdf" + APP_NT = "application/n-triples" APP_OCTET_STREAM = "application/octet-stream" APP_PDF = "application/pdf" APP_TAR = "application/x-tar" # map to existing gzip for CWL @@ -125,6 +127,8 @@ class ContentType(Constants): TEXT_PLAIN = "text/plain" TEXT_RICHTEXT = "text/richtext" TEXT_XML = "text/xml" + TEXT_PROVN = "text/provenance-notation" + TEXT_TURTLE = "text/turtle" VIDEO_MPEG = "video/mpeg" # special handling diff --git a/weaver/processes/wps_package.py b/weaver/processes/wps_package.py index c0d4d7bf2..7d301df82 100644 --- a/weaver/processes/wps_package.py +++ b/weaver/processes/wps_package.py @@ -11,7 +11,6 @@ - `WPS-REST schemas `_ - :mod:`weaver.wps_restapi.api` conformance details """ - import copy import json import logging @@ -30,10 +29,12 @@ import cwltool.process import yaml from cwltool.context import LoadingContext, RuntimeContext +from cwltool.cwlprov.writablebagfile import close_ro, packed_workflow from cwltool.factory import Factory as CWLFactory, WorkflowStatus as CWLException from cwltool.process import shortname, use_custom_schema from cwltool.secrets import SecretStore from pyramid.httpexceptions import HTTPOk, HTTPServiceUnavailable +from pyramid.settings import asbool from pywps import Process from pywps.inout.basic import SOURCE_TYPE, DataHandler, FileHandler, IOHandler, NoneIOHandler from pywps.inout.formats import Format @@ -128,6 +129,7 @@ from weaver.processes.sources import 
retrieve_data_source_url from weaver.processes.types import ProcessType from weaver.processes.utils import load_package_file, map_progress, pull_docker +from weaver.provenance import WeaverResearchObject from weaver.status import STATUS_PYWPS_IDS, Status, StatusCompliant, map_status from weaver.store.base import StoreJobs, StoreProcesses from weaver.utils import ( @@ -1811,6 +1813,50 @@ def setup_runtime(self): } return runtime_params + def setup_provenance(self, loading_context, runtime_context): + # type: (LoadingContext, RuntimeContext) -> None + """ + Configure ``PROV`` runtime options. + + .. seealso:: + - https://www.w3.org/TR/prov-overview/ + - https://cwltool.readthedocs.io/en/latest/CWLProv.html + - https://docs.ogc.org/DRAFTS/24-051.html#_requirements_class_provenance + """ + weaver_cwl_prov = asbool(self.settings.get("weaver.cwl_prov", True)) + if not weaver_cwl_prov: + loading_context.research_obj = None + runtime_context.research_obj = None + runtime_context.prov_obj = None + return + + runtime_context.prov_user = loading_context.user_provenance = True + runtime_context.prov_host = loading_context.host_provenance = True + + if not runtime_context.research_obj: + ro = WeaverResearchObject( + self.job, # align the RO definition with the job (make the UUIDs equal) + self.settings, + runtime_context.make_fs_access(""), + temp_prefix_ro=runtime_context.tmpdir_prefix, + orcid=runtime_context.orcid, + full_name=runtime_context.cwl_full_name, + ) + + loading_context.research_obj = ro + runtime_context.research_obj = ro + + def finalize_provenance(self, runtime_context): + # type: (RuntimeContext) -> None + if runtime_context.research_obj: + # perform packaging of the workflow + packed_wf_str = repr_json(self.package, force_string=True, indent=2) + packed_workflow(runtime_context.research_obj, packed_wf_str) + + # sign-off and persist completed PROV + prov_dir = self.job.prov_path(self.settings) + close_ro(runtime_context.research_obj, prov_dir) + def 
update_requirements(self): # type: () -> None """ @@ -2113,13 +2159,10 @@ def _handler(self, request, response): elif config == WeaverConfiguration.HYBRID: self.remote_execution = problem_needs_remote is not None + loading_context = LoadingContext() if self.remote_execution: # EMS/Hybrid dispatch the execution to ADES or remote WPS - loading_context = LoadingContext() loading_context.construct_tool_object = self.make_tool - else: - # ADES/Hybrid execute the CWL/AppPackage locally - loading_context = None self.update_effective_user() self.update_requirements() @@ -2132,6 +2175,7 @@ def _handler(self, request, response): ) runtime_context = RuntimeContext(kwargs=runtime_params) runtime_context.secret_store = SecretStore() # pre-allocate to reuse the same references as needed + self.setup_provenance(loading_context, runtime_context) try: self.step_launched = [] package_inst, _, self.step_packages = _load_package_content(self.package, @@ -2203,6 +2247,15 @@ def _handler(self, request, response): self.update_status("Generate package outputs done.", PACKAGE_PROGRESS_PREP_OUT, Status.RUNNING) except Exception as exc: raise self.exception_message(PackageExecutionError, exc, "Failed to save package outputs.") + try: + self.finalize_provenance(runtime_context) + except Exception as exc: # pragma: no cover # only safeguard, it's good if this branch never occurs! + self.exception_message( + PackageExecutionError, + exc, + "Failed to save package PROV metadata. Ignoring error to avoid failing execution.", + level=logging.WARN, + ) except Exception: # return log file location by status message since outputs are not obtained by WPS failed process log_url = f"{get_wps_output_url(self.settings)}/{self.uuid}.log" diff --git a/weaver/provenance.py b/weaver/provenance.py new file mode 100644 index 000000000..648674331 --- /dev/null +++ b/weaver/provenance.py @@ -0,0 +1,359 @@ +""" +Definitions related to :term:`Provenance` features and the :term:`W3C` ``PROV`` specification. 
+""" +import hashlib +from typing import TYPE_CHECKING, cast +from urllib.parse import urlparse + +from cwltool.cwlprov import provenance_constants as cwl_prov_const +from cwltool.cwlprov.ro import ResearchObject +from prov import constants as prov_const + +from weaver.__meta__ import __version__ as weaver_version +from weaver.base import Constants +from weaver.formats import ContentType, OutputFormat +from weaver.utils import get_weaver_url + +if TYPE_CHECKING: + from typing import Any, List, Optional, Tuple, Union + from uuid import UUID + + from cwltool.cwlprov.provenance_profile import ProvenanceProfile + from cwltool.stdfsaccess import StdFsAccess + from prov.model import ProvDocument + + from weaver.base import EnumType + from weaver.datatype import Job + from weaver.formats import AnyContentType + from weaver.typedefs import AnyKey, AnySettingsContainer + + AnyProvenanceFormat = Union[AnyContentType, "ProvenanceFormat"] + + +class ProvenancePathType(Constants): + PROV = "/prov" + PROV_INFO = "/prov/info" + PROV_WHO = "/prov/who" + PROV_INPUTS = "/prov/inputs" + PROV_OUTPUTS = "/prov/outputs" + PROV_RUN = "/prov/run" + PROV_RUNS = "/prov/runs" + + @classmethod + def types(cls): + # type: () -> List[str] + return [cls.as_type(prov) for prov in cls.values()] + + @classmethod + def as_type(cls, prov): + # type: (Any) -> Optional[str] + prov = cls.get(prov) + if isinstance(prov, str): + return prov.rsplit("/", 1)[-1] + return None + + @classmethod + def get( # pylint: disable=W0221,W0237 # arguments differ/renamed for clarity + cls, + prov, # type: Union[AnyKey, EnumType, "ProvenancePathType"] + default=None, # type: Optional[Any] + run_id=None, # type: Optional[str] + ): # type: (...) 
-> Optional["ProvenancePathType"] + prov_found = super().get(prov) + if prov_found is not None and run_id is None: + return prov_found + if isinstance(prov, str): + if not prov_found and prov.strip("/") not in ProvenancePathType.types(): + return default + prov = f"/{prov}" if not prov.startswith("/") else prov + prov = f"/prov{prov}" if not prov.startswith("/prov") else prov + if run_id: + if prov.rsplit("/", 1)[-1] in ["run", "inputs", "outputs"]: + prov = f"{prov}/{run_id}" + else: + return default + return cast("ProvenancePathType", prov) + return default + + +class ProvenanceFormat(Constants): + PROV_JSON = "PROV-JSON" + PROV_JSONLD = "PROV-JSONLD" + PROV_XML = "PROV-XML" + PROV_TURTLE = "PROV-TURTLE" + PROV_N = "PROV-N" + PROV_NT = "PROV-NT" + + _media_types = { + ContentType.APP_JSON: PROV_JSON, + ContentType.APP_JSONLD: PROV_JSONLD, + ContentType.TEXT_TURTLE: PROV_TURTLE, + ContentType.TEXT_PROVN: PROV_N, + ContentType.TEXT_XML: PROV_XML, + ContentType.APP_XML: PROV_XML, + ContentType.APP_NT: PROV_NT, + } + _rev_path_types = {_prov_type: _ctype for _ctype, _prov_type in _media_types.items()} + + @classmethod + def get( # pylint: disable=W0221,W0237 # arguments differ/renamed for clarity + cls, + prov_format, # type: Optional[AnyProvenanceFormat] + default=None, # type: Optional[Any] + allow_media_type=False, # type: bool + ): # type: (...) 
-> Optional["ProvenanceFormat"] + prov = super().get(prov_format, default=default) + if prov is None and allow_media_type: + prov = cls._media_types.get(prov_format) + return prov + return prov + + @classmethod + def media_types(cls): + # type: () -> List[ContentType] + return list(cls._media_types) + + @classmethod + def formats(cls): + # type: () -> List["ProvenanceFormat"] + return cls.values() + + @classmethod + def as_media_type(cls, prov_format): + # type: (Optional[AnyProvenanceFormat]) -> Optional[AnyContentType] + return cls._rev_path_types.get(prov_format) + + @classmethod + def resolve_compatible_formats( + cls, + prov, # type: Optional[Union[ProvenancePathType, str]] + prov_format, # type: Optional[Union[ProvenanceFormat, str]] + output_format, # type: Optional[Union[OutputFormat, str]] + ): # type: (...) -> Tuple[Optional[ProvenanceFormat], Optional[str]] + """ + Resolves multiple :class:`OutputFormat` and :class:`ProvenanceFormat` combinations for compatible formats. + + Compatible formats depend on the PROV endpoint being requested. + If output format is not specified, apply the corresponding PROV format that will work transparently. + Otherwise, ensure they are aligned against the expected PROV endpoints and supported :term:`Media-Types`. + + :returns: + Tuple of a resolved PROV format if only the output format was specified, + and the relevant error detail if they are incompatible. 
+ """ + prov = ProvenancePathType.get(prov, default=ProvenancePathType.PROV) + prov_format = ProvenanceFormat.get(prov_format) + default_format = output_format + output_format = OutputFormat.get(output_format) + + # if default was originally falsy, it would have been replaced by 'JSON' + # ignore it in this case to resolve any explicitly specified PROV format by itself + if not output_format or not default_format: + if prov == ProvenancePathType.PROV: + prov_format = prov_format or ProvenanceFormat.PROV_JSON + else: + prov_format = None + return prov_format, None + + out_fmt = output_format.split("+", 1)[0] + err_mismatch = ( + None, + f"output format '{output_format}' conflicts with PROV format '{prov_format}'" + ) + + # only main PROV endpoint supports alternate formats + # all others are plain text only + if prov not in [None, ProvenancePathType.PROV]: + if out_fmt in [OutputFormat.TEXT, OutputFormat.TXT]: + return None, None + return err_mismatch + + if out_fmt in [OutputFormat.JSON, OutputFormat.YAML, OutputFormat.YML]: + if prov_format not in [None, ProvenanceFormat.PROV_JSON, ProvenanceFormat.PROV_JSONLD]: + return err_mismatch + if prov_format is None: + prov_format = ProvenanceFormat.PROV_JSON + return prov_format, None + + if out_fmt in [OutputFormat.XML]: + if prov_format not in [None, ProvenanceFormat.PROV_XML]: + return err_mismatch + if prov_format is None: + prov_format = ProvenanceFormat.PROV_XML + return prov_format, None + + if out_fmt in [OutputFormat.TEXT, OutputFormat.TXT]: + if prov_format not in [None, ProvenanceFormat.PROV_N, ProvenanceFormat.PROV_NT, + ProvenanceFormat.PROV_TURTLE]: + return err_mismatch + if prov_format is None: + prov_format = ProvenanceFormat.PROV_N + return prov_format, None + + return None, f"output format '{output_format}' does not have any PROV equivalent" + + +class WeaverResearchObject(ResearchObject): + """ + Defines extended :term:`Provenance` details with `Weaver` operations and referencing the active server 
instance. + """ + + def __init__(self, job, settings, fs_access, temp_prefix_ro="tmp", orcid="", full_name=""): + # type: (Job, AnySettingsContainer, StdFsAccess, str, str, str) -> None + super(WeaverResearchObject, self).__init__(fs_access, temp_prefix_ro, orcid, full_name) + + # rewrite auto-initialized random UUIDs with Weaver-specific references + self.job = job + self.ro_uuid = job.uuid + self.base_uri = f"arcp://uuid,{self.ro_uuid}/" + self.settings = settings + + @staticmethod + def sha1_uuid(document, identifier): + # type: (ProvDocument, str) -> str + """ + Generate a prefixed SHA1 hash from the identifier value. + """ + sha1_ns = document._namespaces[cwl_prov_const.DATA] + sha1_id = f"{sha1_ns.prefix}:{hashlib.sha1(identifier.encode(), usedforsecurity=False).hexdigest()}" + return sha1_id + + def initialize_provenance(self, full_name, host_provenance, user_provenance, orcid, fsaccess, run_uuid=None): + # type: (str, bool, bool, str, StdFsAccess, Optional[UUID]) -> ProvenanceProfile + """ + Hook `Weaver` metadata onto user provenance step. 
+ """ + prov_profile = super(WeaverResearchObject, self).initialize_provenance( + full_name=full_name, + host_provenance=host_provenance, + user_provenance=user_provenance, + orcid=orcid, + fsaccess=fsaccess, + run_uuid=run_uuid, + ) + document = prov_profile.document + + doi_ns = document.add_namespace("doi", "https://doi.org/") + + weaver_full_name = f"crim-ca/weaver:{weaver_version}" + weaver_code_url = "https://github.com/crim-ca/weaver" + weaver_code_sha1 = self.sha1_uuid(document, weaver_code_url) + weaver_code_entity = document.entity( + weaver_code_sha1, + { + prov_const.PROV_TYPE: prov_const.PROV["PrimarySource"], + prov_const.PROV_LABEL: "Source code repository", + prov_const.PROV_LOCATION: weaver_code_url, + }, + ) + + weaver_url = get_weaver_url(self.settings) + weaver_desc = self.settings.get( + "weaver.wps_metadata_identification_abstract", + "Weaver OGC API Processes Server" + ) + weaver_instance_sha1 = self.sha1_uuid(document, weaver_url) + weaver_instance_meta = [ + (prov_const.PROV_TYPE, prov_const.PROV["SoftwareAgent"]), + (prov_const.PROV_LOCATION, weaver_url), + (prov_const.PROV_LABEL, weaver_desc), + (prov_const.PROV_LABEL, weaver_full_name), + (prov_const.PROV_ATTR_GENERAL_ENTITY, weaver_code_sha1), + (prov_const.PROV_ATTR_SPECIFIC_ENTITY, f"{doi_ns.prefix}:10.5281/zenodo.14210717"), # see CITATION.cff + ] + weaver_instance_agent = document.agent(weaver_instance_sha1, weaver_instance_meta) + + crim_name = "Computer Research Institute of Montréal" + crim_sha1 = self.sha1_uuid(document, crim_name) + crim_entity = document.entity( + crim_sha1, + { + prov_const.PROV_TYPE: prov_const.PROV["Organization"], + cwl_prov_const.FOAF["name"]: crim_name, + cwl_prov_const.SCHEMA["name"]: crim_name, + } + ) + + server_provider_name = self.settings.get("weaver.wps_metadata_provider_name") + server_provider_url = self.settings.get("weaver.wps_metadata_provider_url") + server_provider_meta = [] + server_provider_entity = None + if server_provider_name: +
server_provider_meta.extend([ + (cwl_prov_const.FOAF["name"], server_provider_name), + (cwl_prov_const.SCHEMA["name"], server_provider_name), + ]) + if server_provider_url: + server_provider_meta.extend([ + (prov_const.PROV_LOCATION, server_provider_url), + ]) + if server_provider_meta: + server_provider_id = server_provider_url or server_provider_name + server_provider_sha1 = self.sha1_uuid(document, server_provider_id) + server_provider_meta.extend([ + (prov_const.PROV_TYPE, prov_const.PROV["Organization"]), + (prov_const.PROV_LABEL, "Server Provider"), + ]) + server_provider_entity = document.entity( + server_provider_sha1, + server_provider_meta, + ) + + job_entity = document.entity( + self.job.uuid.urn, + { + prov_const.PROV_TYPE: cwl_prov_const.WFDESC["ProcessRun"], + prov_const.PROV_LOCATION: self.job.job_url(self.settings), + prov_const.PROV_LABEL: "Job Information", + } + ) + proc_url = self.job.process_url(self.settings) + proc_id = f"{self.job.service}:{self.job.process}" if self.job.service else self.job.process + proc_uuid = f"{weaver_instance_sha1}:{proc_id}" + proc_entity = document.entity( + proc_uuid, + { + prov_const.PROV_TYPE: cwl_prov_const.WFDESC["Process"], + prov_const.PROV_LOCATION: proc_url, + prov_const.PROV_LABEL: "Process Description", + } + ) + + # following agents are expected to exist (created by inherited class) + cwltool_agent = document.get_record(cwl_prov_const.ACCOUNT_UUID)[0] + user_agent = document.get_record(cwl_prov_const.USER_UUID)[0] + wf_agent = document.get_record(self.engine_uuid)[0] # current job run aligned with cwl workflow + + # define relationships cross-references: https://wf4ever.github.io/ro/wfprov.owl + document.primary_source(weaver_instance_agent, weaver_code_entity) + document.actedOnBehalfOf(weaver_instance_agent, user_agent) + document.specializationOf(weaver_instance_agent, cwltool_agent) + document.attribution(crim_entity, weaver_code_entity) + document.wasDerivedFrom(cwltool_agent, weaver_instance_agent) 
+ document.wasStartedBy(job_entity, weaver_instance_agent) + document.wasStartedBy(wf_agent, job_entity, time=self.job.created) + document.specializationOf(wf_agent, job_entity) + document.alternateOf(wf_agent, job_entity) + document.wasGeneratedBy(job_entity, proc_entity) + if server_provider_entity: + document.derivation(server_provider_entity, weaver_instance_agent) + document.attribution(server_provider_entity, weaver_instance_agent) + + return prov_profile + + def resolve_user(self): + # type: () -> Tuple[str, str] + """ + Override :mod:`cwltool` default machine user. + """ + weaver_full_name = f"crim-ca/weaver:{weaver_version}" + return weaver_full_name, weaver_full_name + + def resolve_host(self): + # type: () -> Tuple[str, str] + """ + Override :mod:`cwltool` default machine host. + """ + weaver_url = get_weaver_url(self.settings) + weaver_fqdn = urlparse(weaver_url).hostname + return weaver_fqdn, weaver_url diff --git a/weaver/typedefs.py b/weaver/typedefs.py index a41c27a40..08451bd6c 100644 --- a/weaver/typedefs.py +++ b/weaver/typedefs.py @@ -88,6 +88,7 @@ from weaver.visibility import AnyVisibility Path = Union[os.PathLike[str], str, bytes] + URL = str Default = TypeVar("Default") # used for return value that is employed from a provided default value Params = ParamSpec("Params") # use with 'Callable[Params, Return]', 'Params.args' and 'Params.kwargs' diff --git a/weaver/wps_restapi/api.py b/weaver/wps_restapi/api.py index d7cc95640..f0f800e77 100644 --- a/weaver/wps_restapi/api.py +++ b/weaver/wps_restapi/api.py @@ -103,6 +103,7 @@ def get_conformance(category, settings): # ogcapi_proc_part3 = "http://www.opengis.net/spec/ogcapi-processes-3/1.0" ogcapi_proc_enabled = asbool(settings.get("weaver.wps_restapi", True)) ogcapi_proc_html = asbool(settings.get("weaver.wps_restapi_html", True)) + ogcapi_proc_prov = asbool(settings.get("weaver.cwl_prov", True)) ogcapi_proc_conformance = ([ f"{ogcapi_common}/conf/core", 
f"{ogcapi_common}/per/core/additional-link-relations", @@ -523,6 +524,7 @@ def get_conformance(category, settings): # FIXME: support openEO processes (https://github.com/crim-ca/weaver/issues/564) # f"{ogcapi_proc_part3}/conf/openeo-workflows", # f"{ogcapi_proc_part3}/req/openeo-workflows", + f"{ogcapi_proc_part4}/conf/job-management", f"{ogcapi_proc_part4}/conf/jm/create/post-op", f"{ogcapi_proc_part4}/per/job-management/additional-status-codes", # see 'weaver.status.map_status' f"{ogcapi_proc_part4}/per/job-management/create-body", # Weaver has XML for WPS @@ -539,7 +541,7 @@ def get_conformance(category, settings): f"{ogcapi_proc_part4}/req/job-management/create-response-body", f"{ogcapi_proc_part4}/req/job-management/create-response-jobid", f"{ogcapi_proc_part4}/req/job-management/create-response-success", - # FIXME: support Content-Schema and Profile header negociation (https://github.com/crim-ca/weaver/issues/754) + # FIXME: support Content-Schema and Profile header negotiation (https://github.com/crim-ca/weaver/issues/754) # f"{ogcapi_proc_part4}/req/job-management/create-unsupported-schema", f"{ogcapi_proc_part4}/req/job-management/create-unsupported-media-type", f"{ogcapi_proc_part4}/req/job-management/definition-get-op", @@ -552,6 +554,14 @@ def get_conformance(category, settings): f"{ogcapi_proc_part4}/req/job-management/update-patch-op", f"{ogcapi_proc_part4}/req/job-management/update-response", f"{ogcapi_proc_part4}/req/job-management/update-response-locked", + ] + ([ + f"{ogcapi_proc_part4}/req/provenance", + f"{ogcapi_proc_part4}/req/provenance/prov-get-op", + f"{ogcapi_proc_part4}/req/provenance/prov-response", + f"{ogcapi_proc_part4}/req/provenance/prov-content-negotiation", + f"{ogcapi_proc_part4}/req/provenance/inputs-get-op", + f"{ogcapi_proc_part4}/req/provenance/inputs-response", + ] if ogcapi_proc_prov else []) + [ # FIXME: employ 'weaver.wps_restapi.quotation.utils.check_quotation_supported' to add below conditionally # FIXME: 
https://github.com/crim-ca/weaver/issues/156 (billing/quotation) # https://github.com/opengeospatial/ogcapi-processes/tree/master/extensions/billing @@ -660,6 +670,7 @@ def api_frontpage_body(settings): weaver_url = get_weaver_url(settings) weaver_config = get_weaver_configuration(settings) + weaver_rtd_url = "https://pavics-weaver.readthedocs.io/en/latest" weaver_api = asbool(settings.get("weaver.wps_restapi", True)) weaver_api_url = get_wps_restapi_base_url(settings) weaver_api_oas_ui = weaver_url + sd.api_openapi_ui_service.path if weaver_api else None @@ -669,12 +680,19 @@ def api_frontpage_body(settings): weaver_api_ref = settings.get("weaver.wps_restapi_ref", None) if weaver_api else None weaver_api_html = asbool(settings.get("weaver.wps_restapi_html", True)) and weaver_api weaver_api_html_url = f"{weaver_api_url}?f={OutputFormat.HTML}" + weaver_api_prov = asbool(settings.get("weaver.cwl_prov", True)) and weaver_api + weaver_api_prov_doc = f"{weaver_rtd_url}/processes.html#job-provenance" + weaver_api_prov_oas = f"{weaver_api_oas_ui}#/Provenance" if weaver_api_prov else None weaver_wps = asbool(settings.get("weaver.wps")) weaver_wps_url = get_wps_url(settings) if weaver_wps else None + weaver_wps_oas = f"{weaver_api_oas_ui}#/WPS" if weaver_wps else None weaver_conform_url = weaver_url + sd.api_conformance_service.path weaver_process_url = weaver_api_url + sd.processes_service.path weaver_jobs_url = weaver_api_url + sd.jobs_service.path weaver_vault = asbool(settings.get("weaver.vault")) + weaver_vault_url = f"{weaver_api_url}/vault" if weaver_vault else None + weaver_vault_api = f"{weaver_api_oas_ui}#/Vault" if weaver_vault else None + weaver_vault_doc = f"{weaver_rtd_url}/processes.html#vault-upload" weaver_links = [ {"href": weaver_url, "rel": "self", "type": ContentType.APP_JSON, "title": "This landing page."}, {"href": weaver_conform_url, "rel": "http://www.opengis.net/def/rel/ogc/1.0/conformance", @@ -778,10 +796,13 @@ def api_frontpage_body(settings): 
"description": __meta__.__description__, "attribution": __meta__.__author__, "parameters": [ - {"name": "api", "enabled": weaver_api, "url": weaver_api_url, "api": weaver_api_oas_ui}, + {"name": "api", "enabled": weaver_api, "url": weaver_api_url, + "doc": weaver_rtd_url, "api": weaver_api_oas_ui}, {"name": "html", "enabled": weaver_api_html, "url": weaver_api_html_url, "api": weaver_api_oas_ui}, - {"name": "vault", "enabled": weaver_vault}, - {"name": "wps", "enabled": weaver_wps, "url": weaver_wps_url, "api": weaver_api_oas_ui}, + {"name": "prov", "enabled": weaver_api_prov, "doc": weaver_api_prov_doc, "api": weaver_api_prov_oas}, + {"name": "vault", "enabled": weaver_vault, "url": weaver_vault_url, + "doc": weaver_vault_doc, "api": weaver_vault_api}, + {"name": "wps", "enabled": weaver_wps, "url": weaver_wps_url, "api": weaver_wps_oas}, ], "links": weaver_links, } diff --git a/weaver/wps_restapi/examples/job_prov.json b/weaver/wps_restapi/examples/job_prov.json new file mode 100644 index 000000000..3543f60a8 --- /dev/null +++ b/weaver/wps_restapi/examples/job_prov.json @@ -0,0 +1,427 @@ +{ + "prefix": { + "wfprov": "http://purl.org/wf4ever/wfprov#", + "wfdesc": "http://purl.org/wf4ever/wfdesc#", + "cwlprov": "https://w3id.org/cwl/prov#", + "foaf": "http://xmlns.com/foaf/0.1/", + "schema": "http://schema.org/", + "orcid": "https://orcid.org/", + "id": "urn:uuid:", + "data": "urn:hash::sha1:", + "sha256": "nih:sha-256;", + "researchobject": "arcp://uuid,1c49f085-bbd7-410d-a801-81fd42469e8a/", + "metadata": "arcp://uuid,1c49f085-bbd7-410d-a801-81fd42469e8a/metadata/", + "provenance": "arcp://uuid,1c49f085-bbd7-410d-a801-81fd42469e8a/metadata/provenance/", + "wf": "arcp://uuid,1c49f085-bbd7-410d-a801-81fd42469e8a/workflow/packed.cwl#", + "input": "arcp://uuid,1c49f085-bbd7-410d-a801-81fd42469e8a/workflow/primary-job.json#", + "doi": "https://doi.org/", + "wf4ever": "http://purl.org/wf4ever/wf4ever#" + }, + "agent": { + "id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b": [ + 
{}, + { + "prov:type": { + "$": "foaf:OnlineAccount", + "type": "prov:QUALIFIED_NAME" + }, + "prov:location": "http://localhost:4002", + "cwlprov:hostname": "localhost" + }, + { + "prov:type": { + "$": "foaf:OnlineAccount", + "type": "prov:QUALIFIED_NAME" + }, + "prov:label": "crim-ca/weaver:6.1.0", + "foaf:accountName": "crim-ca/weaver:6.1.0" + } + ], + "id:b3a49ee7-f620-4154-9e4d-d2e948748deb": { + "prov:type": [ + { + "$": "schema:Person", + "type": "prov:QUALIFIED_NAME" + }, + { + "$": "prov:Person", + "type": "prov:QUALIFIED_NAME" + } + ], + "prov:label": "crim-ca/weaver:6.1.0", + "foaf:name": "crim-ca/weaver:6.1.0", + "foaf:account": { + "$": "id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b", + "type": "prov:QUALIFIED_NAME" + }, + "schema:name": "crim-ca/weaver:6.1.0" + }, + "id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c": { + "prov:type": [ + { + "$": "prov:SoftwareAgent", + "type": "prov:QUALIFIED_NAME" + }, + { + "$": "wfprov:WorkflowEngine", + "type": "prov:QUALIFIED_NAME" + } + ], + "prov:label": "cwltool 3.1.20240708091338.dev15+g9c05bb7d" + }, + "data:15401f8d937f5d526951c1bf20dcba16a1271d97": { + "prov:type": { + "$": "prov:SoftwareAgent", + "type": "prov:QUALIFIED_NAME" + }, + "prov:location": "https://example.com/weaver", + "prov:label": [ + "Weaver internal WPS used for demo and testing.", + "crim-ca/weaver:6.1.0" + ], + "prov:generalEntity": "data:644e201526525f62152815a76a2dc773450f3dd9", + "prov:specificEntity": "doi:10.5281/zenodo.14210717" + }, + "id:2148aee6-81e4-4bcd-9e48-78ff46a51ff1": { + "prov:type": { + "$": "prov:SoftwareAgent", + "type": "prov:QUALIFIED_NAME" + }, + "cwlprov:image": "debian:stretch-slim", + "prov:label": "Container execution of image debian:stretch-slim" + } + }, + "actedOnBehalfOf": { + "_:id1": { + "prov:delegate": "id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b", + "prov:responsible": "id:b3a49ee7-f620-4154-9e4d-d2e948748deb" + }, + "_:id6": { + "prov:delegate": "data:15401f8d937f5d526951c1bf20dcba16a1271d97", + "prov:responsible": 
"id:b3a49ee7-f620-4154-9e4d-d2e948748deb" + } + }, + "wasStartedBy": { + "_:id2": { + "prov:activity": "id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c", + "prov:starter": "id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b", + "prov:time": "2024-12-12T09:16:17.843783" + }, + "_:id4": { + "prov:activity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a", + "prov:starter": "id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c", + "prov:time": "2024-12-12T09:16:17.843852" + }, + "_:id10": { + "prov:activity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a", + "prov:trigger": "data:15401f8d937f5d526951c1bf20dcba16a1271d97" + }, + "_:id11": { + "prov:activity": "id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c", + "prov:trigger": "id:1c49f085-bbd7-410d-a801-81fd42469e8a", + "prov:time": "2024-12-12T14:15:50.834000+00:00" + } + }, + "activity": { + "id:1c49f085-bbd7-410d-a801-81fd42469e8a": { + "prov:startTime": "2024-12-12T09:16:17.843806", + "prov:type": { + "$": "wfprov:WorkflowRun", + "type": "prov:QUALIFIED_NAME" + }, + "prov:label": "Run of workflow/packed.cwl#main" + } + }, + "wasAssociatedWith": { + "_:id3": { + "prov:activity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a", + "prov:agent": "id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c", + "prov:plan": "wf:main" + }, + "_:id18": { + "prov:activity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a", + "prov:agent": "id:2148aee6-81e4-4bcd-9e48-78ff46a51ff1" + } + }, + "entity": { + "data:644e201526525f62152815a76a2dc773450f3dd9": { + "prov:type": { + "$": "prov:PrimarySource", + "type": "prov:QUALIFIED_NAME" + }, + "prov:label": "Source code repository", + "prov:location": "https://github.com/crim-ca/weaver" + }, + "data:3102f6d7a018ebae572f457d711ed7e1e7a11bc2": { + "prov:type": { + "$": "prov:Organization", + "type": "prov:QUALIFIED_NAME" + }, + "foaf:name": "Computer Research Institute of Montr\u00e9al", + "schema:name": "Computer Research Institute of Montr\u00e9al" + }, + "data:838cdfa4bbf09d1aedd26d79b46bfa8778ede2e0": { + "foaf:name": "CRIM", + "schema:name": "CRIM", + 
"prov:location": "http://pavics-weaver.readthedocs.org/en/latest/", + "prov:type": { + "$": "prov:Organization", + "type": "prov:QUALIFIED_NAME" + }, + "prov:label": "Server Provider" + }, + "id:1c49f085-bbd7-410d-a801-81fd42469e8a": { + "prov:type": { + "$": "wfdesc:ProcessRun", + "type": "prov:QUALIFIED_NAME" + }, + "prov:location": "https://example.com/weaver/processes/echo/jobs/1c49f085-bbd7-410d-a801-81fd42469e8a", + "prov:label": "Job Information" + }, + "data:15401f8d937f5d526951c1bf20dcba16a1271d97:echo": { + "prov:type": { + "$": "wfdesc:Process", + "type": "prov:QUALIFIED_NAME" + }, + "prov:location": "https://example.com/weaver/processes/echo", + "prov:label": "Process Description" + }, + "wf:main": { + "prov:type": [ + { + "$": "wfdesc:Process", + "type": "prov:QUALIFIED_NAME" + }, + { + "$": "prov:Plan", + "type": "prov:QUALIFIED_NAME" + } + ], + "prov:label": "Prospective provenance" + }, + "data:2ef7bde608ce5404e97d5f042f95f89f1c232871": [ + { + "prov:type": { + "$": "wfprov:Artifact", + "type": "prov:QUALIFIED_NAME" + }, + "prov:value": "Hello World!" + }, + { + "prov:type": { + "$": "wfprov:Artifact", + "type": "prov:QUALIFIED_NAME" + }, + "prov:value": "Hello World!" 
+ } + ], + "data:a0b65939670bc2c010f4d5d6a0b3e4e4590fb92b": [ + { + "prov:type": { + "$": "wfprov:Artifact", + "type": "prov:QUALIFIED_NAME" + } + }, + {} + ], + "id:59967079-217e-4bdb-92d7-2ef2f784825c": { + "prov:type": [ + { + "$": "wf4ever:File", + "type": "prov:QUALIFIED_NAME" + }, + { + "$": "wfprov:Artifact", + "type": "prov:QUALIFIED_NAME" + } + ], + "cwlprov:basename": "stdout.log", + "cwlprov:nameroot": "stdout", + "cwlprov:nameext": ".log" + }, + "data:da39a3ee5e6b4b0d3255bfef95601890afd80709": { + "prov:type": { + "$": "wfprov:Artifact", + "type": "prov:QUALIFIED_NAME" + } + }, + "id:2ab450c1-7309-4c5b-b65b-b4dfa44f384b": { + "prov:type": [ + { + "$": "wf4ever:File", + "type": "prov:QUALIFIED_NAME" + }, + { + "$": "wfprov:Artifact", + "type": "prov:QUALIFIED_NAME" + } + ], + "cwlprov:basename": "stderr.log", + "cwlprov:nameroot": "stderr", + "cwlprov:nameext": ".log" + }, + "id:6b04550d-c2bd-400b-858b-14e287bbf8c3": { + "prov:type": [ + { + "$": "wf4ever:File", + "type": "prov:QUALIFIED_NAME" + }, + { + "$": "wfprov:Artifact", + "type": "prov:QUALIFIED_NAME" + } + ], + "cwlprov:basename": "stdout.log", + "cwlprov:nameroot": "stdout", + "cwlprov:nameext": ".log" + } + }, + "wasDerivedFrom": { + "_:id5": { + "prov:generatedEntity": "data:15401f8d937f5d526951c1bf20dcba16a1271d97", + "prov:usedEntity": "data:644e201526525f62152815a76a2dc773450f3dd9", + "prov:type": { + "$": "prov:PrimarySource", + "type": "prov:QUALIFIED_NAME" + } + }, + "_:id9": { + "prov:generatedEntity": "id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b", + "prov:usedEntity": "data:15401f8d937f5d526951c1bf20dcba16a1271d97" + }, + "_:id15": { + "prov:generatedEntity": "data:838cdfa4bbf09d1aedd26d79b46bfa8778ede2e0", + "prov:usedEntity": "data:15401f8d937f5d526951c1bf20dcba16a1271d97" + } + }, + "specializationOf": { + "_:id7": { + "prov:specificEntity": "data:15401f8d937f5d526951c1bf20dcba16a1271d97", + "prov:generalEntity": "id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b" + }, + "_:id12": { + 
"prov:specificEntity": "id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c", + "prov:generalEntity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a" + }, + "_:id20": { + "prov:specificEntity": "id:59967079-217e-4bdb-92d7-2ef2f784825c", + "prov:generalEntity": "data:a0b65939670bc2c010f4d5d6a0b3e4e4590fb92b" + }, + "_:id22": { + "prov:specificEntity": "id:2ab450c1-7309-4c5b-b65b-b4dfa44f384b", + "prov:generalEntity": "data:da39a3ee5e6b4b0d3255bfef95601890afd80709" + }, + "_:id24": { + "prov:specificEntity": "id:6b04550d-c2bd-400b-858b-14e287bbf8c3", + "prov:generalEntity": "data:a0b65939670bc2c010f4d5d6a0b3e4e4590fb92b" + } + }, + "wasAttributedTo": { + "_:id8": { + "prov:entity": "data:3102f6d7a018ebae572f457d711ed7e1e7a11bc2", + "prov:agent": "data:644e201526525f62152815a76a2dc773450f3dd9" + }, + "_:id16": { + "prov:entity": "data:838cdfa4bbf09d1aedd26d79b46bfa8778ede2e0", + "prov:agent": "data:15401f8d937f5d526951c1bf20dcba16a1271d97" + } + }, + "alternateOf": { + "_:id13": { + "prov:alternate1": "id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c", + "prov:alternate2": "id:1c49f085-bbd7-410d-a801-81fd42469e8a" + } + }, + "wasGeneratedBy": { + "_:id14": { + "prov:entity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a", + "prov:activity": "data:15401f8d937f5d526951c1bf20dcba16a1271d97:echo" + }, + "_:id21": { + "prov:entity": "id:59967079-217e-4bdb-92d7-2ef2f784825c", + "prov:activity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a", + "prov:time": "2024-12-12T09:16:18.867039", + "prov:role": { + "$": "wf:main/echo/output", + "type": "prov:QUALIFIED_NAME" + } + }, + "_:id23": { + "prov:entity": "id:2ab450c1-7309-4c5b-b65b-b4dfa44f384b", + "prov:activity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a", + "prov:time": "2024-12-12T09:16:18.867039", + "prov:role": { + "$": "wf:main/echo/PACKAGE_OUTPUT_HOOK_LOG_c46f5d8d-e599-4152-8ded-f529e19524d4", + "type": "prov:QUALIFIED_NAME" + } + }, + "_:id25": { + "prov:entity": "id:6b04550d-c2bd-400b-858b-14e287bbf8c3", + "prov:activity": 
"id:1c49f085-bbd7-410d-a801-81fd42469e8a", + "prov:time": "2024-12-12T09:16:18.867039", + "prov:role": { + "$": "wf:main/echo/PACKAGE_OUTPUT_HOOK_LOG_606e3f88-0388-4c97-b139-58fed24b37c1", + "type": "prov:QUALIFIED_NAME" + } + }, + "_:id27": { + "prov:entity": "id:59967079-217e-4bdb-92d7-2ef2f784825c", + "prov:activity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a", + "prov:time": "2024-12-12T09:16:18.869180", + "prov:role": { + "$": "wf:main/primary/output", + "type": "prov:QUALIFIED_NAME" + } + }, + "_:id28": { + "prov:entity": "id:2ab450c1-7309-4c5b-b65b-b4dfa44f384b", + "prov:activity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a", + "prov:time": "2024-12-12T09:16:18.869180", + "prov:role": { + "$": "wf:main/primary/PACKAGE_OUTPUT_HOOK_LOG_c46f5d8d-e599-4152-8ded-f529e19524d4", + "type": "prov:QUALIFIED_NAME" + } + }, + "_:id29": { + "prov:entity": "id:6b04550d-c2bd-400b-858b-14e287bbf8c3", + "prov:activity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a", + "prov:time": "2024-12-12T09:16:18.869180", + "prov:role": { + "$": "wf:main/primary/PACKAGE_OUTPUT_HOOK_LOG_606e3f88-0388-4c97-b139-58fed24b37c1", + "type": "prov:QUALIFIED_NAME" + } + } + }, + "used": { + "_:id17": { + "prov:activity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a", + "prov:entity": "data:2ef7bde608ce5404e97d5f042f95f89f1c232871", + "prov:time": "2024-12-12T09:16:17.846280", + "prov:role": { + "$": "wf:main/message", + "type": "prov:QUALIFIED_NAME" + } + }, + "_:id19": { + "prov:activity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a", + "prov:entity": "data:2ef7bde608ce5404e97d5f042f95f89f1c232871", + "prov:time": "2024-12-12T09:16:17.863017", + "prov:role": { + "$": "wf:main/echo/message", + "type": "prov:QUALIFIED_NAME" + } + } + }, + "wasEndedBy": { + "_:id26": { + "prov:activity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a", + "prov:ender": "id:1c49f085-bbd7-410d-a801-81fd42469e8a", + "prov:time": "2024-12-12T09:16:18.867034" + }, + "_:id30": { + "prov:activity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a", 
+ "prov:ender": "id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c", + "prov:time": "2024-12-12T09:16:18.869299" + } + } +} diff --git a/weaver/wps_restapi/examples/job_prov.txt b/weaver/wps_restapi/examples/job_prov.txt new file mode 100644 index 000000000..85d8c34c9 --- /dev/null +++ b/weaver/wps_restapi/examples/job_prov.txt @@ -0,0 +1,71 @@ +document + prefix wfprov + prefix wfdesc + prefix cwlprov + prefix foaf + prefix schema + prefix orcid + prefix id + prefix data + prefix sha256 + prefix researchobject + prefix metadata + prefix provenance + prefix wf + prefix input + prefix doi + prefix wf4ever + + agent(id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b) + agent(id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b, [prov:type='foaf:OnlineAccount', prov:location="http://localhost:4002", cwlprov:hostname="localhost"]) + agent(id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b, [prov:type='foaf:OnlineAccount', prov:label="crim-ca/weaver:6.0.0", foaf:accountName="crim-ca/weaver:6.0.0"]) + agent(id:b3a49ee7-f620-4154-9e4d-d2e948748deb, [prov:type='schema:Person', prov:type='prov:Person', prov:label="crim-ca/weaver:6.0.0", foaf:name="crim-ca/weaver:6.0.0", foaf:account='id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b', schema:name="crim-ca/weaver:6.0.0"]) + actedOnBehalfOf(id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b, id:b3a49ee7-f620-4154-9e4d-d2e948748deb, -) + agent(id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c, [prov:type='prov:SoftwareAgent', prov:type='wfprov:WorkflowEngine', prov:label="cwltool 3.1.20240708091338.dev15+g9c05bb7d"]) + wasStartedBy(id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c, -, id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b, 2024-12-12T09:16:17.843783) + activity(id:1c49f085-bbd7-410d-a801-81fd42469e8a, 2024-12-12T09:16:17.843806, -, [prov:type='wfprov:WorkflowRun', prov:label="Run of workflow/packed.cwl#main"]) + wasAssociatedWith(id:1c49f085-bbd7-410d-a801-81fd42469e8a, id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c, wf:main) + wasStartedBy(id:1c49f085-bbd7-410d-a801-81fd42469e8a, -, 
id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c, 2024-12-12T09:16:17.843852) + entity(data:644e201526525f62152815a76a2dc773450f3dd9, [prov:type='prov:PrimarySource', prov:label="Source code repository", prov:location="https://github.com/crim-ca/weaver"]) + agent(data:15401f8d937f5d526951c1bf20dcba16a1271d97, [prov:type='prov:SoftwareAgent', prov:location="http://localhost:4002", prov:label="Weaver internal WPS used for demo and testing.", prov:label="crim-ca/weaver:6.0.0", prov:generalEntity='data:644e201526525f62152815a76a2dc773450f3dd9', prov:specificEntity='doi:10.5281/zenodo.14210717']) + entity(data:3102f6d7a018ebae572f457d711ed7e1e7a11bc2, [prov:type='prov:Organization', foaf:name="Computer Research Institute of Montréal", schema:name="Computer Research Institute of Montréal"]) + entity(data:838cdfa4bbf09d1aedd26d79b46bfa8778ede2e0, [foaf:name="CRIM", schema:name="CRIM", prov:location="http://pavics-weaver.readthedocs.org/en/latest/", prov:type='prov:Organization', prov:label="Server Provider"]) + entity(id:1c49f085-bbd7-410d-a801-81fd42469e8a, [prov:type='wfdesc:ProcessRun', prov:location="http://localhost:4002/processes/echo/jobs/1c49f085-bbd7-410d-a801-81fd42469e8a", prov:label="Job Information"]) + entity(data:15401f8d937f5d526951c1bf20dcba16a1271d97:echo, [prov:type='wfdesc:Process', prov:location="http://localhost:4002/processes/echo", prov:label="Process Description"]) + wasDerivedFrom(data:15401f8d937f5d526951c1bf20dcba16a1271d97, data:644e201526525f62152815a76a2dc773450f3dd9, -, -, -, [prov:type='prov:PrimarySource']) + actedOnBehalfOf(data:15401f8d937f5d526951c1bf20dcba16a1271d97, id:b3a49ee7-f620-4154-9e4d-d2e948748deb, -) + specializationOf(data:15401f8d937f5d526951c1bf20dcba16a1271d97, id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b) + wasAttributedTo(data:3102f6d7a018ebae572f457d711ed7e1e7a11bc2, data:644e201526525f62152815a76a2dc773450f3dd9) + wasDerivedFrom(id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b, data:15401f8d937f5d526951c1bf20dcba16a1271d97, -, -, -) +
wasStartedBy(id:1c49f085-bbd7-410d-a801-81fd42469e8a, data:15401f8d937f5d526951c1bf20dcba16a1271d97, -, -) + wasStartedBy(id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c, id:1c49f085-bbd7-410d-a801-81fd42469e8a, -, 2024-12-12T14:15:50.834000+00:00) + specializationOf(id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c, id:1c49f085-bbd7-410d-a801-81fd42469e8a) + alternateOf(id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c, id:1c49f085-bbd7-410d-a801-81fd42469e8a) + wasGeneratedBy(id:1c49f085-bbd7-410d-a801-81fd42469e8a, data:15401f8d937f5d526951c1bf20dcba16a1271d97:echo, -) + wasDerivedFrom(data:838cdfa4bbf09d1aedd26d79b46bfa8778ede2e0, data:15401f8d937f5d526951c1bf20dcba16a1271d97, -, -, -) + wasAttributedTo(data:838cdfa4bbf09d1aedd26d79b46bfa8778ede2e0, data:15401f8d937f5d526951c1bf20dcba16a1271d97) + entity(wf:main, [prov:type='wfdesc:Process', prov:type='prov:Plan', prov:label="Prospective provenance"]) + entity(data:2ef7bde608ce5404e97d5f042f95f89f1c232871, [prov:type='wfprov:Artifact', prov:value="Hello World!"]) + used(id:1c49f085-bbd7-410d-a801-81fd42469e8a, data:2ef7bde608ce5404e97d5f042f95f89f1c232871, 2024-12-12T09:16:17.846280, [prov:role='wf:main/message']) + agent(id:2148aee6-81e4-4bcd-9e48-78ff46a51ff1, [prov:type='prov:SoftwareAgent', cwlprov:image="debian:stretch-slim", prov:label="Container execution of image debian:stretch-slim"]) + wasAssociatedWith(id:1c49f085-bbd7-410d-a801-81fd42469e8a, id:2148aee6-81e4-4bcd-9e48-78ff46a51ff1, -) + entity(data:2ef7bde608ce5404e97d5f042f95f89f1c232871, [prov:type='wfprov:Artifact', prov:value="Hello World!"]) + used(id:1c49f085-bbd7-410d-a801-81fd42469e8a, data:2ef7bde608ce5404e97d5f042f95f89f1c232871, 2024-12-12T09:16:17.863017, [prov:role='wf:main/echo/message']) + entity(data:a0b65939670bc2c010f4d5d6a0b3e4e4590fb92b, [prov:type='wfprov:Artifact']) + entity(id:59967079-217e-4bdb-92d7-2ef2f784825c, [prov:type='wf4ever:File', prov:type='wfprov:Artifact', cwlprov:basename="stdout.log", cwlprov:nameroot="stdout", cwlprov:nameext=".log"]) + 
specializationOf(id:59967079-217e-4bdb-92d7-2ef2f784825c, data:a0b65939670bc2c010f4d5d6a0b3e4e4590fb92b) + wasGeneratedBy(id:59967079-217e-4bdb-92d7-2ef2f784825c, id:1c49f085-bbd7-410d-a801-81fd42469e8a, 2024-12-12T09:16:18.867039, [prov:role='wf:main/echo/output']) + entity(data:da39a3ee5e6b4b0d3255bfef95601890afd80709, [prov:type='wfprov:Artifact']) + entity(id:2ab450c1-7309-4c5b-b65b-b4dfa44f384b, [prov:type='wf4ever:File', prov:type='wfprov:Artifact', cwlprov:basename="stderr.log", cwlprov:nameroot="stderr", cwlprov:nameext=".log"]) + specializationOf(id:2ab450c1-7309-4c5b-b65b-b4dfa44f384b, data:da39a3ee5e6b4b0d3255bfef95601890afd80709) + wasGeneratedBy(id:2ab450c1-7309-4c5b-b65b-b4dfa44f384b, id:1c49f085-bbd7-410d-a801-81fd42469e8a, 2024-12-12T09:16:18.867039, [prov:role='wf:main/echo/PACKAGE_OUTPUT_HOOK_LOG_c46f5d8d-e599-4152-8ded-f529e19524d4']) + entity(data:a0b65939670bc2c010f4d5d6a0b3e4e4590fb92b) + entity(id:6b04550d-c2bd-400b-858b-14e287bbf8c3, [prov:type='wf4ever:File', prov:type='wfprov:Artifact', cwlprov:basename="stdout.log", cwlprov:nameroot="stdout", cwlprov:nameext=".log"]) + specializationOf(id:6b04550d-c2bd-400b-858b-14e287bbf8c3, data:a0b65939670bc2c010f4d5d6a0b3e4e4590fb92b) + wasGeneratedBy(id:6b04550d-c2bd-400b-858b-14e287bbf8c3, id:1c49f085-bbd7-410d-a801-81fd42469e8a, 2024-12-12T09:16:18.867039, [prov:role='wf:main/echo/PACKAGE_OUTPUT_HOOK_LOG_606e3f88-0388-4c97-b139-58fed24b37c1']) + wasEndedBy(id:1c49f085-bbd7-410d-a801-81fd42469e8a, -, id:1c49f085-bbd7-410d-a801-81fd42469e8a, 2024-12-12T09:16:18.867034) + wasGeneratedBy(id:59967079-217e-4bdb-92d7-2ef2f784825c, id:1c49f085-bbd7-410d-a801-81fd42469e8a, 2024-12-12T09:16:18.869180, [prov:role='wf:main/primary/output']) + wasGeneratedBy(id:2ab450c1-7309-4c5b-b65b-b4dfa44f384b, id:1c49f085-bbd7-410d-a801-81fd42469e8a, 2024-12-12T09:16:18.869180, [prov:role='wf:main/primary/PACKAGE_OUTPUT_HOOK_LOG_c46f5d8d-e599-4152-8ded-f529e19524d4']) + 
wasGeneratedBy(id:6b04550d-c2bd-400b-858b-14e287bbf8c3, id:1c49f085-bbd7-410d-a801-81fd42469e8a, 2024-12-12T09:16:18.869180, [prov:role='wf:main/primary/PACKAGE_OUTPUT_HOOK_LOG_606e3f88-0388-4c97-b139-58fed24b37c1']) + wasEndedBy(id:1c49f085-bbd7-410d-a801-81fd42469e8a, -, id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c, 2024-12-12T09:16:18.869299) +endDocument diff --git a/weaver/wps_restapi/examples/job_prov.xml b/weaver/wps_restapi/examples/job_prov.xml new file mode 100644 index 000000000..c25e44535 --- /dev/null +++ b/weaver/wps_restapi/examples/job_prov.xml @@ -0,0 +1,267 @@ + + + + + http://localhost:4002 + foaf:OnlineAccount + localhost + + + crim-ca/weaver:6.0.0 + foaf:OnlineAccount + crim-ca/weaver:6.0.0 + + + crim-ca/weaver:6.0.0 + schema:Person + id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b + crim-ca/weaver:6.0.0 + crim-ca/weaver:6.0.0 + + + + + + + cwltool 3.1.20240708091338.dev15+g9c05bb7d + wfprov:WorkflowEngine + + + + + 2024-12-12T09:16:17.843783 + + + 2024-12-12T09:16:17.843806 + Run of workflow/packed.cwl#main + wfprov:WorkflowRun + + + + + + + + + + 2024-12-12T09:16:17.843852 + + + Source code repository + https://github.com/crim-ca/weaver + + + Weaver internal WPS used for demo and testing. + crim-ca/weaver:6.0.0 + http://localhost:4002 + + + + + Computer Research Institute of Montréal + Computer Research Institute of Montréal + + + Server Provider + http://pavics-weaver.readthedocs.org/en/latest/ + CRIM + CRIM + + + Job Information + http://localhost:4002/processes/echo/jobs/1c49f085-bbd7-410d-a801-81fd42469e8a + wfdesc:ProcessRun + + + Process Description + http://localhost:4002/processes/echo + wfdesc:Process + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 2024-12-12T14:15:50.834000+00:00 + + + + + + + + + + + + + + + + + + + + + + + Prospective provenance + wfdesc:Process + + + wfprov:Artifact + Hello World! 
+ + + + + 2024-12-12T09:16:17.846280 + wf:main/message + + + Container execution of image debian:stretch-slim + debian:stretch-slim + + + + + + + wfprov:Artifact + Hello World! + + + + + 2024-12-12T09:16:17.863017 + wf:main/echo/message + + + wfprov:Artifact + + + wf4ever:File + wfprov:Artifact + stdout.log + .log + stdout + + + + + + + + + 2024-12-12T09:16:18.867039 + wf:main/echo/output + + + wfprov:Artifact + + + wf4ever:File + wfprov:Artifact + stderr.log + .log + stderr + + + + + + + + + 2024-12-12T09:16:18.867039 + wf:main/echo/PACKAGE_OUTPUT_HOOK_LOG_c46f5d8d-e599-4152-8ded-f529e19524d4 + + + + wf4ever:File + wfprov:Artifact + stdout.log + .log + stdout + + + + + + + + + 2024-12-12T09:16:18.867039 + wf:main/echo/PACKAGE_OUTPUT_HOOK_LOG_606e3f88-0388-4c97-b139-58fed24b37c1 + + + + + 2024-12-12T09:16:18.867034 + + + + + 2024-12-12T09:16:18.869180 + wf:main/primary/output + + + + + 2024-12-12T09:16:18.869180 + wf:main/primary/PACKAGE_OUTPUT_HOOK_LOG_c46f5d8d-e599-4152-8ded-f529e19524d4 + + + + + 2024-12-12T09:16:18.869180 + wf:main/primary/PACKAGE_OUTPUT_HOOK_LOG_606e3f88-0388-4c97-b139-58fed24b37c1 + + + + + 2024-12-12T09:16:18.869299 + + diff --git a/weaver/wps_restapi/examples/job_prov_info.txt b/weaver/wps_restapi/examples/job_prov_info.txt new file mode 100644 index 000000000..b353a3018 --- /dev/null +++ b/weaver/wps_restapi/examples/job_prov_info.txt @@ -0,0 +1,5 @@ +Research Object of CWL workflow run +Research Object ID: arcp://uuid,1c49f085-bbd7-410d-a801-81fd42469e8a/ +Profile: https://w3id.org/cwl/prov/0.6.0 +Workflow run ID: urn:uuid:1c49f085-bbd7-410d-a801-81fd42469e8a +Packaged: 2024-12-12 diff --git a/weaver/wps_restapi/examples/job_prov_run.txt b/weaver/wps_restapi/examples/job_prov_run.txt new file mode 100644 index 000000000..cdb34242c --- /dev/null +++ b/weaver/wps_restapi/examples/job_prov_run.txt @@ -0,0 +1,10 @@ +2024-12-12 09:16:17.843852 Flow 1c49f085-bbd7-410d-a801-81fd42469e8a [ Run of workflow/packed.cwl#main +2024-12-12 
09:16:17.846280 In 2ef7bde608ce5404e97d5f042f95f89f1c232871 < wf:main/message +2024-12-12 09:16:17.863017 In 2ef7bde608ce5404e97d5f042f95f89f1c232871 < wf:main/echo/message + 2024-12-12 09:16:18.867039 Out 59967079-217e-4bdb-92d7-2ef2f784825c > wf:main/echo/output + 2024-12-12 09:16:18.867039 Out 2ab450c1-7309-4c5b-b65b-b4dfa44f384b > wf:main/echo/PACKAGE_OUTPUT_HOOK_LOG_c46f5d8d-e599-4152-8ded-f529e19524d4 + 2024-12-12 09:16:18.867039 Out 6b04550d-c2bd-400b-858b-14e287bbf8c3 > wf:main/echo/PACKAGE_OUTPUT_HOOK_LOG_606e3f88-0388-4c97-b139-58fed24b37c1 + 2024-12-12 09:16:18.869180 Out 59967079-217e-4bdb-92d7-2ef2f784825c > wf:main/primary/output + 2024-12-12 09:16:18.869180 Out 2ab450c1-7309-4c5b-b65b-b4dfa44f384b > wf:main/primary/PACKAGE_OUTPUT_HOOK_LOG_c46f5d8d-e599-4152-8ded-f529e19524d4 + 2024-12-12 09:16:18.869180 Out 6b04550d-c2bd-400b-858b-14e287bbf8c3 > wf:main/primary/PACKAGE_OUTPUT_HOOK_LOG_606e3f88-0388-4c97-b139-58fed24b37c1 + 2024-12-12 09:16:18.867034 Flow 1c49f085-bbd7-410d-a801-81fd42469e8a ] Run of workflow/packed.cwl#main (0:00:01.023182) diff --git a/weaver/wps_restapi/examples/job_prov_who.txt b/weaver/wps_restapi/examples/job_prov_who.txt new file mode 100644 index 000000000..a20e771f4 --- /dev/null +++ b/weaver/wps_restapi/examples/job_prov_who.txt @@ -0,0 +1,2 @@ +Packaged By: cwltool 3.1.20240708091338.dev15+g9c05bb7d +Executed By: crim-ca/weaver:6.1.0 diff --git a/weaver/wps_restapi/jobs/__init__.py b/weaver/wps_restapi/jobs/__init__.py index d9df9ee7b..e28ecc3ac 100644 --- a/weaver/wps_restapi/jobs/__init__.py +++ b/weaver/wps_restapi/jobs/__init__.py @@ -1,6 +1,10 @@ import logging from typing import TYPE_CHECKING +from pyramid.settings import asbool + +from weaver.utils import get_settings + if TYPE_CHECKING: from pyramid.config import Configurator @@ -11,3 +15,14 @@ def includeme(config): # type: (Configurator) -> None LOGGER.info("Adding WPS REST API jobs...") config.include("weaver.wps_restapi.jobs.jobs") + + settings = 
get_settings(config) + weaver_cwl_prov = asbool(settings.get("weaver.cwl_prov", True)) + if not weaver_cwl_prov: + LOGGER.warning( + "Skipping Weaver PROV views [weaver.cwl_prov=false]. " + "Job Provenance endpoints will not be available." + ) + else: + LOGGER.info("Adding Weaver REST API Job Provenance....") + config.include("weaver.wps_restapi.jobs.prov") diff --git a/weaver/wps_restapi/jobs/jobs.py b/weaver/wps_restapi/jobs/jobs.py index e5a17e953..e507e2826 100644 --- a/weaver/wps_restapi/jobs/jobs.py +++ b/weaver/wps_restapi/jobs/jobs.py @@ -71,7 +71,7 @@ accept=ContentType.TEXT_HTML, renderer="weaver.wps_restapi:templates/responses/job_listing.mako", response_schemas=sd.derive_responses( - sd.get_prov_all_jobs_responses, + sd.get_provider_all_jobs_responses, sd.GenericHTMLResponse(name="HTMLProviderJobListing", description="Listing of jobs.") ), ) @@ -80,7 +80,7 @@ schema=sd.GetProviderJobsEndpoint(), accept=ContentType.APP_JSON, renderer=OutputFormat.JSON, - response_schemas=sd.get_prov_all_jobs_responses, + response_schemas=sd.get_provider_all_jobs_responses, ) @sd.process_jobs_service.get( tags=[sd.TAG_JOBS, sd.TAG_PROCESSES], @@ -325,7 +325,7 @@ def trigger_job_execution(request): for profile in JobStatusSchema.values() ], renderer=OutputFormat.JSON, - response_schemas=sd.get_prov_single_job_status_responses, + response_schemas=sd.get_provider_single_job_status_responses, ) @sd.process_job_service.get( tags=[sd.TAG_PROCESSES, sd.TAG_JOBS, sd.TAG_STATUS], @@ -401,7 +401,7 @@ def update_pending_job(request): schema=sd.DeleteProviderJobEndpoint(), accept=ContentType.APP_JSON, renderer=OutputFormat.JSON, - response_schemas=sd.delete_prov_job_responses, + response_schemas=sd.delete_provider_job_responses, ) @sd.process_job_service.delete( tags=[sd.TAG_JOBS, sd.TAG_DISMISS, sd.TAG_PROCESSES], @@ -499,7 +499,7 @@ def cancel_job_batch(request): schema=sd.ProviderInputsEndpoint(), accept=ContentType.APP_JSON, renderer=OutputFormat.JSON, - 
response_schemas=sd.get_prov_inputs_responses, + response_schemas=sd.get_provider_inputs_responses, ) @sd.process_inputs_service.get( tags=[sd.TAG_JOBS, sd.TAG_RESULTS, sd.TAG_PROCESSES], @@ -555,7 +555,7 @@ def get_job_inputs(request): schema=sd.ProviderOutputsEndpoint(), accept=ContentType.APP_JSON, renderer=OutputFormat.JSON, - response_schemas=sd.get_prov_outputs_responses, + response_schemas=sd.get_provider_outputs_responses, ) @sd.process_outputs_service.get( tags=[sd.TAG_JOBS, sd.TAG_RESULTS, sd.TAG_PROCESSES], @@ -593,7 +593,7 @@ def get_job_outputs(request): schema=sd.ProviderResultsEndpoint(), accept=ContentType.APP_JSON, renderer=OutputFormat.JSON, - response_schemas=sd.get_prov_results_responses, + response_schemas=sd.get_provider_results_responses, ) @sd.process_results_service.get( tags=[sd.TAG_JOBS, sd.TAG_RESULTS, sd.TAG_PROCESSES], @@ -625,21 +625,21 @@ def get_job_results(request): schema=sd.ProviderExceptionsEndpoint(), accept=ContentType.APP_JSON, renderer=OutputFormat.JSON, - response_schemas=sd.get_prov_exceptions_responses, + response_schemas=sd.get_provider_exceptions_responses, ) @sd.process_exceptions_service.get( tags=[sd.TAG_JOBS, sd.TAG_EXCEPTIONS, sd.TAG_PROCESSES], schema=sd.ProcessExceptionsEndpoint(), accept=ContentType.APP_JSON, renderer=OutputFormat.JSON, - response_schemas=sd.get_exceptions_responses, + response_schemas=sd.get_job_exceptions_responses, ) @sd.job_exceptions_service.get( tags=[sd.TAG_JOBS, sd.TAG_EXCEPTIONS], schema=sd.JobExceptionsEndpoint(), accept=ContentType.APP_JSON, renderer=OutputFormat.JSON, - response_schemas=sd.get_exceptions_responses, + response_schemas=sd.get_job_exceptions_responses, ) @log_unhandled_exceptions(logger=LOGGER, message=sd.InternalServerErrorResponseSchema.description) def get_job_exceptions(request): @@ -664,7 +664,7 @@ def get_job_exceptions(request): ContentType.TEXT_PLAIN, ], renderer=OutputFormat.JSON, - response_schemas=sd.get_prov_logs_responses, + 
response_schemas=sd.get_provider_logs_responses, ) @sd.process_logs_service.get( tags=[sd.TAG_JOBS, sd.TAG_LOGS, sd.TAG_PROCESSES], @@ -677,7 +677,7 @@ def get_job_exceptions(request): ContentType.TEXT_PLAIN, ], renderer=OutputFormat.JSON, - response_schemas=sd.get_logs_responses, + response_schemas=sd.get_job_logs_responses, ) @sd.job_logs_service.get( tags=[sd.TAG_JOBS, sd.TAG_LOGS], @@ -690,7 +690,7 @@ def get_job_exceptions(request): ContentType.TEXT_PLAIN, ], renderer=OutputFormat.JSON, - response_schemas=sd.get_logs_responses, + response_schemas=sd.get_job_logs_responses, ) @log_unhandled_exceptions(logger=LOGGER, message=sd.InternalServerErrorResponseSchema.description) def get_job_logs(request): @@ -717,21 +717,21 @@ def get_job_logs(request): schema=sd.ProviderJobStatisticsEndpoint(), accept=ContentType.APP_JSON, renderer=OutputFormat.JSON, - response_schemas=sd.get_prov_stats_responses, + response_schemas=sd.get_provider_stats_responses, ) @sd.process_stats_service.get( tags=[sd.TAG_JOBS, sd.TAG_STATISTICS, sd.TAG_PROCESSES], schema=sd.ProcessJobStatisticsEndpoint(), accept=ContentType.APP_JSON, renderer=OutputFormat.JSON, - response_schemas=sd.get_stats_responses, + response_schemas=sd.get_job_stats_responses, ) @sd.job_stats_service.get( tags=[sd.TAG_JOBS, sd.TAG_STATISTICS], schema=sd.JobStatisticsEndpoint(), accept=ContentType.APP_JSON, renderer=OutputFormat.JSON, - response_schemas=sd.get_stats_responses, + response_schemas=sd.get_job_stats_responses, ) @log_unhandled_exceptions(logger=LOGGER, message=sd.InternalServerErrorResponseSchema.description) def get_job_stats(request): @@ -805,14 +805,6 @@ def includeme(config): config.add_cornice_service(sd.job_exceptions_service) config.add_cornice_service(sd.job_logs_service) config.add_cornice_service(sd.job_stats_service) - config.add_cornice_service(sd.provider_job_service) - config.add_cornice_service(sd.provider_jobs_service) - config.add_cornice_service(sd.provider_results_service) - 
config.add_cornice_service(sd.provider_outputs_service) - config.add_cornice_service(sd.provider_inputs_service) - config.add_cornice_service(sd.provider_exceptions_service) - config.add_cornice_service(sd.provider_logs_service) - config.add_cornice_service(sd.provider_stats_service) config.add_cornice_service(sd.process_jobs_service) config.add_cornice_service(sd.process_job_service) config.add_cornice_service(sd.process_results_service) @@ -821,6 +813,14 @@ def includeme(config): config.add_cornice_service(sd.process_exceptions_service) config.add_cornice_service(sd.process_logs_service) config.add_cornice_service(sd.process_stats_service) + config.add_cornice_service(sd.provider_job_service) + config.add_cornice_service(sd.provider_jobs_service) + config.add_cornice_service(sd.provider_results_service) + config.add_cornice_service(sd.provider_outputs_service) + config.add_cornice_service(sd.provider_inputs_service) + config.add_cornice_service(sd.provider_exceptions_service) + config.add_cornice_service(sd.provider_logs_service) + config.add_cornice_service(sd.provider_stats_service) # backward compatibility routes (deprecated) config.add_cornice_service(sd.job_result_service) diff --git a/weaver/wps_restapi/jobs/prov.py b/weaver/wps_restapi/jobs/prov.py new file mode 100644 index 000000000..c79db2c5b --- /dev/null +++ b/weaver/wps_restapi/jobs/prov.py @@ -0,0 +1,238 @@ +import logging +from typing import TYPE_CHECKING + +from weaver.exceptions import log_unhandled_exceptions +from weaver.formats import ContentType +from weaver.wps_restapi import swagger_definitions as sd +from weaver.wps_restapi.jobs.utils import get_job_prov_response + +if TYPE_CHECKING: + from pyramid.config import Configurator + + from weaver.typedefs import AnyResponseType, PyramidRequest + +LOGGER = logging.getLogger(__name__) + + +@sd.provider_prov_service.get( + tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROVIDERS], + schema=sd.ProviderJobProvEndpoint(), + 
accept=sd.JobProvAcceptHeader.validator.choices, + response_schemas=sd.get_job_prov_responses, +) +@sd.process_prov_service.get( + tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROCESSES], + schema=sd.ProcessJobProvEndpoint(), + accept=sd.JobProvAcceptHeader.validator.choices, + response_schemas=sd.get_job_prov_responses, +) +@sd.job_prov_service.get( + tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE], + schema=sd.JobProvEndpoint(), + accept=sd.JobProvAcceptHeader.validator.choices, + response_schemas=sd.get_job_prov_responses, +) +@sd.provider_prov_info_service.get( + tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROVIDERS], + schema=sd.ProviderJobProvMetadataEndpoint(), + accept=ContentType.TEXT_PLAIN, + response_schemas=sd.get_job_prov_metadata_responses, +) +@sd.process_prov_info_service.get( + tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROCESSES], + schema=sd.ProcessJobProvMetadataEndpoint(), + accept=ContentType.TEXT_PLAIN, + response_schemas=sd.get_job_prov_metadata_responses, +) +@sd.job_prov_info_service.get( + tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE], + schema=sd.JobProvMetadataEndpoint(), + accept=ContentType.TEXT_PLAIN, + response_schemas=sd.get_job_prov_metadata_responses, +) +@sd.provider_prov_who_service.get( + tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROVIDERS], + schema=sd.ProviderJobProvMetadataEndpoint(), + accept=ContentType.TEXT_PLAIN, + response_schemas=sd.get_job_prov_metadata_responses, +) +@sd.process_prov_who_service.get( + tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROCESSES], + schema=sd.ProcessJobProvMetadataEndpoint(), + accept=ContentType.TEXT_PLAIN, + response_schemas=sd.get_job_prov_metadata_responses, +) +@sd.job_prov_who_service.get( + tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE], + schema=sd.JobProvMetadataEndpoint(), + accept=ContentType.TEXT_PLAIN, + response_schemas=sd.get_job_prov_metadata_responses, +) +@sd.provider_prov_inputs_service.get( + tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROVIDERS], + 
schema=sd.ProviderJobProvMetadataEndpoint(), + accept=ContentType.TEXT_PLAIN, + response_schemas=sd.get_job_prov_metadata_responses, +) +@sd.process_prov_inputs_service.get( + tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROCESSES], + schema=sd.ProcessJobProvMetadataEndpoint(), + accept=ContentType.TEXT_PLAIN, + response_schemas=sd.get_job_prov_metadata_responses, +) +@sd.job_prov_inputs_service.get( + tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE], + schema=sd.JobProvMetadataEndpoint(), + accept=ContentType.TEXT_PLAIN, + response_schemas=sd.get_job_prov_metadata_responses, +) +@sd.provider_prov_inputs_run_service.get( + tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROVIDERS], + schema=sd.ProviderJobProvMetadataEndpoint(), + accept=ContentType.TEXT_PLAIN, + response_schemas=sd.get_job_prov_metadata_responses, +) +@sd.process_prov_inputs_run_service.get( + tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROCESSES], + schema=sd.ProcessJobProvMetadataEndpoint(), + accept=ContentType.TEXT_PLAIN, + response_schemas=sd.get_job_prov_metadata_responses, +) +@sd.job_prov_inputs_run_service.get( + tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE], + schema=sd.JobProvMetadataEndpoint(), + accept=ContentType.TEXT_PLAIN, + response_schemas=sd.get_job_prov_metadata_responses, +) +@sd.provider_prov_outputs_service.get( + tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROVIDERS], + schema=sd.ProviderJobProvMetadataEndpoint(), + accept=ContentType.TEXT_PLAIN, + response_schemas=sd.get_job_prov_metadata_responses, +) +@sd.process_prov_outputs_service.get( + tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROCESSES], + schema=sd.ProcessJobProvMetadataEndpoint(), + accept=ContentType.TEXT_PLAIN, + response_schemas=sd.get_job_prov_metadata_responses, +) +@sd.job_prov_outputs_service.get( + tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE], + schema=sd.JobProvMetadataEndpoint(), + accept=ContentType.TEXT_PLAIN, + response_schemas=sd.get_job_prov_metadata_responses, +) +@sd.provider_prov_outputs_run_service.get( + 
tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROVIDERS], + schema=sd.ProviderJobProvMetadataEndpoint(), + accept=ContentType.TEXT_PLAIN, + response_schemas=sd.get_job_prov_metadata_responses, +) +@sd.process_prov_outputs_run_service.get( + tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROCESSES], + schema=sd.ProcessJobProvMetadataEndpoint(), + accept=ContentType.TEXT_PLAIN, + response_schemas=sd.get_job_prov_metadata_responses, +) +@sd.job_prov_outputs_run_service.get( + tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE], + schema=sd.JobProvMetadataEndpoint(), + accept=ContentType.TEXT_PLAIN, + response_schemas=sd.get_job_prov_metadata_responses, +) +@sd.provider_prov_run_service.get( + tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROVIDERS], + schema=sd.ProviderJobProvMetadataEndpoint(), + accept=ContentType.TEXT_PLAIN, + response_schemas=sd.get_job_prov_metadata_responses, +) +@sd.process_prov_run_service.get( + tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROCESSES], + schema=sd.ProcessJobProvMetadataEndpoint(), + accept=ContentType.TEXT_PLAIN, + response_schemas=sd.get_job_prov_metadata_responses, +) +@sd.job_prov_run_service.get( + tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE], + schema=sd.JobProvMetadataEndpoint(), + accept=ContentType.TEXT_PLAIN, + response_schemas=sd.get_job_prov_metadata_responses, +) +@sd.provider_prov_run_id_service.get( + tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROVIDERS], + schema=sd.ProviderJobProvMetadataEndpoint(), + accept=ContentType.TEXT_PLAIN, + response_schemas=sd.get_job_prov_metadata_responses, +) +@sd.process_prov_run_id_service.get( + tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROCESSES], + schema=sd.ProcessJobProvMetadataEndpoint(), + accept=ContentType.TEXT_PLAIN, + response_schemas=sd.get_job_prov_metadata_responses, +) +@sd.job_prov_run_id_service.get( + tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE], + schema=sd.JobProvMetadataEndpoint(), + accept=ContentType.TEXT_PLAIN, + response_schemas=sd.get_job_prov_metadata_responses, +)
+@sd.provider_prov_runs_service.get( + tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROVIDERS], + schema=sd.ProviderJobProvMetadataEndpoint(), + accept=ContentType.TEXT_PLAIN, + response_schemas=sd.get_job_prov_metadata_responses, +) +@sd.process_prov_runs_service.get( + tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROCESSES], + schema=sd.ProcessJobProvMetadataEndpoint(), + accept=ContentType.TEXT_PLAIN, + response_schemas=sd.get_job_prov_metadata_responses, +) +@sd.job_prov_runs_service.get( + tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE], + schema=sd.JobProvMetadataEndpoint(), + accept=ContentType.TEXT_PLAIN, + response_schemas=sd.get_job_prov_metadata_responses, +) +@log_unhandled_exceptions(logger=LOGGER, message=sd.InternalServerErrorResponseSchema.description) +def get_job_prov(request): + # type: (PyramidRequest) -> AnyResponseType + """ + Retrieve the provenance details of a job based on the contextual request path. + """ + return get_job_prov_response(request) + + +def includeme(config): + # type: (Configurator) -> None + LOGGER.info("Adding WPS REST API jobs PROV views...") + config.add_cornice_service(sd.job_prov_service) + config.add_cornice_service(sd.job_prov_info_service) + config.add_cornice_service(sd.job_prov_who_service) + config.add_cornice_service(sd.job_prov_inputs_service) + config.add_cornice_service(sd.job_prov_inputs_run_service) + config.add_cornice_service(sd.job_prov_outputs_service) + config.add_cornice_service(sd.job_prov_outputs_run_service) + config.add_cornice_service(sd.job_prov_run_service) + config.add_cornice_service(sd.job_prov_run_id_service) + config.add_cornice_service(sd.job_prov_runs_service) + config.add_cornice_service(sd.process_prov_service) + config.add_cornice_service(sd.process_prov_info_service) + config.add_cornice_service(sd.process_prov_who_service) + config.add_cornice_service(sd.process_prov_inputs_service) + config.add_cornice_service(sd.process_prov_inputs_run_service) + 
config.add_cornice_service(sd.process_prov_outputs_service) + config.add_cornice_service(sd.process_prov_outputs_run_service) + config.add_cornice_service(sd.process_prov_run_service) + config.add_cornice_service(sd.process_prov_run_id_service) + config.add_cornice_service(sd.process_prov_runs_service) + config.add_cornice_service(sd.provider_prov_service) + config.add_cornice_service(sd.provider_prov_info_service) + config.add_cornice_service(sd.provider_prov_who_service) + config.add_cornice_service(sd.provider_prov_inputs_service) + config.add_cornice_service(sd.provider_prov_inputs_run_service) + config.add_cornice_service(sd.provider_prov_outputs_service) + config.add_cornice_service(sd.provider_prov_outputs_run_service) + config.add_cornice_service(sd.provider_prov_run_service) + config.add_cornice_service(sd.provider_prov_run_id_service) + config.add_cornice_service(sd.provider_prov_runs_service) diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py index 3f4ee4673..e63fac5bf 100644 --- a/weaver/wps_restapi/jobs/utils.py +++ b/weaver/wps_restapi/jobs/utils.py @@ -14,6 +14,7 @@ HTTPInternalServerError, HTTPLocked, HTTPNoContent, + HTTPNotAcceptable, HTTPNotFound, HTTPOk ) @@ -40,7 +41,14 @@ parse_prefer_header_return, update_preference_applied_return_header ) -from weaver.formats import ContentEncoding, ContentType, clean_media_type_format, get_format, repr_json +from weaver.formats import ( + ContentEncoding, + ContentType, + clean_media_type_format, + get_format, + guess_target_format, + repr_json +) from weaver.owsexceptions import OWSNoApplicableCode, OWSNotFound from weaver.processes.constants import JobInputsOutputsSchema, JobStatusSchema from weaver.processes.convert import any2wps_literal_datatype, convert_output_params_schema, get_field @@ -1410,3 +1418,41 @@ def dismiss_job_task(job, container): job.status = map_status(Status.DISMISSED) job = store.update_job(job) return job + + +def get_job_prov_response(request): + # 
type: (PyramidRequest) -> AnyResponseType + """ + Retrieve specific :term:`Provenance` contents of a :term:`Job` based on the request. + + The specific request path is redirected to the relevant command from :mod:`cwlprov`. + If applicable, request :term:`Media-Type` specifiers are considered to return alternate representations. + """ + job = get_job(request) + raise_job_dismissed(job, request) + raise_job_bad_status_success(job, request) + + prov_type = guess_target_format(request, override_user_agent=True, default=ContentType.APP_JSON) + prov_path = request.path.rsplit("/prov", 1)[-1] + prov_path = f"/prov{prov_path}" + prov_data, prov_type = job.prov_data(request, prov_path, prov_type) + if not prov_data: + prov_dir = job.prov_path(request) + prov_exists = os.path.isdir(prov_dir) + prov_err = HTTPNotAcceptable if prov_exists else JobGone + prov_body = { + "title": "NoJobProvenance", + "type": "no-job-provenance", # unofficial + "detail": "Job provenance could not be retrieved for the specified job.", + "cause": "Missing or invalid provenance details." + } + if prov_exists and "run_id" in request.matchdict: + prov_err = JobNotFound + prov_body["error"] = "No such run ID for specified job provenance." 
+ prov_body["value"] = {"run_id": str(request.matchdict["run_id"])} + prov_body["status"] = prov_err.code + return prov_err(json=prov_body, headers={"Content-Type": ContentType.APP_JSON}) + links = job.links(container=request, self_link="provenance") + headers = [("Link", make_link_header(link)) for link in links] + headers.append(("Content-Type", prov_type)) + return HTTPOk(body=prov_data, headers=headers) diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py index 21475c1c2..0076e3cc1 100644 --- a/weaver/wps_restapi/swagger_definitions.py +++ b/weaver/wps_restapi/swagger_definitions.py @@ -117,6 +117,7 @@ JobStatusSchema, ProcessSchema ) +from weaver.provenance import ProvenanceFormat from weaver.quotation.status import QuoteStatus from weaver.sort import Sort, SortMethods from weaver.status import JOB_STATUS_CODE_API, JOB_STATUS_SEARCH_API, Status @@ -334,6 +335,7 @@ TAG_EXCEPTIONS = "Exceptions" TAG_LOGS = "Logs" TAG_STATISTICS = "Statistics" +TAG_PROVENANCE = "Provenance" TAG_VAULT = "Vault" TAG_WPS = "WPS" TAG_DEPRECATED = "Deprecated Endpoints" @@ -359,11 +361,21 @@ jobs_service = Service(name="jobs", path="/jobs") job_service = Service(name="job", path=f"{jobs_service.path}/{{job_id}}") job_results_service = Service(name="job_results", path=f"{job_service.path}/results") -job_exceptions_service = Service(name="job_exceptions", path=f"{job_service.path}/exceptions") job_outputs_service = Service(name="job_outputs", path=f"{job_service.path}/outputs") job_inputs_service = Service(name="job_inputs", path=f"{job_service.path}/inputs") +job_exceptions_service = Service(name="job_exceptions", path=f"{job_service.path}/exceptions") job_logs_service = Service(name="job_logs", path=f"{job_service.path}/logs") job_stats_service = Service(name="job_stats", path=f"{job_service.path}/statistics") +job_prov_service = Service(name="job_prov", path=f"{job_service.path}/prov") +job_prov_info_service = 
Service(name="job_prov_info", path=f"{job_prov_service.path}/info") +job_prov_who_service = Service(name="job_prov_who", path=f"{job_prov_service.path}/who") +job_prov_inputs_service = Service(name="job_prov_inputs", path=f"{job_prov_service.path}/inputs") +job_prov_inputs_run_service = Service(name="job_prov_inputs_run", path=f"{job_prov_service.path}/inputs/{{run_id}}") +job_prov_outputs_service = Service(name="job_prov_outputs", path=f"{job_prov_service.path}/outputs") +job_prov_outputs_run_service = Service(name="job_prov_outputs_run", path=f"{job_prov_service.path}/outputs/{{run_id}}") +job_prov_run_service = Service(name="job_prov_run", path=f"{job_prov_service.path}/run") +job_prov_run_id_service = Service(name="job_prov_run_id", path=f"{job_prov_service.path}/run/{{run_id}}") +job_prov_runs_service = Service(name="job_prov_runs", path=f"{job_prov_service.path}/runs") processes_service = Service(name="processes", path="/processes") process_service = Service(name="process", path=f"{processes_service.path}/{{process_id}}") @@ -373,6 +385,7 @@ process_visibility_service = Service(name="process_visibility", path=f"{process_service.path}/visibility") process_package_service = Service(name="process_package", path=f"{process_service.path}/package") process_payload_service = Service(name="process_payload", path=f"{process_service.path}/payload") +process_execution_service = Service(name="process_execution", path=f"{process_service.path}/execution") process_jobs_service = Service(name="process_jobs", path=process_service.path + jobs_service.path) process_job_service = Service(name="process_job", path=process_service.path + job_service.path) process_results_service = Service(name="process_results", path=process_service.path + job_results_service.path) @@ -381,23 +394,92 @@ process_exceptions_service = Service(name="process_exceptions", path=process_service.path + job_exceptions_service.path) process_logs_service = Service(name="process_logs", path=process_service.path 
+ job_logs_service.path) process_stats_service = Service(name="process_stats", path=process_service.path + job_stats_service.path) -process_execution_service = Service(name="process_execution", path=f"{process_service.path}/execution") +process_prov_service = Service(name="process_prov", path=process_service.path + job_prov_service.path) +process_prov_info_service = Service(name="process_prov_info", path=process_service.path + job_prov_info_service.path) +process_prov_who_service = Service(name="process_prov_who", path=process_service.path + job_prov_who_service.path) +process_prov_inputs_service = Service( + name="process_prov_inputs", + path=process_service.path + job_prov_inputs_service.path, +) +process_prov_inputs_run_service = Service( + name="process_prov_inputs_run", + path=process_service.path + job_prov_inputs_run_service.path, +) +process_prov_outputs_service = Service( + name="process_prov_outputs", + path=process_service.path + job_prov_outputs_service.path, +) +process_prov_outputs_run_service = Service( + name="process_prov_outputs_run", + path=process_service.path + job_prov_outputs_run_service.path, +) +process_prov_run_service = Service( + name="process_prov_run", + path=process_service.path + job_prov_run_service.path, +) +process_prov_run_id_service = Service( + name="process_prov_run_id", + path=process_service.path + job_prov_run_id_service.path, +) +process_prov_runs_service = Service( + name="process_prov_runs", + path=process_service.path + job_prov_runs_service.path, +) providers_service = Service(name="providers", path="/providers") provider_service = Service(name="provider", path=f"{providers_service.path}/{{provider_id}}") provider_processes_service = Service(name="provider_processes", path=provider_service.path + processes_service.path) provider_process_service = Service(name="provider_process", path=provider_service.path + process_service.path) provider_process_package_service = Service(name="provider_process_pkg", 
path=f"{provider_process_service.path}/package") +provider_execution_service = Service(name="provider_execution", path=f"{provider_process_service.path}/execution") provider_jobs_service = Service(name="provider_jobs", path=provider_service.path + process_jobs_service.path) provider_job_service = Service(name="provider_job", path=provider_service.path + process_job_service.path) provider_results_service = Service(name="provider_results", path=provider_service.path + process_results_service.path) provider_inputs_service = Service(name="provider_inputs", path=provider_service.path + process_inputs_service.path) provider_outputs_service = Service(name="provider_outputs", path=provider_service.path + process_outputs_service.path) +provider_exceptions_service = Service( + name="provider_exceptions", + path=provider_service.path + process_exceptions_service.path, +) provider_logs_service = Service(name="provider_logs", path=provider_service.path + process_logs_service.path) provider_stats_service = Service(name="provider_stats", path=provider_service.path + process_stats_service.path) -provider_exceptions_service = Service(name="provider_exceptions", - path=provider_service.path + process_exceptions_service.path) -provider_execution_service = Service(name="provider_execution", path=f"{provider_process_service.path}/execution") +provider_prov_service = Service(name="provider_prov", path=provider_service.path + process_prov_service.path) +provider_prov_info_service = Service( + name="provider_prov_info", + path=provider_service.path + process_prov_info_service.path, +) +provider_prov_who_service = Service( + name="provider_prov_who", + path=provider_service.path + process_prov_who_service.path, +) +provider_prov_inputs_service = Service( + name="provider_prov_inputs", + path=provider_service.path + process_prov_inputs_service.path, +) +provider_prov_inputs_run_service = Service( + name="provider_prov_inputs_run", + path=provider_service.path + 
process_prov_inputs_run_service.path, +) +provider_prov_outputs_service = Service( + name="provider_prov_outputs", + path=provider_service.path + process_prov_outputs_service.path, +) +provider_prov_outputs_run_service = Service( + name="provider_prov_outputs_run", + path=provider_service.path + process_prov_outputs_run_service.path, +) +provider_prov_run_service = Service( + name="provider_prov_run", + path=provider_service.path + process_prov_run_service.path, +) +provider_prov_run_id_service = Service( + name="provider_prov_run_id", + path=provider_service.path + process_prov_run_id_service.path, +) +provider_prov_runs_service = Service( + name="provider_prov_runs", + path=provider_service.path + process_prov_runs_service.path, +) # backward compatibility deprecated routes job_result_service = Service(name="job_result", path=f"{job_service.path}/result") @@ -715,6 +797,12 @@ class ResponseContentTypeHeader(ContentTypeHeader): ]) +class ResponseContentTypePlainTextHeader(ContentTypeHeader): + example = ContentType.TEXT_PLAIN + default = ContentType.TEXT_PLAIN + validator = OneOf([ContentType.TEXT_PLAIN]) + + class PreferHeader(ExtendedSchemaNode): summary = "Header that describes job execution parameters." description = ( @@ -743,6 +831,13 @@ class ResponseHeaders(ExtendedMappingSchema): content_type = ResponseContentTypeHeader() +class ResponsePlainTextHeaders(ResponseHeaders): + """ + Headers describing resulting response. + """ + content_type = ResponseContentTypePlainTextHeader() + + class RedirectHeaders(ResponseHeaders): Location = URL(example="https://job/123/result", description="Redirect resource location.") @@ -5921,7 +6016,7 @@ class CWLTool(PermissiveMappingSchema): class CWLWorkflowStepRunDefinition(AnyOfKeywordSchema): _any_of = [ AnyIdentifier( - title="CWLWorkflowStepRunID", + title="CWLWorkflowStepRunID", description="Reference to local process ID, with or without '.cwl' extension."
), CWLFileName(), @@ -6398,8 +6493,23 @@ class JobStatisticsSchema(ExtendedMappingSchema): class FrontpageParameterSchema(ExtendedMappingSchema): name = ExtendedSchemaNode(String(), example="api") enabled = ExtendedSchemaNode(Boolean(), example=True) - url = URL(description="Referenced parameter endpoint.", example="https://weaver-host", missing=drop) - doc = ExtendedSchemaNode(String(), example="https://weaver-host/api", missing=drop) + url = URL( + description="Referenced parameter endpoint. Root URL when the functionality implies multiple endpoints.", + example="https://weaver-host", + missing=drop, + ) + doc = ExtendedSchemaNode( + String(), + description="Endpoint where additional documentation can be found about the functionality.", + example="https://weaver-host/api", + missing=drop + ) + api = URL( + String(), + description="OpenAPI documentation endpoint about the functionality.", + example="https://weaver-host/api", + missing=drop, + ) class FrontpageParameters(ExtendedSequenceSchema): @@ -7677,6 +7787,82 @@ class GoneJobResponseSchema(ExtendedMappingSchema): body = ErrorJsonResponseBodySchema() +class JobProvAcceptHeader(AcceptHeader): + validator = OneOf(ProvenanceFormat.media_types()) + + +class JobProvRequestHeaders(RequestHeaders): + accept = JobProvAcceptHeader() + + +class JobProvEndpoint(JobPath): + header = JobProvRequestHeaders() + querystring = FormatQuery() + + +class ProcessJobProvEndpoint(JobProvEndpoint, LocalProcessPath): + pass + + +class ProviderJobProvEndpoint(JobProvEndpoint, ProviderProcessPath): + pass + + +class OkGetJobProvResponseHeaders(ResponseHeaders): + content_type = JobProvAcceptHeader() + + +class OkGetJobProvResponse(ExtendedMappingSchema): + description = "Job provenance details in the requested format." 
+ header = OkGetJobProvResponseHeaders() + + +class JobProvMetadataRequestHeaders(ExtendedMappingSchema): + accept = ResponseContentTypePlainTextHeader() + + +class JobProvMetadataEndpoint(JobPath): + header = JobProvMetadataRequestHeaders() + + +class ProcessJobProvMetadataEndpoint(JobProvMetadataEndpoint, LocalProcessPath): + pass + + +class ProviderJobProvMetadataEndpoint(JobProvMetadataEndpoint, ProviderProcessPath): + pass + + +class JobProvMetadataResponseBody(ExtendedSchemaNode): + schema_type = String + description = "Plain text provenance metadata contents of the job execution for the requested path." + + +class OkGetJobProvMetadataResponse(ExtendedMappingSchema): + description = "Job execution provenance metadata relative to the contextual request path." + header = ResponsePlainTextHeaders() + body = JobProvMetadataResponseBody() + + +class NotFoundJobProvResponseSchema(NotFoundResponseSchema): + description = ( + "Job reference UUID cannot be found, or a specified provenance " + "run UUID cannot be retrieved from the Workflow execution steps." + ) + header = ResponseHeaders() + body = ErrorJsonResponseBodySchema() + + +class GoneJobProvResponseSchema(ExtendedMappingSchema): + description = ( + "Job reference contents have been removed or does not contain PROV metadata. " + "This could be because the job was created before provenance support was enabled, " + "or because a job retention period deleted the contents."
+ ) + header = ResponseHeaders() + body = ErrorJsonResponseBodySchema() + + class OkGetJobInputsResponse(ExtendedMappingSchema): header = ResponseHeaders() body = JobInputsBody() @@ -8196,8 +8382,8 @@ class GoneVaultFileDownloadResponse(ExtendedMappingSchema): "406": NotAcceptableErrorResponseSchema(), "422": UnprocessableEntityResponseSchema(), } -get_prov_all_jobs_responses = copy(get_all_jobs_responses) -get_prov_all_jobs_responses.update({ +get_provider_all_jobs_responses = copy(get_all_jobs_responses) +get_provider_all_jobs_responses.update({ "403": ForbiddenProviderLocalResponseSchema(), }) get_single_job_status_responses = { @@ -8217,8 +8403,8 @@ class GoneVaultFileDownloadResponse(ExtendedMappingSchema): "406": NotAcceptableErrorResponseSchema(), "500": InternalServerErrorResponseSchema(), } -get_prov_single_job_status_responses = copy(get_single_job_status_responses) -get_prov_single_job_status_responses.update({ +get_provider_single_job_status_responses = copy(get_single_job_status_responses) +get_provider_single_job_status_responses.update({ "403": ForbiddenProviderLocalResponseSchema(), }) patch_job_responses = { @@ -8246,8 +8432,8 @@ class GoneVaultFileDownloadResponse(ExtendedMappingSchema): "410": GoneJobResponseSchema(), "500": InternalServerErrorResponseSchema(), } -delete_prov_job_responses = copy(delete_job_responses) -delete_prov_job_responses.update({ +delete_provider_job_responses = copy(delete_job_responses) +delete_provider_job_responses.update({ "403": ForbiddenProviderLocalResponseSchema(), }) get_job_inputs_responses = { @@ -8263,8 +8449,8 @@ class GoneVaultFileDownloadResponse(ExtendedMappingSchema): "406": NotAcceptableErrorResponseSchema(), "500": InternalServerErrorResponseSchema(), } -get_prov_inputs_responses = copy(get_job_inputs_responses) -get_prov_inputs_responses.update({ +get_provider_inputs_responses = copy(get_job_inputs_responses) +get_provider_inputs_responses.update({ "403": ForbiddenProviderLocalResponseSchema(), }) 
get_job_outputs_responses = { @@ -8281,8 +8467,8 @@ class GoneVaultFileDownloadResponse(ExtendedMappingSchema): "410": GoneJobResponseSchema(), "500": InternalServerErrorResponseSchema(), } -get_prov_outputs_responses = copy(get_job_outputs_responses) -get_prov_outputs_responses.update({ +get_provider_outputs_responses = copy(get_job_outputs_responses) +get_provider_outputs_responses.update({ "403": ForbiddenProviderLocalResponseSchema(), }) get_result_redirect_responses = { @@ -8303,11 +8489,11 @@ class GoneVaultFileDownloadResponse(ExtendedMappingSchema): "410": GoneJobResponseSchema(), "500": InternalServerErrorResponseSchema(), } -get_prov_results_responses = copy(get_job_results_responses) -get_prov_results_responses.update({ +get_provider_results_responses = copy(get_job_results_responses) +get_provider_results_responses.update({ "403": ForbiddenProviderLocalResponseSchema(), }) -get_exceptions_responses = { +get_job_exceptions_responses = { "200": OkGetJobExceptionsResponse(description="success", examples={ "JobExceptions": { "summary": "Job exceptions that occurred during failing process execution.", @@ -8321,11 +8507,11 @@ class GoneVaultFileDownloadResponse(ExtendedMappingSchema): "410": GoneJobResponseSchema(), "500": InternalServerErrorResponseSchema(), } -get_prov_exceptions_responses = copy(get_exceptions_responses) -get_prov_exceptions_responses.update({ +get_provider_exceptions_responses = copy(get_job_exceptions_responses) +get_provider_exceptions_responses.update({ "403": ForbiddenProviderLocalResponseSchema(), }) -get_logs_responses = { +get_job_logs_responses = { "200": OkGetJobLogsResponse(description="success", examples={ "JobLogs": { "summary": "Job logs registered and captured throughout process execution.", @@ -8339,11 +8525,11 @@ class GoneVaultFileDownloadResponse(ExtendedMappingSchema): "410": GoneJobResponseSchema(), "500": InternalServerErrorResponseSchema(), } -get_prov_logs_responses = copy(get_logs_responses) 
-get_prov_logs_responses.update({ +get_provider_logs_responses = copy(get_job_logs_responses) +get_provider_logs_responses.update({ "403": ForbiddenProviderLocalResponseSchema(), }) -get_stats_responses = { +get_job_stats_responses = { "200": OkGetJobStatsResponse(description="success", examples={ "JobStatistics": { "summary": "Job statistics collected following process execution.", @@ -8357,10 +8543,58 @@ class GoneVaultFileDownloadResponse(ExtendedMappingSchema): "410": GoneJobResponseSchema(), "500": InternalServerErrorResponseSchema(), } -get_prov_stats_responses = copy(get_stats_responses) -get_prov_stats_responses.update({ +get_provider_stats_responses = copy(get_job_stats_responses) +get_provider_stats_responses.update({ "403": ForbiddenProviderLocalResponseSchema(), }) +get_job_prov_responses = { + "200": OkGetJobProvResponse( + description="Successful job PROV details.", + examples={ + "PROV-JSON": { + "summary": "Provenance details returned in PROV-JSON format.", + "value": EXAMPLES["job_prov.json"], + }, + "PROV-N": { + "summary": "Provenance details returned in PROV-N format.", + "value": EXAMPLES["job_prov.txt"], + }, + "PROV-XML": { + "summary": "Provenance details returned in PROV-XML format.", + "value": EXAMPLES["job_prov.xml"], + } + } + ), + "400": InvalidJobResponseSchema(), + "404": NotFoundJobProvResponseSchema(), + "406": NotAcceptableErrorResponseSchema(), + "410": GoneJobProvResponseSchema(), + "500": InternalServerErrorResponseSchema(), +} +get_job_prov_metadata_responses = { + "200": OkGetJobProvMetadataResponse( + description="Successful job PROV metadata retrieval.", + examples={ + "PROV run": { + "summary": "Provenance metadata of the run execution.", + "value": EXAMPLES["job_prov_run.txt"], + }, + "PROV who": { + "summary": "Provenance metadata of who ran the job.", + "value": EXAMPLES["job_prov_who.txt"], + }, + "PROV info": { + "summary": "Provenance metadata about the Research Object packaging information.", + "value": 
EXAMPLES["job_prov_info.txt"], + } + } + ), + "400": InvalidJobResponseSchema(), + "404": NotFoundJobProvResponseSchema(), + "406": NotAcceptableErrorResponseSchema(), + "410": GoneJobProvResponseSchema(), + "500": InternalServerErrorResponseSchema(), +} get_quote_list_responses = { "200": OkGetQuoteListResponse(description="success"), "405": MethodNotAllowedErrorResponseSchema(), diff --git a/weaver/wps_restapi/templates/responses/frontpage.mako b/weaver/wps_restapi/templates/responses/frontpage.mako index 572d17a5b..858325de9 100644 --- a/weaver/wps_restapi/templates/responses/frontpage.mako +++ b/weaver/wps_restapi/templates/responses/frontpage.mako @@ -80,6 +80,22 @@
Enabled:
${util.render_bool(param.enabled)} + %if "api" in param: +
+
OpenAPI:
+ +
+ %endif + %if "doc" in param: +
+
DOC:
+ +
+ %endif %if "url" in param:
URL: