diff --git a/CHANGES.rst b/CHANGES.rst
index 263abe3c8..a7dae625a 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -14,10 +14,32 @@ Changes:
--------
- Add support of Python 3.13.
- Drop support of Python 3.8.
-
-Fixes:
-------
-- No change.
+- Add support of *OGC API - Processes - Part 4: Job Management* related to ``PROV`` requirement and conformance classes.
+- Add support of `W3C PROV <https://www.w3.org/TR/prov-overview/>`_ to provide ``GET /jobs/{jobId}/prov`` endpoints
+ and all underlying paths (``/info``, ``/who``, ``/run``, ``/inputs``, ``/outputs``, and ``../{runId}`` variants)
+ to retrieve provenance metadata from a `Job` execution and its corresponding `Process` and `Workflow` definitions,
+ as processed by ``cwltool``/``cwlprov`` and extended by `Weaver`-specific server metadata.
+ Supported ``PROV`` representations are ``PROV-N``, ``PROV-NT``, ``PROV-JSON``, ``PROV-JSONLD``, ``PROV-XML``
+ and ``PROV-TURTLE``, each of which can be obtained by providing the corresponding ``Accept`` headers.
+- Add ``weaver.cwl_prov`` configuration option to control the new ``PROV`` metadata collection feature.
+- Add ``prov`` and ``provenance`` CLI and ``WeaverClient`` operations.
+- Extend ``weaver.cli.WeaverArgumentParser`` "*rules*" to allow returning an error message providing better
+ case-by-case details about the specific cause of failure handled by the *rule* callable.
+- Update certain ``cornice`` service definitions that were using "``prov``" to refer to `Providers` to avoid
+ confusion with the multiple ``PROV``/`Provenance` related terminology and services added for the new feature.
+- Pin ``cwltool==3.1.20241217163858`` to employ the official release including
+ ``PROV`` configuration provided to easily configure `Weaver`
+ (relates to `common-workflow-language/cwltool#2082 <https://github.com/common-workflow-language/cwltool/pull/2082>`_)
+ and integrate previously provided fixes
+ (relates to `common-workflow-language/cwltool#2082 <https://github.com/common-workflow-language/cwltool/pull/2082>`_)
+ that were applied by a forked backport ``https://github.com/fmigneault/cwltool`` repository.
+
+Fixes:
+------
+- Fix missing documentation about certain ``WeaverClient`` operations.
+- Fix ``weaver.cli.OperationResult`` not setting its ``text`` property when a valid non-`JSON` response is obtained.
+- Fix the `API` frontpage `HTML` rendering to return enabled features and corresponding ``doc``/``url``/``api``
+ endpoints for quickly referencing the capabilities activated for a `Weaver` instance.
.. _changes_6.0.0:
diff --git a/config/weaver.ini.example b/config/weaver.ini.example
index 03aa221bb..b8a682ee9 100644
--- a/config/weaver.ini.example
+++ b/config/weaver.ini.example
@@ -100,6 +100,11 @@ weaver.cwl_egid =
weaver.cwl_processes_dir =
weaver.cwl_processes_register_error = false
+# provenance functionality
+# if disabled, provenance details will not be collected when running Application Packages and Workflows
+# if disabled, the '/jobs/{jobId}/prov' endpoint will always report missing information since unavailable
+weaver.cwl_prov = true
+
# --- Weaver WPS settings ---
weaver.wps = true
weaver.wps_url =
@@ -128,7 +133,7 @@ weaver.wps_metadata_identification_keywords=Weaver,WPS,OGC
# access constraints can be comma-separated
weaver.wps_metadata_identification_accessconstraints=NONE
weaver.wps_metadata_identification_fees=NONE
-weaver.wps_metadata_provider_name=CRIM
+weaver.wps_metadata_provider_name=Computer Research Institute of Montréal (CRIM)
weaver.wps_metadata_provider_url=http://pavics-weaver.readthedocs.org/en/latest/
weaver.wps_metadata_contact_name=Francis Charette-Migneault
weaver.wps_metadata_contact_position=Research Software Developer
diff --git a/docs/source/appendix.rst b/docs/source/appendix.rst
index de18c19da..ea0f76d58 100644
--- a/docs/source/appendix.rst
+++ b/docs/source/appendix.rst
@@ -250,6 +250,15 @@ Glossary
Entity that describes the required inputs, produced outputs, and any applicable metadata for the execution of
the defined script, calculation, or operation.
+ PROV
+ Provenance
+ Metadata using the :term:`W3C` |PROV|_ standard that is applied to a submitted :term:`Job` execution to allow
+ retrieving its origin, the related :term:`Application Package`, its :term:`I/O` sources and results, as well as
+ additional details about the server host and runtime user as applicable to replicate the experiment.
+
+ .. seealso::
+ :ref:`proc_op_job_prov`
+
Provider
Entity that offers an ensemble of :term:`Process` under it. It is typically a reference to a remote service,
where any :term:`Process` it provides is fetched dynamically on demand.
@@ -331,6 +340,9 @@ Glossary
Since |ogc-api-standards|_ are based on HTTP and web communications, this consortium establishes the
common foundation definitions used by the :term:`API` specifications.
+ .. seealso::
+ |w3c|_
+
WKT
Well-Known Text geometry representation.
diff --git a/docs/source/cli.rst b/docs/source/cli.rst
index 179a230c1..0603ddc74 100644
--- a/docs/source/cli.rst
+++ b/docs/source/cli.rst
@@ -33,14 +33,29 @@ Python Client Commands
For details about using the Python :py:class:`weaver.cli.WeaverClient`, please refer directly to its class
documentation and its underlying methods.
+* :py:meth:`weaver.cli.WeaverClient.info`
+* :py:meth:`weaver.cli.WeaverClient.version`
+* :py:meth:`weaver.cli.WeaverClient.conformance`
+* :py:meth:`weaver.cli.WeaverClient.register`
+* :py:meth:`weaver.cli.WeaverClient.unregister`
* :py:meth:`weaver.cli.WeaverClient.deploy`
* :py:meth:`weaver.cli.WeaverClient.undeploy`
* :py:meth:`weaver.cli.WeaverClient.capabilities`
* :py:meth:`weaver.cli.WeaverClient.describe`
+* :py:meth:`weaver.cli.WeaverClient.package`
+* :py:meth:`weaver.cli.WeaverClient.jobs`
+* :py:meth:`weaver.cli.WeaverClient.trigger_job`
+* :py:meth:`weaver.cli.WeaverClient.update_job`
* :py:meth:`weaver.cli.WeaverClient.execute`
* :py:meth:`weaver.cli.WeaverClient.monitor`
-* :py:meth:`weaver.cli.WeaverClient.dismiss`
* :py:meth:`weaver.cli.WeaverClient.status`
+* :py:meth:`weaver.cli.WeaverClient.inputs`
+* :py:meth:`weaver.cli.WeaverClient.outputs`
+* :py:meth:`weaver.cli.WeaverClient.logs`
+* :py:meth:`weaver.cli.WeaverClient.statistics`
+* :py:meth:`weaver.cli.WeaverClient.exceptions`
+* :py:meth:`weaver.cli.WeaverClient.provenance`
+* :py:meth:`weaver.cli.WeaverClient.dismiss`
* :py:meth:`weaver.cli.WeaverClient.results`
* :py:meth:`weaver.cli.WeaverClient.upload`
@@ -479,6 +494,59 @@ Sample Output:
.. literalinclude:: ../../weaver/wps_restapi/examples/job_results.json
:language: json
+.. _cli_example_job_prov:
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Job Provenance Example
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Accomplishes the :term:`Job` |PROV|_ request to obtain :term:`Provenance` metadata.
+
+Below examples employ the ``Echo`` :term:`Process` available in |weaver-func-test-apps|_
+and assume the referenced :term:`Job` was completed successfully.
+
+.. note::
+ There are multiple alternative format representations offered by this operation.
+ Not all of them are presented below. See the various ``prov_type`` and ``prov_format``
+ parameters for the combinations.
+
+.. seealso::
+ - :ref:`proc_op_job_prov` provides more details about available endpoints, operations and metadata returned.
+
+.. code-block:: shell
+ :caption: Command Line
+
+ weaver prov -u ${WEAVER_URL} -j "1c49f085-bbd7-410d-a801-81fd42469e8a" --pT run
+
+.. code-block:: python
+ :caption: Python
+
+ from weaver.provenance import ProvenancePathType
+
+ client.prov("1c49f085-bbd7-410d-a801-81fd42469e8a", prov_type=ProvenancePathType.PROV_RUN)
+
+Sample Output:
+
+.. literalinclude:: ../../weaver/wps_restapi/examples/job_prov_run.txt
+ :language: text
+
+.. code-block:: shell
+ :caption: Command Line
+
+ weaver prov -u ${WEAVER_URL} -j "1c49f085-bbd7-410d-a801-81fd42469e8a" -nL --pF "PROV-N"
+
+.. code-block:: python
+ :caption: Python
+
+ from weaver.provenance import ProvenanceFormat
+
+ client.prov("1c49f085-bbd7-410d-a801-81fd42469e8a", prov_format=ProvenanceFormat.PROV_N)
+
+Sample Output:
+
+.. literalinclude:: ../../weaver/wps_restapi/examples/job_prov.txt
+ :language: text
+
.. _cli_example_upload:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst
index cd3c97944..d7aa1c398 100644
--- a/docs/source/configuration.rst
+++ b/docs/source/configuration.rst
@@ -101,6 +101,23 @@ they are optional and which default value or operation is applied in each situat
.. versionadded:: 1.9
+.. _weaver-cwl-prov:
+
+- | ``weaver.cwl_prov = true|false`` [:class:`bool`-like]
+ | (default: ``true``)
+ |
+ | Configure whether :term:`W3C` |PROV|_ functionality using the :ref:`proc_op_job_prov` endpoints should be enabled
+ to collect :term:`Provenance` metadata when executing the underlying :term:`CWL` of a given :term:`Process`
+ or :term:`Workflow`.
+
+ .. note::
+
+ Any pre-existing :term:`Job` that was created when this option did not yet exist or that was executed while
+ it was disabled will not offer :term:`Provenance` metadata. This is intrinsic to the functionality that must obtain
+ timely metadata *while* executing to properly represent operational steps and :term:`Job` updates as they occur.
+
+ .. versionadded:: 6.1
+
.. _weaver-wps:
- | ``weaver.wps = true|false`` [:class:`bool`-like]
diff --git a/docs/source/processes.rst b/docs/source/processes.rst
index 1bedb0d49..e415e28d1 100644
--- a/docs/source/processes.rst
+++ b/docs/source/processes.rst
@@ -173,7 +173,7 @@ through some parsing (e.g.: :ref:`proc_wps_12`) or with some requirement indicat
special handling. The represented :term:`Process` is aligned with |ogc-api-proc|_ specifications.
When deploying one such :term:`Process` directly, it is expected to have a definition specified
-with a :term:`CWL` `Application Package`_, which provides resources about one of the described :ref:`app_pkg_types`.
+with a :term:`CWL` :ref:`application-package`, which provides resources about one of the described :ref:`app_pkg_types`.
This is most of the time employed to wrap operations packaged in a reference :term:`Docker` image, but it can also
wrap :ref:`app_pkg_remote` to be executed on another server (i.e.: :term:`ADES`). When the :term:`Process` should be
@@ -490,6 +490,8 @@ the |getcap-req|_ request.
Modify an Existing Process (Update, Replace, Undeploy)
-----------------------------------------------------------------------------
+.. versionadded:: 4.20
+
Since `Weaver` supports |ogc-api-proc-part2|_, it is able to remove a previously registered :term:`Process` using
the :ref:`Deployment <proc_op_deploy>` request. The undeploy operation consist of a ``DELETE`` request targeting the
specific ``{WEAVER_URL}/processes/{processID}`` to be removed.
@@ -498,8 +500,6 @@ specific ``{WEAVER_URL}/processes/{processID}`` to be removed.
The :term:`Process` must be accessible by the user considering any visibility configuration to perform this step.
See :ref:`proc_op_deploy` section for details.
-.. versionadded:: 4.20
-
Starting from version `4.20 `_, a :term:`Process` can be replaced or
updated using respectively the ``PUT`` and ``PATCH`` requests onto the specific ``{WEAVER_URL}/processes/{processID}``
location of the reference to modify.
@@ -1989,7 +1989,7 @@ the configured :term:`WPS` output directory.
Header ``X-WPS-Output-Context`` is ignored when using `S3` buckets for output location since they are stored
individually per :term:`Job` UUID, and hold no relevant *context* location. See also :ref:`conf_s3_buckets`.
-.. versionadded:: 4.3
+.. versionchanged:: 4.3
Addition of the ``X-WPS-Output-Context`` header.
.. _proc_op_execute_subscribers:
@@ -2419,10 +2419,118 @@ Note again that the more the :term:`Process` is verbose, the more tracking will
Job Provenance
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-.. fixme: CWL and Job Prov (https://github.com/crim-ca/weaver/issues/673)
-.. todo::
- implement ``GET /jobs/{jobID}/run`` and/or ``GET /jobs/{jobID}/prov``
- (see https://github.com/crim-ca/weaver/issues/673)
+.. versionadded:: 6.1
+
+The provenance endpoints allow to obtain :term:`W3C` |PROV|_ metadata from a successfully completed :term:`Job`
+using various representations. This provenance information can help identify traceability information such as the input
+data sources, validate output checksums, and understand all internal :term:`Process` data transformations that were
+involved within an executed :term:`Workflow`.
+
+The |PROV|_ metadata consists of information records about entities, activities, and people involved in producing a
+piece of data or thing |PROV-dfn|_, which can be used to form assessments about its quality, reliability or
+trustworthiness.
+
+.. |PROV-dfn| replace:: :sup:`[^]`
+.. _PROV-dfn: https://www.w3.org/TR/2013/REC-prov-dm-20130430/#dfn-provenance
+
+.. seealso::
+ - |PROV-overview|_
+ - |cwltool-cwlprov|_
+
+.. figure:: https://www.w3.org/TR/2013/REC-prov-o-20130430/diagrams/starting-points.svg
+ :alt: PROV-O Resources
+ :target: `PROV-O`_
+ :align: center
+ :width: 500px
+
+ Provenance Resource Relationships [|PROV-O|_]
+
+
+The provenance endpoints are provided in alignment with the |ogc-api-proc-part4|_ provenance class requirement.
+However, `Weaver` also provides additional functionalities in comparison to the minimal requirements from the
+:term:`OGC` specification.
+
+Following is a table of available formats and corresponding endpoints offered by `Weaver`.
+
+.. list-table:: Job Provenance Endpoints
+ :name: table-job-prov
+ :align: center
+ :header-rows: 1
+ :widths: 25,10,20,45
+
+ * - Endpoint
+ - |PROV|_ Format
+ - :term:`Media-Type`
+ - Description
+ * - ``/jobs/{jobID}/prov``
+ - |PROV-JSON|_
+ - ``application/json``
+ - :term:`Provenance` metadata using :term:`JSON` representation.
+ * - ``/jobs/{jobID}/prov``
+ - |PROV-JSONLD|_
+ - ``application/ld+json``
+ - :term:`Provenance` metadata using |JSON-LD|_ representation.
+ * - ``/jobs/{jobID}/prov``
+ - |PROV-XML|_
+ - ``text/xml`` or ``application/xml``
+ - :term:`Provenance` metadata using :term:`XML` representation.
+ * - ``/jobs/{jobID}/prov``
+ - |PROV-N|_
+ - ``text/provenance-notation``
+ - :term:`Provenance` metadata using the main |PROV|_ notation representation.
+ * - ``/jobs/{jobID}/prov``
+ - PROV-NT
+ - ``application/n-triples``
+ - :term:`Provenance` metadata using |rdf-n-triples|_ (NT) representation.
+ * - ``/jobs/{jobID}/prov``
+ - PROV-TURTLE
+ - ``text/turtle``
+ - :term:`Provenance` metadata using |rdf-turtle|_ (TTL) representation.
+ * - ``/jobs/{jobID}/prov/info``
+ - |na|
+ - ``text/plain``
+ - Metadata about the *Research Object* packaging information.
+ * - ``/jobs/{jobID}/prov/who``
+ - |na|
+ - ``text/plain``
+ - Metadata of who ran the :term:`Job`.
+ * - ``/jobs/{jobID}/prov/runs``
+ - |na|
+ - ``text/plain``
+ - Obtain the list of ``runID`` steps of the :term:`Workflow` within the :term:`Job`.
+ * - ``/jobs/{jobID}/prov/run``
+ - |na|
+ - ``text/plain``
+ - Metadata of the main :term:`Job` and any nested step runs in the case of a :term:`Workflow`.
+ * - ``/jobs/{jobID}/prov/inputs``
+ - |na|
+ - ``text/plain``
+ - Metadata about the :term:`Job` input IDs.
+ * - ``/jobs/{jobID}/prov/outputs``
+ - |na|
+ - ``text/plain``
+ - Metadata about the :term:`Job` output IDs.
+ * - ``/jobs/{jobID}/prov/[run|inputs|outputs]/{runID}``
+ - |na|
+ - ``text/plain``
+ - Same as their respective definitions above, but for a specific step of a :term:`Workflow`.
+
+.. seealso::
+ This feature is enabled by default. Its functionality and the corresponding :term:`API` endpoints
+ can be controlled using :ref:`Configuration Option <weaver-cwl-prov>` ``weaver.cwl_prov``.
+
+Resulting metadata that is collected from :term:`Job` :term:`Provenance` will be stored under a similar endpoint
+as the :ref:`exec_output_location`, except with an additional ``-prov`` suffix applied after the :term:`Job` UUID,
+as shown below.
+This location is selected to conveniently offer the ``PROV`` metadata with a different parent directory than
+the :term:`Job` outputs, therefore allowing different endpoint access control schemes between the ``PROV`` metadata
+and actual output data, while also reusing the configured :ref:`exec_output_location` that can be used to quickly
+serve :term:`Provenance` contents without any additional configuration.
+
+.. code-block::
+
+ {WPS_OUTPUT_URL}[/{WPS_OUTPUT_CONTEXT}]/{JOB_UUID}-prov
+
.. _proc_op_job_stats:
diff --git a/docs/source/references.rst b/docs/source/references.rst
index 8bbf7831b..36a617dc3 100644
--- a/docs/source/references.rst
+++ b/docs/source/references.rst
@@ -62,6 +62,8 @@
.. |cwl-metadata-schema-org| replace:: RDF Schema Definitions
.. _cwl-metadata-schema-org: https://schema.org/version/latest/schemaorg-current-https.rdf
.. _docker: https://docs.docker.com/develop/
+.. |cwltool-cwlprov| replace:: CWLProv - Provenance Capture with :mod:`cwltool`
+.. _cwltool-cwlprov: https://cwltool.readthedocs.io/en/latest/CWLProv.html
.. |docker| replace:: Docker
.. |ems| replace:: Execution Management Service
.. |esgf| replace:: Earth System Grid Federation
@@ -172,6 +174,26 @@
.. _openeo-api: https://openeo.org/documentation/1.0/developers/api/reference.html
.. |OpenAPI-spec| replace:: OpenAPI Specification
.. _OpenAPI-spec: https://spec.openapis.org/oas/v3.1.0
+.. |JSON-LD| replace:: JSON Linked Data
+.. _JSON-LD: https://json-ld.org/
+.. |PROV| replace:: PROV
+.. _PROV: https://www.w3.org/TR/prov-overview/
+.. |PROV-JSON| replace:: PROV-JSON
+.. _PROV-JSON: https://www.w3.org/submissions/prov-json/
+.. |PROV-JSONLD| replace:: PROV-JSONLD
+.. _PROV-JSONLD: https://www.w3.org/submissions/prov-jsonld/
+.. |PROV-N| replace:: PROV-N
+.. _PROV-N: https://www.w3.org/TR/prov-n/
+.. |PROV-overview| replace:: PROV Overview
+.. _PROV-overview: https://www.w3.org/TR/prov-overview/
+.. |PROV-O| replace:: PROV-O: The PROV Ontology
+.. _PROV-O: https://www.w3.org/TR/2013/REC-prov-o-20130430/
+.. |PROV-XML| replace:: PROV-XML
+.. _PROV-XML: https://www.w3.org/TR/2013/NOTE-prov-xml-20130430/
+.. |rdf-n-triples| replace:: RDF N-Triples
+.. _rdf-n-triples: https://www.w3.org/TR/n-triples/
+.. |rdf-turtle| replace:: RDF Turtle
+.. _rdf-turtle: https://www.w3.org/TR/rdf12-turtle/
.. |pywps| replace:: PyWPS
.. _pywps: https://github.com/geopython/pywps/
.. |pywps-status| replace:: Progress and Status Report
@@ -184,6 +206,8 @@
.. _weaver-issues: https://github.com/crim-ca/weaver/issues
.. |submit-issue| replace:: submit a new issue
.. _submit-issue: https://github.com/crim-ca/weaver/issues/new/choose
+.. |w3c| replace:: W3C
+.. _w3c: https://www.w3.org/
.. STAC
.. |stac-spec| replace:: STAC Specification
@@ -202,7 +226,7 @@
.. Example references
.. |examples| replace:: Examples
.. _examples: examples.rst
-.. |weaver-func-test-apps| replace:: Weaver functional tests
+.. |weaver-func-test-apps| replace:: Weaver functional tests Application Packages
.. _weaver-func-test-apps: https://github.com/crim-ca/weaver/tree/master/tests/functional/application-packages
.. |ogc-testbeds-apps| replace:: OGC-Testbeds Applications
.. _ogc-testbeds-apps: https://github.com/crim-ca/application-packages
diff --git a/requirements.txt b/requirements.txt
index 1702261d4..7c8643e91 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -42,8 +42,10 @@ cryptography
# use cwltool gpu-enabled support until integrated within the original tool
# (https://github.com/common-workflow-language/common-workflow-language/issues/587)
### git+https://github.com/crim-ca/cwltool@docker-gpu#egg=cwltool
-##cwltool==3.1.20230906142556
-cwltool @ git+https://github.com/fmigneault/cwltool.git@fix-load-contents-array
+cwltool==3.1.20241217163858
+# for some reason, not the same release number,
+# but same code as https://github.com/common-workflow-language/cwlprov/tree/0.6.0
+cwlprov==0.1.1
dnspython>=2.6.1 # not directly required, pinned by Snyk to avoid a vulnerability; via pymongo dependency
docker>=7.1
duration
diff --git a/setup.cfg b/setup.cfg
index 5125bd37e..b4cca5ff1 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -69,6 +69,7 @@ markers =
remote: mark test with remote Weaver instance requirement
vault: mark test with Vault file feature validation
html: mark test as related to HTML rendering
+ prov: mark test as related to PROV operations
oap_part1: mark test as 'OGC API - Processes - Part 1: Core' functionalities
oap_part2: mark test as 'OGC API - Processes - Part 2: Deploy, Replace, Undeploy (DRU)' functionalities
oap_part3: mark test as 'OGC API - Processes - Part 3: Workflows and Chaining' functionalities
diff --git a/tests/functional/test_cli.py b/tests/functional/test_cli.py
index 028b66679..316742523 100644
--- a/tests/functional/test_cli.py
+++ b/tests/functional/test_cli.py
@@ -15,6 +15,7 @@
import mock
import pytest
+import yaml
from owslib.ows import DEFAULT_OWS_NAMESPACE
from owslib.wps import WPSException
from parameterized import parameterized
@@ -22,6 +23,7 @@
from webtest import TestApp as WebTestApp
from tests import resources
+from tests.functional.test_job_provenance import TestJobProvenanceBase
from tests.functional.utils import JobUtils, ResourcesUtil, WpsConfigBase
from tests.utils import (
get_weaver_url,
@@ -45,6 +47,7 @@
from weaver.notify import decrypt_email
from weaver.processes.constants import CWL_REQUIREMENT_APP_DOCKER, ProcessSchema
from weaver.processes.types import ProcessType
+from weaver.provenance import ProvenanceFormat, ProvenancePathType
from weaver.status import JOB_STATUS_CATEGORIES, Status, StatusCategory
from weaver.utils import fully_qualified_name, get_registry
from weaver.visibility import Visibility
@@ -2566,3 +2569,82 @@ def test_describe_auth(self):
assert any(f"\"id\": \"{proc}\"" in line for line in lines)
assert any("\"inputs\": {" in line for line in lines)
assert any("\"outputs\": {" in line for line in lines)
+
+
+@pytest.mark.prov
+class TestWeaverClientProv(TestWeaverClientBase, TestJobProvenanceBase):  # exercises 'client.prov' against the job created by 'TestJobProvenanceBase'
+ def setUp(self):
+ # purposely omit 'TestWeaverClientBase' setup to
+ # avoid clearing the generated job with PROV metadata
+ TestJobProvenanceBase.setUp(self)
+
+ def test_prov(self):  # default call: JSON content type and a body parsed as a dict
+ result = mocked_sub_requests(self.app, self.client.prov, self.job_url)
+ assert result.success
+ assert result.headers["Content-Type"] == ContentType.APP_JSON
+ assert isinstance(result.body, dict), "body should be the PROV-JSON"
+ assert "actedOnBehalfOf" in result.body
+ assert "agent" in result.body
+ assert "crim-ca/weaver" in str(result.body["agent"])  # agent entries are stringified to search for the expected names
+ assert "cwltool" in str(result.body["agent"])
+
+ def test_prov_yaml_by_output_format(self):  # YAML applies only to 'result.text'; headers and body stay JSON
+ result = mocked_sub_requests(self.app, self.client.prov, self.job_url, output_format=OutputFormat.YAML)
+ assert result.success
+ assert result.headers["Content-Type"] == ContentType.APP_JSON, "original type should still be JSON (from API)"
+ assert isinstance(result.body, dict), "response body should still be the original PROV-JSON"
+ assert isinstance(result.text, str), "text property should be the PROV-JSON represented as YAML string"
+ assert yaml.safe_load(result.text) == result.body, "PROV-JSON contents should be identical in YAML format"
+ assert "actedOnBehalfOf" in result.text
+ assert "agent" in result.text
+ assert "crim-ca/weaver" in str(result.text)
+ assert "cwltool" in str(result.text)
+
+ def test_prov_xml_by_prov_format(self):  # PROV-XML selected through 'prov_format'; body is expected as a string
+ result = mocked_sub_requests(self.app, self.client.prov, self.job_url, prov_format=ProvenanceFormat.PROV_XML)
+ assert result.success
+ assert result.headers["Content-Type"] == ContentType.APP_XML, "original type should still be XML (from API)"
+ assert isinstance(result.body, str), "body should be the PROV-XML representation"
+ assert "actedOnBehalfOf" in result.body
+ assert "agent" in result.body
+ assert "crim-ca/weaver" in str(result.body)
+ assert "cwltool" in str(result.body)
+
+ def test_prov_info(self):  # 'PROV_INFO' path type: plain-text response that mentions the job ID
+ result = mocked_sub_requests(self.app, self.client.prov, self.job_url, prov=ProvenancePathType.PROV_INFO)
+ assert result.success
+ assert result.headers["Content-Type"] == ContentType.TEXT_PLAIN
+ assert "Research Object of CWL workflow run" in result.text
+ assert self.job_id in result.text
+
+ def test_prov_run(self):  # 'PROV_RUN' path type: plain text listing the process, job, input and output IDs
+ result = mocked_sub_requests(self.app, self.client.prov, self.job_url, prov=ProvenancePathType.PROV_RUN)
+ assert result.success
+ assert result.headers["Content-Type"] == ContentType.TEXT_PLAIN
+ assert self.proc_id in result.text
+ assert self.job_id in result.text
+ assert "< wf:main/message" in result.text, (
+ "Indication of inward input 'message' ID should be present"
+ )
+ assert f"> wf:main/{self.proc_id}/output" in result.text, (
+ "Indication of outward result 'output' ID should be present"
+ )
+
+ def test_prov_run_with_id(self):  # same assertions as 'test_prov_run', with an explicit 'prov_run_id'
+ result = mocked_sub_requests(
+ self.app,
+ self.client.prov,
+ self.job_url,
+ prov=ProvenancePathType.PROV_RUN,
+ prov_run_id=self.job_id, # redundant in this case, but test that parameter is parsed and resolves
+ )
+ assert result.success
+ assert result.headers["Content-Type"] == ContentType.TEXT_PLAIN
+ assert self.proc_id in result.text
+ assert self.job_id in result.text
+ assert "< wf:main/message" in result.text, (
+ "Indication of inward input 'message' ID should be present"
+ )
+ assert f"> wf:main/{self.proc_id}/output" in result.text, (
+ "Indication of outward result 'output' ID should be present"
+ )
diff --git a/tests/functional/test_job_provenance.py b/tests/functional/test_job_provenance.py
new file mode 100644
index 000000000..eafb7d980
--- /dev/null
+++ b/tests/functional/test_job_provenance.py
@@ -0,0 +1,298 @@
+import contextlib
+import copy
+import itertools
+import os
+import uuid
+from typing import TYPE_CHECKING
+
+import pytest
+from parameterized import parameterized
+
+from tests.functional.utils import ResourcesUtil, WpsConfigBase
+from tests.utils import mocked_execute_celery, mocked_sub_requests, mocked_wps_output
+from weaver.formats import ContentType, OutputFormat
+from weaver.provenance import ProvenanceFormat, ProvenancePathType
+from weaver.status import Status
+
+if TYPE_CHECKING:
+ from typing import Optional
+
+ from weaver.typedefs import AnyUUID
+
+
+@pytest.mark.prov
+class TestJobProvenanceBase(WpsConfigBase, ResourcesUtil):  # class-level fixture: deploys 'Echo' and executes it once in 'setUpClass'
+ job_id = None # type: Optional[AnyUUID]
+ job_url = None # type: Optional[str]
+ proc_id = None # type: Optional[str]
+
+ @classmethod
+ def setUpClass(cls) -> None:  # applies the settings below, runs the parent setup, then creates the test job
+ cls.settings = copy.deepcopy(cls.settings or {})  # deep copy, presumably so inherited settings are not mutated (confirm)
+ settings = {
+ "weaver.cwl_prov": True,
+ "weaver.wps_metadata_provider_name": "TestJobProvenanceBase", # metadata employed by PROV
+ "weaver.wps_metadata_provider_url": "http://localhost/", # metadata employed by PROV
+ "weaver.wps": True,
+ "weaver.wps_path": "/ows/wps",
+ "weaver.wps_restapi_path": "/",
+ "weaver.wps_output_path": "/wpsoutputs",
+ "weaver.wps_output_url": "http://localhost/wpsoutputs",
+ "weaver.wps_output_dir": "/tmp/weaver-test/wps-outputs", # nosec: B108 # don't care hardcoded for test
+ }
+ cls.settings.update(settings)
+ super(TestJobProvenanceBase, cls).setUpClass()  # parent setup is invoked only after the settings were updated
+ cls.setup_test_job()  # fills 'proc_id', 'job_url' and 'job_id' on the class
+
+ @classmethod
+ def tearDownClass(cls):  # clears the process and job stores before delegating to the parent teardown
+ cls.process_store.clear_processes()
+ cls.job_store.clear_jobs()
+ super(TestJobProvenanceBase, cls).tearDownClass()
+
+ @classmethod
+ def setup_test_job(cls):  # deploys the 'Echo' package, submits one execution and records its identifiers
+ cls.proc_id = cls.fully_qualified_test_name(cls, "Echo")  # instance method called on the class, with 'cls' passed explicitly
+ cwl = cls.retrieve_payload("Echo", "package", local=True)
+ body = {
+ "processDescription": {
+ "id": cls.proc_id,
+ },
+ "executionUnit": [{"unit": cwl}],
+ }
+ cls.deploy_process(body)
+ data = {
+ "inputs": {"message": "0123456789"},
+ }
+ with contextlib.ExitStack() as stack_exec:  # enters every mock from 'mocked_execute_celery' plus the WPS output mock
+ for mock_exec in mocked_execute_celery():
+ stack_exec.enter_context(mock_exec)
+ stack_exec.enter_context(mocked_wps_output(cls.settings))
+ proc_url = f"/processes/{cls.proc_id}/execution"
+ headers = {"Prefer": "respond-async"}
+ headers.update(cls.json_headers)
+ resp = mocked_sub_requests(
+ cls.app, "post_json", proc_url,
+ data=data, headers=headers,
+ timeout=5, only_local=True
+ )
+ assert resp.status_code == 201, resp.text  # response text is used as the assertion message
+ status_url = resp.headers.get("location")
+ cls.monitor_job(status_url, return_status=True)  # return value is not used here
+ cls.job_url = status_url
+ cls.job_id = status_url.rsplit("/", 1)[-1]  # job ID is the last path segment of the status URL
+
+
+@pytest.mark.prov
+@pytest.mark.oap_part4
+@pytest.mark.functional
+class TestJobProvenance(TestJobProvenanceBase):
+ """
+ Tests to evaluate the various endpoints for :term:`Job` :term:`Provenance`.
+ """
+ @parameterized.expand([
+ ({}, {}), # default is JSON
+ ({"f": OutputFormat.JSON}, {}),
+ ({}, {"Accept": ContentType.APP_JSON}),
+ ])
+ def test_job_prov_json(self, queries, headers):  # each case is a (query parameters, request headers) pair
+ prov_url = f"{self.job_url}/prov"
+ resp = self.app.get(prov_url, params=queries, headers=headers)
+ assert resp.status_code == 200
+ assert resp.content_type == ContentType.APP_JSON
+ prov = resp.json
+ assert "prefix" in prov
+ assert "wfprov" in prov["prefix"]  # the 'wfprov' key must be declared under 'prefix'
+
+ @parameterized.expand([
+ ({"f": OutputFormat.XML}, {}),
+ ({}, {"Accept": ContentType.TEXT_XML}),
+ ({}, {"Accept": ContentType.APP_XML}),
+ ])
+ def test_job_prov_xml(self, queries, headers):
+ prov_url = f"{self.job_url}/prov"
+ resp = self.app.get(prov_url, params=queries, headers=headers)
+ assert resp.status_code == 200
+ assert resp.content_type in ContentType.ANY_XML
+ prov = resp.text
+ assert " None:
+ cls.settings = copy.deepcopy(cls.settings or {})
+ settings = {
+ "weaver.cwl_prov": False, # NOTE: this is the test
+ "weaver.wps": True,
+ "weaver.wps_path": "/ows/wps",
+ "weaver.wps_restapi_path": "/",
+ "weaver.wps_output_path": "/wpsoutputs",
+ "weaver.wps_output_url": "http://localhost/wpsoutputs",
+ "weaver.wps_output_dir": "/tmp/weaver-test/wps-outputs", # nosec: B108 # don't care hardcoded for test
+ }
+ cls.settings.update(settings)
+
+ # don't call 'TestJobProvenanceBase.setUpClass', but its parent's 'setUpClass' instead
+ # to configure the web test application the same way with above settings,
+ # while making sure to avoid re-enabling 'weaver.cwl_prov = true'
+ super(TestJobProvenanceBase, cls).setUpClass()
+
+ # NOTE:
+ # by doing the execution embedded in job setup
+ # most of the code paths without provenance will already be validated
+ # only need to validate the remaining results to match expectations
+ cls.setup_test_job()
+
+ @parameterized.expand(
+ itertools.product(  # one case per combination of the two path values and every provenance format
+ [None, ProvenancePathType.PROV],
+ ProvenanceFormat.formats(),
+ )
+ )
+ def test_prov_not_created(self, prov_endpoint, prov_fmt):
+ """
+ Validate that disabled :term:`Provenance` feature works and that none is generated from an execution.
+ """
+ job = self.job_store.fetch_by_id(self.job_id)
+ prov_path = job.prov_path(extra_path=prov_endpoint, prov_format=prov_fmt, container=self.settings)
+ if prov_path is None:  # no path was resolved for this path/format pair
+ pytest.skip("Ignore invalid combination of PROV path/format.")
+ assert not os.path.exists(prov_path)  # nothing must exist at the resolved location
+
+ @parameterized.expand(ProvenancePathType.values())
+ def test_prov_not_found(self, prov_endpoint):
+ """
+ Validate that disabled :term:`Provenance` feature works and that endpoints are not available.
+ """
+ prov_url = f"/jobs/{self.job_id}{prov_endpoint}"  # 'prov_endpoint' is appended directly after the job ID
+ resp = self.app.get(prov_url, expect_errors=True)  # 'expect_errors' is set since a 404 is the expected outcome
+ assert resp.status_code == 404
diff --git a/tests/functional/utils.py b/tests/functional/utils.py
index b995d919e..4dd2f4022 100644
--- a/tests/functional/utils.py
+++ b/tests/functional/utils.py
@@ -65,6 +65,7 @@
class GenericUtils(unittest.TestCase):
def fully_qualified_test_name(self, name=""):
+ # type: (str) -> str
"""
Generates a unique name using the current test method full context name and the provided name, if any.
@@ -72,7 +73,10 @@ def fully_qualified_test_name(self, name=""):
"""
extra_name = f"-{name}" if name else ""
class_name = fully_qualified_name(self)
- test_name = f"{class_name}.{self._testMethodName}{extra_name}"
+ if hasattr(self, "_testMethodName"):
+ test_name = f"{class_name}.{self._testMethodName}{extra_name}"
+ else:
+ test_name = f"{class_name}{extra_name}" # called from class method
test_name = test_name.replace(".", "-").replace("-_", "_").replace("_-", "-")
return test_name
@@ -449,24 +453,28 @@ def deploy_process(cls,
info.append(deepcopy(resp.json))
return info # type: ignore
- def _try_get_logs(self, status_url):
- _resp = self.app.get(f"{status_url}/logs", headers=dict(self.json_headers))
+ @classmethod
+ def _try_get_logs(cls, status_url):
+ _resp = cls.app.get(f"{status_url}/logs", headers=dict(cls.json_headers))
if _resp.status_code == 200:
_text = "\n".join(_resp.json)
return f"Error logs:\n{_text}"
return ""
@overload
- def monitor_job(self, status_url, **__):
+ @classmethod
+ def monitor_job(cls, status_url, **__):
# type: (str, **Any) -> ExecutionResults
...
@overload
- def monitor_job(self, status_url, return_status=False, **__):
+ @classmethod
+ def monitor_job(cls, status_url, return_status=False, **__):
# type: (str, Literal[True], **Any) -> JobStatusResponse
...
- def monitor_job(self,
+ @classmethod
+ def monitor_job(cls,
status_url, # type: str
timeout=None, # type: Optional[int]
interval=None, # type: Optional[int]
@@ -501,17 +509,17 @@ def check_job_status(_resp, running=False):
body = _resp.json
pretty = json.dumps(body, indent=2, ensure_ascii=False)
statuses = [Status.ACCEPTED, Status.RUNNING, final_status] if running else [final_status]
- assert _resp.status_code == 200, f"Execution failed:\n{pretty}\n{self._try_get_logs(status_url)}"
- assert body["status"] in statuses, f"Error job info:\n{pretty}\n{self._try_get_logs(status_url)}"
+ assert _resp.status_code == 200, f"Execution failed:\n{pretty}\n{cls._try_get_logs(status_url)}"
+ assert body["status"] in statuses, f"Error job info:\n{pretty}\n{cls._try_get_logs(status_url)}"
return body["status"] in {final_status, Status.SUCCEEDED, Status.FAILED} # break condition
time.sleep(1) # small delay to ensure process execution had a chance to start before monitoring
- left = timeout or self.monitor_timeout
- delta = interval or self.monitor_interval
+ left = timeout or cls.monitor_timeout
+ delta = interval or cls.monitor_interval
once = True
resp = None
while left >= 0 or once:
- resp = self.app.get(status_url, headers=self.json_headers)
+ resp = cls.app.get(status_url, headers=cls.json_headers)
if check_job_status(resp, running=True):
break
time.sleep(delta)
@@ -521,7 +529,7 @@ def check_job_status(_resp, running=False):
if return_status or expect_failed:
return resp.json
params = {"schema": JobInputsOutputsSchema.OGC} # not strict to preserve old 'format' field
- resp = self.app.get(f"{status_url}/results", params=params, headers=self.json_headers)
+ resp = cls.app.get(f"{status_url}/results", params=params, headers=cls.json_headers)
assert resp.status_code == 200, f"Error job info:\n{resp.text}"
return resp.json
diff --git a/tests/processes/test_wps_package.py b/tests/processes/test_wps_package.py
index 2679a3443..b6fff68a0 100644
--- a/tests/processes/test_wps_package.py
+++ b/tests/processes/test_wps_package.py
@@ -30,7 +30,7 @@
from pywps.validator.mode import MODE
from tests.utils import assert_equal_any_order
-from weaver.datatype import Process
+from weaver.datatype import Job, Process
from weaver.exceptions import PackageExecutionError, PackageTypeError
from weaver.formats import ContentType
from weaver.processes.constants import (
@@ -80,6 +80,10 @@ def __init__(self, *_, **__):
super(MockWpsPackage, self).__init__(*_, **__)
self.mock_status_location = None
+ @property
+ def job(self):
+ return Job(task_id="MockWpsPackage")
+
@property
def status_location(self):
return self.mock_status_location
@@ -198,17 +202,22 @@ def test_stdout_stderr_logging_for_commandline_tool_success(caplog):
r".*",
log_data,
re.MULTILINE | re.DOTALL
- )
+ ), f"Captured Log Information expected in:\n{log_data}"
# cwltool call with reference to the command and stdout/stderr redirects
assert re.match(
r".*"
- rf"cwltool:job.* \[job {process.id}\].*echo \\\n"
+ rf"(\[cwltool\]|cwltool:job.*) \[job {process.id}(_[0-9]+)?\].*echo \\\n"
r"\s+'Dummy message' \> [\w\-/\.]+/stdout\.log 2\> [\w\-/\.]+/stderr\.log\n"
r".*",
log_data,
re.MULTILINE | re.DOTALL
- ), f"Information expected in:\n{log_data}"
- assert f"[cwltool] [job {process.id}] completed success" in log_data
+ ), f"Command Information with Log redirects expected in:\n{log_data}"
+ assert re.match(
+ r".*"
+ rf"(\[cwltool\]|cwltool:job.*) \[job {process.id}(_[0-9]+)?\] completed success",
+ log_data,
+ re.MULTILINE | re.DOTALL
+ ), f"Information about successful job expected in:\n{log_data}"
def test_stdout_stderr_logging_for_commandline_tool_failure(caplog):
diff --git a/tests/test_provenance.py b/tests/test_provenance.py
new file mode 100644
index 000000000..0a8480864
--- /dev/null
+++ b/tests/test_provenance.py
@@ -0,0 +1,187 @@
+import itertools
+
+import pytest
+
+from weaver.formats import ContentType, OutputFormat
+from weaver.provenance import ProvenanceFormat, ProvenancePathType
+
+
+@pytest.mark.prov
+@pytest.mark.parametrize(
+ ["prov_method", "kwargs", "expected"],
+ [
+ (ProvenancePathType.as_type, {}, None),
+ (ProvenancePathType.get, {}, None),
+ (ProvenancePathType.get, {"default": None}, None),
+ (ProvenancePathType.get, {"default": "default"}, "default"),
+ (ProvenancePathType.get, {"run_id": "1234"}, None),
+ (ProvenancePathType.get, {"run_id": "1234", "default": "default"}, "default"),
+ ]
+)
+def test_provenance_path_type_unresolved(prov_method, kwargs, expected):
+ result = prov_method("random", **kwargs)
+ assert result == expected
+
+
+@pytest.mark.prov
+@pytest.mark.parametrize(
+ ["provenance", "prov_run_id", "expect_path", "expect_type"],
+ [
+ ("prov", None, ProvenancePathType.PROV, "prov"),
+ ("/prov", None, ProvenancePathType.PROV, "prov"),
+ ("info", None, ProvenancePathType.PROV_INFO, "info"),
+ ("/info", None, ProvenancePathType.PROV_INFO, "info"),
+ ("/prov/info", None, ProvenancePathType.PROV_INFO, "info"),
+ ("run", None, ProvenancePathType.PROV_RUN, "run"),
+ ("/run", None, ProvenancePathType.PROV_RUN, "run"),
+ ("/prov/run", None, ProvenancePathType.PROV_RUN, "run"),
+ ("run", "run-id", f"{ProvenancePathType.PROV_RUN}/run-id", "run"),
+ ("/run", "run-id", f"{ProvenancePathType.PROV_RUN}/run-id", "run"),
+ ("/prov/run", "run-id", f"{ProvenancePathType.PROV_RUN}/run-id", "run"),
+ ]
+)
+def test_provenance_path_type_resolution(provenance, prov_run_id, expect_path, expect_type):
+ result = ProvenancePathType.get(provenance, run_id=prov_run_id)
+ assert result == expect_path
+ result = ProvenancePathType.as_type(provenance)
+ assert result == expect_type
+
+
+@pytest.mark.prov
+def test_provenance_formats():
+ result = ProvenanceFormat.formats()
+ expect = [
+ ProvenanceFormat.PROV_JSON,
+ ProvenanceFormat.PROV_JSONLD,
+ ProvenanceFormat.PROV_TURTLE,
+ ProvenanceFormat.PROV_N,
+ ProvenanceFormat.PROV_XML,
+ ProvenanceFormat.PROV_XML,
+ ProvenanceFormat.PROV_NT,
+ ]
+ assert set(result) == set(expect)
+
+
+@pytest.mark.prov
+def test_provenance_media_types():
+ result = ProvenanceFormat.media_types()
+ expect = [
+ ContentType.APP_JSON,
+ ContentType.APP_JSONLD,
+ ContentType.TEXT_TURTLE,
+ ContentType.TEXT_PROVN,
+ ContentType.TEXT_XML,
+ ContentType.APP_XML,
+ ContentType.APP_NT,
+ ]
+ assert set(result) == set(expect)
+
+
+@pytest.mark.prov
+@pytest.mark.parametrize(
+ ["provenance", "expect"],
+ [
+ (None, None),
+ ("prov-json", ProvenanceFormat.PROV_JSON),
+ ("PROV-JSON", ProvenanceFormat.PROV_JSON),
+ ("PROV-JSONLD", ProvenanceFormat.PROV_JSONLD),
+ ]
+)
+def test_provenance_format(provenance, expect):
+ result = ProvenanceFormat.get(provenance)
+ assert result == expect
+
+
+@pytest.mark.prov
+@pytest.mark.parametrize(
+ ["provenance", "expect"],
+ [
+ (None, None),
+ (ProvenanceFormat.PROV_JSON, ContentType.APP_JSON),
+ (ProvenanceFormat.PROV_JSONLD, ContentType.APP_JSONLD),
+ (ProvenanceFormat.PROV_XML, ContentType.APP_XML),
+ (ProvenanceFormat.PROV_NT, ContentType.APP_NT),
+ (ProvenanceFormat.PROV_N, ContentType.TEXT_PROVN),
+ (ProvenanceFormat.PROV_TURTLE, ContentType.TEXT_TURTLE),
+ ]
+)
+def test_provenance_as_media_type(provenance, expect):
+ result = ProvenanceFormat.as_media_type(provenance)
+ assert result == expect
+
+
+@pytest.mark.prov
+@pytest.mark.parametrize(
+ ["prov", "prov_format", "output_format", "expect", "is_error"],
+ [
+ (None, None, None, ProvenanceFormat.PROV_JSON, False),
+ # only main PROV path allow format variants
+ (ProvenancePathType.PROV, None, None, ProvenanceFormat.PROV_JSON, False),
+ (ProvenancePathType.PROV, ProvenanceFormat.PROV_JSON, None, ProvenanceFormat.PROV_JSON, False),
+ (ProvenancePathType.PROV, ProvenanceFormat.PROV_JSONLD, None, ProvenanceFormat.PROV_JSONLD, False),
+ (ProvenancePathType.PROV, ProvenanceFormat.PROV_XML, None, ProvenanceFormat.PROV_XML, False),
+ (ProvenancePathType.PROV, ProvenanceFormat.PROV_NT, None, ProvenanceFormat.PROV_NT, False),
+ (ProvenancePathType.PROV, ProvenanceFormat.PROV_N, None, ProvenanceFormat.PROV_N, False),
+ (ProvenancePathType.PROV, ProvenanceFormat.PROV_TURTLE, None, ProvenanceFormat.PROV_TURTLE, False),
+ # validate implicit mapping via output format
+ (ProvenancePathType.PROV, None, OutputFormat.JSON, ProvenanceFormat.PROV_JSON, False),
+ (ProvenancePathType.PROV, None, OutputFormat.JSON_RAW, ProvenanceFormat.PROV_JSON, False),
+ (ProvenancePathType.PROV, None, OutputFormat.JSON_STR, ProvenanceFormat.PROV_JSON, False),
+ (ProvenancePathType.PROV, None, OutputFormat.YAML, ProvenanceFormat.PROV_JSON, False),
+ (ProvenancePathType.PROV, None, OutputFormat.YML, ProvenanceFormat.PROV_JSON, False),
+ (ProvenancePathType.PROV, None, OutputFormat.XML, ProvenanceFormat.PROV_XML, False),
+ (ProvenancePathType.PROV, None, OutputFormat.TEXT, ProvenanceFormat.PROV_N, False),
+ (ProvenancePathType.PROV, None, OutputFormat.TXT, ProvenanceFormat.PROV_N, False),
+ # check some combinations considered invalid
+ (ProvenancePathType.PROV, ProvenanceFormat.PROV_N, OutputFormat.JSON, None, True),
+ (ProvenancePathType.PROV, ProvenanceFormat.PROV_N, OutputFormat.XML, None, True),
+ (ProvenancePathType.PROV, ProvenanceFormat.PROV_NT, OutputFormat.JSON, None, True),
+ (ProvenancePathType.PROV, ProvenanceFormat.PROV_NT, OutputFormat.XML, None, True),
+ (ProvenancePathType.PROV, ProvenanceFormat.PROV_XML, OutputFormat.JSON_RAW, None, True),
+ (ProvenancePathType.PROV, ProvenanceFormat.PROV_JSON, OutputFormat.XML, None, True),
+ (ProvenancePathType.PROV, ProvenanceFormat.PROV_TURTLE, OutputFormat.JSON, None, True),
+ (ProvenancePathType.PROV, None, OutputFormat.HTML, None, True),
+ (ProvenancePathType.PROV, ProvenanceFormat.PROV_JSON, OutputFormat.TEXT, None, True),
+ (ProvenancePathType.PROV_INFO, None, OutputFormat.JSON, None, True),
+ (ProvenancePathType.PROV_INFO, ProvenanceFormat.PROV_JSON, OutputFormat.JSON, None, True),
+ ]
+ +
+ [
+ # all but the main PROV paths are text-only
+ # no output format, so it default to None resolved, and no error
+ (_prov, _prov_fmt, None, None, False)
+ for _prov, _prov_fmt
+ in itertools.product(
+ set(ProvenancePathType.types()) - {ProvenancePathType.as_type(ProvenancePathType.PROV)},
+ ProvenanceFormat.values(),
+ )
+ ]
+ +
+ [
+ # all but the main PROV paths are text-only
+ # if anything is specified other than text, it's an error
+ (_prov, _prov_fmt, _out_fmt, None, True)
+ for _prov, _prov_fmt, _out_fmt
+ in itertools.product(
+ set(ProvenancePathType.types()) - {ProvenancePathType.as_type(ProvenancePathType.PROV)},
+ ProvenanceFormat.values(),
+ set(OutputFormat.values()) - {OutputFormat.TEXT, OutputFormat.TXT},
+ )
+ ]
+ +
+ [
+ # all but the main PROV paths are text-only
+ # valid if the output format is text
+ (_prov, _prov_fmt, _out_fmt, None, False)
+ for _prov, _prov_fmt, _out_fmt
+ in itertools.product(
+ set(ProvenancePathType.types()) - {ProvenancePathType.as_type(ProvenancePathType.PROV)},
+ ProvenanceFormat.values(),
+ [OutputFormat.TEXT, OutputFormat.TXT],
+ )
+ ]
+)
+def test_provenance_format_compatible(prov, prov_format, output_format, expect, is_error):
+ result, error = ProvenanceFormat.resolve_compatible_formats(prov, prov_format, output_format)
+ assert result == expect
+ assert error if is_error else error is None, "When an error is expected, a string detailing it should be returned."
diff --git a/weaver/base.py b/weaver/base.py
index 4631c51ce..0e44965d1 100644
--- a/weaver/base.py
+++ b/weaver/base.py
@@ -39,7 +39,11 @@ class Constants(object, metaclass=_Const):
@classmethod
def __members__(cls):
members = set(cls.__dict__) - set(object.__dict__)
- members = [member for member in members if not inspect.ismethod(getattr(cls, member))]
+ members = [
+ member for member in members
+ if not isinstance(object.__getattribute__(cls, member), classmethod)
+ and not inspect.ismethod(getattr(cls, member))
+ ]
return [member for member in members if not isinstance(member, str) or not member.startswith("_")]
@classmethod
@@ -109,19 +113,19 @@ class classproperty(property): # pylint: disable=C0103,invalid-name
.. seealso::
https://stackoverflow.com/a/5191224
"""
-
- def __init__(self,
- fget=None, # type: Optional[Callable[[object], PropertyDataTypeT]]
- fset=None, # type: Optional[Callable[[object, PropertyDataTypeT], None]]
- fdel=None, # type: Optional[Callable[[object], None]]
- doc="", # type: str
- ): # type: (...) -> None
+ def __init__(
+ self,
+ fget=None, # type: Optional[Callable[[object], PropertyDataTypeT]]
+ fset=None, # type: Optional[Callable[[object, PropertyDataTypeT], None]]
+ fdel=None, # type: Optional[Callable[[object], None]]
+ doc="", # type: str
+ ): # type: (...) -> None
super(classproperty, self).__init__(fget=fget, fset=fset, fdel=fdel, doc=doc)
self.__doc__ = inspect.cleandoc(doc)
- def __get__(self, cls, owner): # noqa
- # type: (Type[object], Any) -> PropertyDataTypeT
- return classmethod(self.fget).__get__(None, owner)()
+ def __get__(self, instance, owner=None):
+ # type: (Any, Optional[Type[object]]) -> PropertyDataTypeT
+ return self.fget.__get__(None, owner)(instance or owner) # pylint: disable=E1101,no-member # false-positive
class _EnumMeta(enum.EnumMeta):
diff --git a/weaver/cli.py b/weaver/cli.py
index e194a98e3..dbfcec97f 100644
--- a/weaver/cli.py
+++ b/weaver/cli.py
@@ -35,6 +35,7 @@
)
from weaver.processes.utils import get_process_information
from weaver.processes.wps_package import get_process_definition
+from weaver.provenance import ProvenanceFormat, ProvenancePathType
from weaver.sort import Sort, SortMethods
from weaver.status import JOB_STATUS_CATEGORIES, Status, StatusCategory, map_status
from weaver.utils import (
@@ -68,10 +69,12 @@
# https://github.com/ashb/sphinx-argparse/issues/7
try:
from weaver.typedefs import (
+ URL,
AnyHeadersContainer,
AnyRequestMethod,
AnyRequestType,
AnyResponseType,
+ AnyUUID,
CookiesType,
CWL,
CWL_IO_ValueMap,
@@ -97,6 +100,8 @@
ExecutionResultValue = Union[ExecutionResultObjectRef, List[ExecutionResultObjectRef]]
JobSubscribers = Dict[str, Any]
HeadersType = Dict[str, str]
+ URL = str
+ AnyUUID = str
try:
from weaver.formats import AnyOutputFormat
from weaver.processes.constants import ProcessSchemaType
@@ -110,7 +115,8 @@
ConditionalGroup = Tuple[argparse._ActionsContainer, bool, bool] # noqa
PostHelpFormatter = Callable[[str], str]
- ArgumentParserRule = Tuple[argparse._ActionsContainer, Callable[[argparse.Namespace], Optional[bool]], str] # noqa
+ ArgumentParserRuleCheck = Callable[[argparse.Namespace], Optional[Union[bool, str]]]
+ ArgumentParserRule = Tuple[argparse._ActionsContainer, ArgumentParserRuleCheck, str] # noqa
LOGGER = logging.getLogger("weaver.cli") # do not use '__name__' since it becomes '__main__' from CLI call
@@ -149,7 +155,7 @@ def __init__(
self.success = success
self.message = message
self.headers = ResponseHeaders(headers) if headers is not None else None
- self.body = body
+ self.body = body or text
self.text = text
self.code = code
@@ -633,8 +639,8 @@ def _parse_deploy_package(
return OperationResult(True, p_id, body)
def _parse_job_ref(self, job_reference, url=None):
- # type: (str, Optional[str]) -> Tuple[Optional[str], Optional[str]]
- if job_reference.startswith("http"):
+ # type: (Union[URL, AnyUUID], Optional[str]) -> Tuple[Optional[str], Optional[str]]
+ if str(job_reference).startswith("http"):
job_url = job_reference
job_parts = [part for part in job_url.split("/") if part.strip()]
job_id = job_parts[-1]
@@ -1526,7 +1532,7 @@ def execute(
def trigger_job(
self,
- job_reference, # type: str
+ job_reference, # type: Union[URL, AnyUUID]
url=None, # type: Optional[str]
auth=None, # type: Optional[AuthBase]
headers=None, # type: Optional[AnyHeadersContainer]
@@ -1570,7 +1576,7 @@ def trigger_job(
def update_job(
self,
- job_reference, # type: str
+ job_reference, # type: Union[URL, AnyUUID]
title=null, # type: Union[Type[null], Optional[str]]
inputs=None, # type: Optional[Union[str, ExecutionInputs, CWL_IO_ValueMap]]
subscribers=None, # type: Optional[JobSubscribers]
@@ -1857,7 +1863,7 @@ def jobs(
def status(
self,
- job_reference, # type: str
+ job_reference, # type: Union[URL, AnyUUID]
url=None, # type: Optional[str]
auth=None, # type: Optional[AuthBase]
headers=None, # type: Optional[AnyHeadersContainer]
@@ -1898,7 +1904,7 @@ def status(
def _job_info(
self,
x_path, # type: str
- job_reference, # type: str
+ job_reference, # type: Union[URL, AnyUUID]
url=None, # type: Optional[str]
auth=None, # type: Optional[AuthBase]
headers=None, # type: Optional[AnyHeadersContainer]
@@ -1914,7 +1920,7 @@ def _job_info(
The :term:`Job` must be in the expected status to retrieve relevant information.
.. seealso::
- :ref:`proc_op_result`
+ :ref:`proc_op_status`
:param job_reference: Either the full :term:`Job` status URL or only its UUID.
:param url: Instance URL if not already provided during client creation.
@@ -1959,6 +1965,85 @@ def statistics(self, *args, **kwargs):
stats = statistics # alias
+ def provenance(
+ self,
+ job_reference, # type: Union[URL, AnyUUID]
+ prov=None, # type: Optional[ProvenancePathType]
+ prov_run_id=None, # type: Optional[AnyUUID]
+ prov_format=None, # type: Optional[ProvenanceFormat]
+ output_format=None, # type: Optional[AnyOutputFormat]
+ url=None, # type: Optional[str]
+ auth=None, # type: Optional[AuthBase]
+ headers=None, # type: Optional[AnyHeadersContainer]
+ **kwargs, # type: Any
+ ): # type: (...) -> OperationResult
+ """
+ Obtain the :term:`Provenance` metadata from a successful :term:`Job` execution.
+
+ The :term:`Job` must be in the expected status to retrieve relevant information.
+
+ .. seealso::
+ - :ref:`proc_op_status`
+ - :ref:`proc_op_job_prov`
+
+ :param job_reference: Either the full :term:`Job` status URL or only its UUID.
+ :param prov:
+ Type of :term:`Provenance` metadata to retrieve, as expressed by relative path.
+ For example, retrieving an execution run metadata can be requested with ``run``, ``/run`` or ``/prov/run``.
+ Available relative paths are as per defined by the :term:`API` endpoints (see :class:`ProvenancePathType`).
+ Can be combined in certain cases with a :paramref:`prov_run_id` to obtain only the metadata of a nested step
+ within a :term:`Workflow` execution. If omitted, returns the main :term:`Provenance` metadata
+ representation as per the requested :paramref:`prov_format` (see :class:`ProvenanceFormat`).
+ :param prov_run_id:
+ Specific run (i.e.: a nested :term:`Workflow` step) for which to retrieve :term:`Provenance` metadata.
+ Applicable IDs will typically correspond to the underlying :term:`Job` ID that would have been created
+ for the corresponding steps, but could differ in particular situations.
+ To make sure, the top-most ``PROV`` metadata should be inspected to extract relevant run IDs.
+ :param prov_format:
+ Desired :term:`Provenance` metadata representation (see :class:`ProvenanceFormat`).
+ Applicable only when retrieving the ``PROV`` details (i.e.: :paramref:`prov` must be ``None`` or ``/prov``).
+ Ignored otherwise.
+ Can be combined with :paramref:`output_format` to convert the representation into semantically equivalent
+ representations. For example, :attr:`ProvenanceFormat.PROV_JSON` could be converted into the corresponding
+ :term:`YAML` representation using :attr:`OutputFormat.YAML`. However, this is limited only to directly
+ mappable representations (i.e.: :term:`JSON`, :term:`YAML`, :term:`XML`).
+ :param output_format:
+ Select an alternate output representation of the result body contents.
+ See also :paramref:`prov_format` for even more format combinations specific to :term:`Provenance` metadata.
+ :param url: Instance URL if not already provided during client creation.
+ :param auth:
+ Instance authentication handler if not already created during client creation.
+ Should perform required adjustments to request to allow access control of protected contents.
+ :param headers:
+ Additional headers to employ when sending request.
+ Note that this can break functionalities if expected headers are overridden. Use with care.
+ :returns: :term:`Provenance` metadata of the :term:`Job`, or a failure result for incompatible formats.
+ """
+ prov_path = ProvenancePathType.get(prov, run_id=prov_run_id, default=ProvenancePathType.PROV)
+ prov_format, err_msg = ProvenanceFormat.resolve_compatible_formats(prov, prov_format, output_format)
+ if err_msg:
+ return OperationResult(False, message=err_msg)
+ if prov_format:
+ prov_ctype = ProvenanceFormat.as_media_type(prov_format)
+ if prov_ctype:
+ headers = CaseInsensitiveDict(headers or {})
+ headers["Accept"] = prov_ctype
+ if prov_path != ProvenancePathType.PROV:  # any other PROV path is requested as plain text
+ headers = CaseInsensitiveDict(headers or {})
+ headers["Accept"] = ContentType.TEXT_PLAIN
+ result = self._job_info(
+ prov_path,
+ job_reference,
+ url=url,
+ auth=auth,
+ headers=headers,
+ output_format=output_format,
+ **kwargs,
+ )
+ return result
+
+ prov = provenance # alias
+
def monitor(
self,
job_reference, # type: str
@@ -2598,6 +2683,55 @@ def add_timeout_param(parser):
)
+def add_provenance_params(parser):
+ # type: (argparse.ArgumentParser) -> None
+ parser.add_argument(
+ "-pT", "--prov", "--prov-type", dest="prov",
+ choices=ProvenancePathType.types(),
+ help=(
+ "Desired PROV metadata contents. "
+ "The main PROV metadata supports multiple representations. "
+ "All others are only available as plain text."
+ )
+ )
+ parser.add_argument(
+ "-pF", "--prov-format", dest="prov_format",
+ choices=ProvenanceFormat.formats(),
+ help=(
+ "Desired PROV metadata schema representation. "
+ "Applicable formats depend on the PROV metadata type being requested. "
+ "Can be combined with -F/--format to transform the result to an alternate representation if compatible. "
+ "Note that certain request headers will be overridden to obtain the requested format even if they are "
+ "explicitly specified by the corresponding -H/--header option."
+ )
+ )
+ parser.add_argument(
+ "-pR", "--run", "--prov-run", dest="prov_run_id",
+ # no 'choices' here: run IDs are arbitrary identifiers, unlike the fixed PROV path types of -pT/--prov
+ help=(
+ "Specific run (i.e.: a nested Workflow step) for which to retrieve Provenance metadata. "
+ "Applicable IDs will typically correspond to the underlying Job ID that would have been "
+ "created for the corresponding steps, but could differ in particular situations. "
+ "To make sure, the top-most PROV metadata should be inspected to extract relevant run IDs."
+ )
+ )
+
+
+def check_compatible_prov_formats(ns):
+ # type: (argparse.Namespace) -> Optional[str]
+ """
+ Check multiple output format and PROV format for valid combinations.
+
+ If valid, update the arguments to make them work during invocation.
+ Otherwise, return the relevant error to fail argument validation and print the error message.
+ """
+ prov_format, err_msg = ProvenanceFormat.resolve_compatible_formats(ns.prov, ns.prov_format, ns.output_format)
+ if err_msg:
+ return err_msg
+ if prov_format:
+ ns.prov_format = prov_format
+
+
class SubscriberAction(argparse.Action):
"""
Action that will validate that the input argument references a valid subscriber argument.
@@ -2996,8 +3130,8 @@ def format_help(self):
self.help_mode = False
return text
- def add_rule(self, rule, failure):
- # type: (Callable[[argparse.Namespace], Optional[bool]], str) -> None
+ def add_rule(self, rule, failure=None):
+ # type: (ArgumentParserRuleCheck, Optional[str]) -> None
self._rules.add((self, rule, failure))
def parse_known_args(self, args=None, namespace=None):
@@ -3011,7 +3145,10 @@ def parse_known_args(self, args=None, namespace=None):
"""
ns, args = super(WeaverArgumentParser, self).parse_known_args(args=args, namespace=namespace)
for container, rule, failure in self._rules:
- if rule(ns) not in [None, True]:
+ result = rule(ns)
+ if result not in [None, True]:
+ if isinstance(result, str):
+ failure = f"{failure} because {result}"
container.error(failure)
return ns, args
@@ -3398,6 +3535,25 @@ def make_parser():
add_job_ref_param(op_statistics)
add_shared_options(op_statistics)
+ op_provenance = WeaverArgumentParser(
+ "provenance",
+ description=(
+ "Obtain the provenance metadata of a job using a reference UUID or URL. "
+ "Different W3C PROV representations can be retrieved according to specified format options. "
+ "Furthermore, different parts of the provenance metadata can be extracted."
+ ),
+ formatter_class=ParagraphFormatter,
+ )
+ set_parser_sections(op_provenance)
+ add_url_param(op_provenance, required=False)
+ add_job_ref_param(op_provenance)
+ add_shared_options(op_provenance)
+ add_provenance_params(op_provenance)
+ op_provenance.add_rule(
+ check_compatible_prov_formats,
+ "specified options for -pF/--prov-format and -F/--format are not compatible",
+ )
+
op_results = WeaverArgumentParser(
"results",
description=(
@@ -3472,6 +3628,7 @@ def make_parser():
op_logs,
op_exceptions,
op_statistics,
+ op_provenance,
op_results,
op_upload,
]
@@ -3479,6 +3636,7 @@ def make_parser():
"processes": op_capabilities,
"errors": op_exceptions,
"stats": op_statistics,
+ "prov": op_provenance,
}
for op_parser in operations:
op_aliases = [alias for alias, op_alias in aliases.items() if op_alias is op_parser]
diff --git a/weaver/datatype.py b/weaver/datatype.py
index 5b79d8f2e..d10949581 100644
--- a/weaver/datatype.py
+++ b/weaver/datatype.py
@@ -6,6 +6,7 @@
import copy
import enum
import inspect
+import io
import json
import os
import re
@@ -26,6 +27,7 @@
import pyramid.httpexceptions
import requests.exceptions
from cryptography.fernet import Fernet
+from cwlprov.tool import Tool as CWLProvTool
from dateutil.parser import parse as dt_parse
from docker.auth import decode_auth # pylint: disable=E0611
from owslib.util import ServiceException as OWSServiceException
@@ -55,6 +57,7 @@
)
from weaver.processes.convert import get_field, json2oas_io, normalize_ordered_io, null, ows2json, wps2json_io
from weaver.processes.types import ProcessType
+from weaver.provenance import ProvenanceFormat
from weaver.quotation.status import QuoteStatus
from weaver.status import JOB_STATUS_CATEGORIES, Status, StatusCategory, map_status
from weaver.store.base import StoreProcesses
@@ -76,7 +79,7 @@
)
from weaver.visibility import Visibility
from weaver.warning import NonBreakingExceptionWarning, UnsupportedOperationWarning
-from weaver.wps.utils import get_wps_client, get_wps_url
+from weaver.wps.utils import get_wps_client, get_wps_output_dir, get_wps_url
from weaver.wps_restapi import swagger_definitions as sd
from weaver.wps_restapi.utils import get_wps_restapi_base_url
@@ -94,8 +97,10 @@
AnyExecuteReturnPreference,
AnyExecuteTransmissionMode
)
+ from weaver.formats import AnyContentType
from weaver.processes.constants import ProcessSchemaType
from weaver.processes.types import AnyProcessType
+ from weaver.provenance import AnyProvenanceFormat, ProvenancePathType
from weaver.quotation.status import AnyQuoteStatus
from weaver.status import AnyStatusType, StatusType
from weaver.typedefs import (
@@ -1403,18 +1408,20 @@ def response(self, response):
response = xml_util.tostring(response)
self["response"] = response
- def _job_url(self, base_url):
- # type: (str) -> str
+ def process_url(self, container=None):
+ # type: (Optional[AnySettingsContainer]) -> str
+ settings = get_settings(container)
+ base_url = get_wps_restapi_base_url(settings)
 if self.service is not None:
 base_url += sd.provider_service.path.format(provider_id=self.service)
- job_path = sd.process_job_service.path.format(process_id=self.process, job_id=self.id)
- return base_url + job_path
+ proc_url = sd.process_service.path.format(process_id=self.process)
+ return base_url + proc_url
def job_url(self, container=None, extra_path=None):
# type: (Optional[AnySettingsContainer], Optional[str]) -> str
- settings = get_settings(container)
- base_url = get_wps_restapi_base_url(settings)
- return self._job_url(base_url) + (extra_path or "")
+ proc_url = self.process_url(container)
+ job_url = sd.job_service.path.format(job_id=self.id)
+ return proc_url + job_url + (extra_path or "")
def status_url(self, container=None):
# type: (Optional[AnySettingsContainer]) -> str
@@ -1466,6 +1473,74 @@ def result_path(self, job_id=None, output_id=None, file_name=None):
result_job_path = os.path.join(result_job_path, file_name)
return result_job_path
+ def prov_url(self, container=None, extra_path=None):
+ # type: (Optional[AnySettingsContainer], Optional[ProvenancePathType]) -> str
+ extra_path = str(extra_path or "")
+ prov_path = f"/prov{extra_path}"
+ return self.job_url(container=container, extra_path=prov_path)
+
+ def prov_path(self, container=None, extra_path=None, prov_format=None):
+ # type: (Optional[AnySettingsContainer], Optional[ProvenancePathType], Optional[AnyProvenanceFormat]) -> Optional[str]
+ """
+ Resolve the ``PROV`` directory or file path in the WPS output directory, or ``None`` if not mapped.
+ """
+ job_path = self.result_path()
+ prov_path = f"{job_path}-prov"
+ prov_format = ProvenanceFormat.get(prov_format, allow_media_type=True)
+ _prov_path_mapping = {
+ (None, None): prov_path, # the directory itself with all metadata
+ ("/prov", None): f"{prov_path}/metadata/provenance/primary.cwlprov.json",
+ ("/prov", ProvenanceFormat.PROV_JSON): f"{prov_path}/metadata/provenance/primary.cwlprov.json",
+ ("/prov", ProvenanceFormat.PROV_JSONLD): f"{prov_path}/metadata/provenance/primary.cwlprov.jsonld",
+ ("/prov", ProvenanceFormat.PROV_TURTLE): f"{prov_path}/metadata/provenance/primary.cwlprov.ttl",
+ ("/prov", ProvenanceFormat.PROV_XML): f"{prov_path}/metadata/provenance/primary.cwlprov.xml",
+ ("/prov", ProvenanceFormat.PROV_N): f"{prov_path}/metadata/provenance/primary.cwlprov.provn",
+ ("/prov", ProvenanceFormat.PROV_NT): f"{prov_path}/metadata/provenance/primary.cwlprov.nt",
+ } # type: Dict[Tuple[Optional[ProvenancePathType], Optional[ProvenanceFormat]], str]
+ key = (extra_path, prov_format)
+ resolved_path = _prov_path_mapping.get(key)
+ if resolved_path:
+ out_dir = get_wps_output_dir(container)
+ return os.path.join(out_dir, resolved_path)
+ return resolved_path
+
+ def prov_data(
+ self,
+ container=None, # type: Optional[AnySettingsContainer]
+ extra_path=None, # type: Optional[ProvenancePathType]
+ prov_format=None, # type: Optional[AnyContentType]
+ ): # type: (...) -> Tuple[Optional[str], Optional[AnyContentType]]
+ """
+ Read or retrieve data from the packaged provenance directory contents associated to the :term:`Job`.
+ """
+ prov_path = self.prov_path(container=container, extra_path=extra_path, prov_format=prov_format)
+ if prov_path and os.path.isfile(prov_path):
+ with open(prov_path, mode="r", encoding="utf-8") as prov_f:
+ data = prov_f.read()
+ fmt = prov_format
+ else:
+ prov_path = self.prov_path(container=container)
+ if not prov_path or not os.path.isdir(prov_path):
+ return None, None
+ path = str(extra_path).split("/prov/", 1)[-1]  # NOTE(review): extra_path=None yields oper "None"; confirm
+ frag = path.strip("/").split("/")
+ oper, params = frag[0], frag[1:]
+ args = ["-d", prov_path, oper]
+ if oper == "run":
+ args.extend(["--steps", "--start", "--end", "--duration", "--labels", "--inputs", "--outputs"])
+ elif oper in ["inputs", "outputs"]:
+ args.extend(["--parameters", "--format", "uris"])
+ args.extend(params)
+ tool = CWLProvTool(args)
+ tool.output = io.StringIO() # override the buffer argument to "print"
+ result = tool.main() # noqa # function annotated to return nothing, but sometimes returns an error code
+ if result not in [0, None]:
+ return None, None
+ tool.output.seek(0)
+ data = tool.output.read()
+ fmt = ContentType.TEXT_PLAIN
+ return data, fmt
+
def links(self, container=None, self_link=None):
# type: (Optional[AnySettingsContainer], Optional[str]) -> List[Link]
"""
@@ -1480,7 +1555,7 @@ def links(self, container=None, self_link=None):
settings = get_settings(container)
html_on = settings.get("weaver.wps_restapi_html", True)
base_url = get_wps_restapi_base_url(settings)
- job_url = self._job_url(base_url) # full URL
+ job_url = self.job_url(settings) # full URL
job_path = base_url + sd.job_service.path.format(job_id=self.id)
job_exec = f"{job_url.rsplit('/', 1)[0]}/execution"
job_list = base_url + sd.jobs_service.path
@@ -1506,7 +1581,6 @@ def links(self, container=None, self_link=None):
if self_link in ["status", None]:
job_links.extend([
{"href": job_list, "rel": "collection", "title": "List of submitted jobs."}, # IANA
-
])
if self.status in JOB_STATUS_CATEGORIES[StatusCategory.FINISHED]:
@@ -1519,6 +1593,10 @@ def links(self, container=None, self_link=None):
"title": "Job results of successful process execution (direct output values mapping)."},
{"href": f"{job_url}/statistics", "rel": "statistics", # unofficial
"title": "Job statistics collected following process execution."},
+ {"href": f"{job_url}/prov", "rel": "provenance", # unofficial
+ "title": "Job provenance collected following process execution."},
+ {"href": f"{job_url}/prov", "rel": "https://www.w3.org/ns/prov", # unofficial
+ "title": "Job provenance collected following process execution."},
])
else:
job_links.append({
@@ -1529,7 +1607,7 @@ def links(self, container=None, self_link=None):
"href": f"{job_url}/logs", "rel": "logs", # unofficial
"title": "List of collected job logs during process execution."
})
- if self_link in ["status", "inputs", "outputs", "results", "logs", "exceptions"]:
+ if self_link in ["status", "inputs", "outputs", "results", "logs", "exceptions", "provenance"]:
self_link_body = list(filter(lambda _link: _link["rel"].endswith(self_link), job_links))[-1]
self_link_body = copy.deepcopy(self_link_body)
# back to specific job if we are in one of its sub-endpoints
@@ -1542,8 +1620,8 @@ def links(self, container=None, self_link=None):
job_links.extend([self_link_body, self_link_up])
link_meta = {"type": ContentType.APP_JSON, "hreflang": AcceptLanguage.EN_CA}
for link in job_links:
- for meta, parma in link_meta.items():
- link.setdefault(meta, parma)
+ for meta, param in link_meta.items():
+ link.setdefault(meta, param)
return job_links
def json(self, container=None): # pylint: disable=W0221,arguments-differ
@@ -1956,7 +2034,7 @@ def authorized(cls, file, token):
return compare_digest(str(access), str(token))
def encrypt(self, file):
- # type: (IO[bytes|str]) -> BytesIO
+ # type: (IO[Union[bytes, str]]) -> BytesIO
"""
Encrypt file data using a secret to avoid plain text contents during temporary :term:`Vault` storage.
@@ -1972,7 +2050,7 @@ def encrypt(self, file):
return BytesIO(digest)
def decrypt(self, file):
- # type: (IO[bytes|str]) -> BytesIO
+ # type: (IO[Union[bytes, str]]) -> BytesIO
"""
Decrypt file contents using secret.
"""
diff --git a/weaver/formats.py b/weaver/formats.py
index cddc7dcf9..449429f7c 100644
--- a/weaver/formats.py
+++ b/weaver/formats.py
@@ -96,11 +96,13 @@ class ContentType(Constants):
APP_GZIP = "application/gzip"
APP_HDF5 = "application/x-hdf5"
APP_JSON = "application/json"
+ APP_JSONLD = "application/ld+json"
APP_RAW_JSON = "application/raw+json"
APP_OAS_JSON = "application/vnd.oai.openapi+json; version=3.0"
APP_OGC_PKG_JSON = "application/ogcapppkg+json"
APP_OGC_PKG_YAML = "application/ogcapppkg+yaml"
APP_NETCDF = "application/x-netcdf"
+ APP_NT = "application/n-triples"
APP_OCTET_STREAM = "application/octet-stream"
APP_PDF = "application/pdf"
APP_TAR = "application/x-tar" # map to existing gzip for CWL
@@ -125,6 +127,8 @@ class ContentType(Constants):
TEXT_PLAIN = "text/plain"
TEXT_RICHTEXT = "text/richtext"
TEXT_XML = "text/xml"
+ TEXT_PROVN = "text/provenance-notation"
+ TEXT_TURTLE = "text/turtle"
VIDEO_MPEG = "video/mpeg"
# special handling
diff --git a/weaver/processes/wps_package.py b/weaver/processes/wps_package.py
index c0d4d7bf2..7d301df82 100644
--- a/weaver/processes/wps_package.py
+++ b/weaver/processes/wps_package.py
@@ -11,7 +11,6 @@
- `WPS-REST schemas `_
- :mod:`weaver.wps_restapi.api` conformance details
"""
-
import copy
import json
import logging
@@ -30,10 +29,12 @@
import cwltool.process
import yaml
from cwltool.context import LoadingContext, RuntimeContext
+from cwltool.cwlprov.writablebagfile import close_ro, packed_workflow
from cwltool.factory import Factory as CWLFactory, WorkflowStatus as CWLException
from cwltool.process import shortname, use_custom_schema
from cwltool.secrets import SecretStore
from pyramid.httpexceptions import HTTPOk, HTTPServiceUnavailable
+from pyramid.settings import asbool
from pywps import Process
from pywps.inout.basic import SOURCE_TYPE, DataHandler, FileHandler, IOHandler, NoneIOHandler
from pywps.inout.formats import Format
@@ -128,6 +129,7 @@
from weaver.processes.sources import retrieve_data_source_url
from weaver.processes.types import ProcessType
from weaver.processes.utils import load_package_file, map_progress, pull_docker
+from weaver.provenance import WeaverResearchObject
from weaver.status import STATUS_PYWPS_IDS, Status, StatusCompliant, map_status
from weaver.store.base import StoreJobs, StoreProcesses
from weaver.utils import (
@@ -1811,6 +1813,50 @@ def setup_runtime(self):
}
return runtime_params
+    def setup_provenance(self, loading_context, runtime_context):
+        # type: (LoadingContext, RuntimeContext) -> None
+        """
+        Enable or disable ``PROV`` metadata collection on the provided contexts.
+
+        Controlled by the ``weaver.cwl_prov`` setting (enabled if omitted). When disabled, provenance references
+        are cleared from both contexts. When enabled, user and host provenance are activated and, unless
+        the runtime context already holds one, a research object bound to the current job is created
+        and shared by both contexts.
+
+        .. seealso::
+            - https://www.w3.org/TR/prov-overview/
+            - https://cwltool.readthedocs.io/en/latest/CWLProv.html
+            - https://docs.ogc.org/DRAFTS/24-051.html#_requirements_class_provenance
+        """
+        prov_enabled = asbool(self.settings.get("weaver.cwl_prov", True))
+        if prov_enabled:
+            runtime_context.prov_user = True
+            loading_context.user_provenance = True
+            runtime_context.prov_host = True
+            loading_context.host_provenance = True
+
+            if runtime_context.research_obj:
+                return  # reuse the research object that is already defined
+
+            research_obj = WeaverResearchObject(
+                self.job,  # align the RO definition with the job (make the UUIDs equal)
+                self.settings,
+                runtime_context.make_fs_access(""),
+                temp_prefix_ro=runtime_context.tmpdir_prefix,
+                orcid=runtime_context.orcid,
+                full_name=runtime_context.cwl_full_name,
+            )
+            loading_context.research_obj = research_obj
+            runtime_context.research_obj = research_obj
+        else:
+            loading_context.research_obj = None
+            runtime_context.research_obj = None
+            runtime_context.prov_obj = None
+
+    def finalize_provenance(self, runtime_context):
+        # type: (RuntimeContext) -> None
+        """
+        Package the workflow definition and persist the ``PROV`` metadata collected for the :term:`Job`.
+
+        Does nothing if no research object is defined on the runtime context.
+        """
+        research_obj = runtime_context.research_obj
+        if not research_obj:
+            return
+
+        # perform packaging of the workflow
+        packed_workflow(research_obj, repr_json(self.package, force_string=True, indent=2))
+
+        # sign-off and persist completed PROV
+        close_ro(research_obj, self.job.prov_path(self.settings))
+
def update_requirements(self):
# type: () -> None
"""
@@ -2113,13 +2159,10 @@ def _handler(self, request, response):
elif config == WeaverConfiguration.HYBRID:
self.remote_execution = problem_needs_remote is not None
+ loading_context = LoadingContext()
if self.remote_execution:
# EMS/Hybrid dispatch the execution to ADES or remote WPS
- loading_context = LoadingContext()
loading_context.construct_tool_object = self.make_tool
- else:
- # ADES/Hybrid execute the CWL/AppPackage locally
- loading_context = None
self.update_effective_user()
self.update_requirements()
@@ -2132,6 +2175,7 @@ def _handler(self, request, response):
)
runtime_context = RuntimeContext(kwargs=runtime_params)
runtime_context.secret_store = SecretStore() # pre-allocate to reuse the same references as needed
+ self.setup_provenance(loading_context, runtime_context)
try:
self.step_launched = []
package_inst, _, self.step_packages = _load_package_content(self.package,
@@ -2203,6 +2247,15 @@ def _handler(self, request, response):
self.update_status("Generate package outputs done.", PACKAGE_PROGRESS_PREP_OUT, Status.RUNNING)
except Exception as exc:
raise self.exception_message(PackageExecutionError, exc, "Failed to save package outputs.")
+ try:
+ self.finalize_provenance(runtime_context)
+ except Exception as exc: # pragma: no cover # only safeguard, it's good if this branch never occurs!
+ self.exception_message(
+ PackageExecutionError,
+ exc,
+ "Failed to save package PROV metadata. Ignoring error to avoid failing execution.",
+ level=logging.WARN,
+ )
except Exception:
# return log file location by status message since outputs are not obtained by WPS failed process
log_url = f"{get_wps_output_url(self.settings)}/{self.uuid}.log"
diff --git a/weaver/provenance.py b/weaver/provenance.py
new file mode 100644
index 000000000..648674331
--- /dev/null
+++ b/weaver/provenance.py
@@ -0,0 +1,359 @@
+"""
+Definitions related to :term:`Provenance` features and the :term:`W3C` ``PROV`` specification.
+"""
+import hashlib
+from typing import TYPE_CHECKING, cast
+from urllib.parse import urlparse
+
+from cwltool.cwlprov import provenance_constants as cwl_prov_const
+from cwltool.cwlprov.ro import ResearchObject
+from prov import constants as prov_const
+
+from weaver.__meta__ import __version__ as weaver_version
+from weaver.base import Constants
+from weaver.formats import ContentType, OutputFormat
+from weaver.utils import get_weaver_url
+
+if TYPE_CHECKING:
+ from typing import Any, List, Optional, Tuple, Union
+ from uuid import UUID
+
+ from cwltool.cwlprov.provenance_profile import ProvenanceProfile
+ from cwltool.stdfsaccess import StdFsAccess
+ from prov.model import ProvDocument
+
+ from weaver.base import EnumType
+ from weaver.datatype import Job
+ from weaver.formats import AnyContentType
+ from weaver.typedefs import AnyKey, AnySettingsContainer
+
+ AnyProvenanceFormat = Union[AnyContentType, "ProvenanceFormat"]
+
+
+class ProvenancePathType(Constants):
+    """
+    Sub-paths of the :term:`Provenance` endpoints.
+    """
+    PROV = "/prov"
+    PROV_INFO = "/prov/info"
+    PROV_WHO = "/prov/who"
+    PROV_INPUTS = "/prov/inputs"
+    PROV_OUTPUTS = "/prov/outputs"
+    PROV_RUN = "/prov/run"
+    PROV_RUNS = "/prov/runs"
+
+    @classmethod
+    def types(cls):
+        # type: () -> List[str]
+        """
+        Obtain the last path fragment of every defined provenance path (e.g.: ``info`` for ``/prov/info``).
+        """
+        return [cls.as_type(prov) for prov in cls.values()]
+
+    @classmethod
+    def as_type(cls, prov):
+        # type: (Any) -> Optional[str]
+        """
+        Obtain the last path fragment of the resolved provenance path, or ``None`` if it cannot be resolved.
+        """
+        prov = cls.get(prov)
+        if isinstance(prov, str):
+            return prov.rsplit("/", 1)[-1]
+        return None
+
+    @classmethod
+    def get(  # pylint: disable=W0221,W0237  # arguments differ/renamed for clarity
+        cls,
+        prov,           # type: Union[AnyKey, EnumType, "ProvenancePathType"]
+        default=None,   # type: Optional[Any]
+        run_id=None,    # type: Optional[str]
+    ):                  # type: (...) -> Optional["ProvenancePathType"]
+        """
+        Resolve a provenance path from a defined reference, or from a partial path fragment (e.g.: ``run``).
+
+        :param prov: Provenance path reference to resolve.
+        :param default: Value returned if the path cannot be resolved, or if it does not support a run ID.
+        :param run_id: Optional run ID appended to the path (only for ``run``, ``inputs`` and ``outputs``).
+        :returns: Resolved provenance path, including the run ID if applicable, or the default otherwise.
+        """
+        prov_found = super().get(prov)
+        if prov_found is not None:
+            if run_id is None:
+                return prov_found
+            # continue from the resolved path rather than the input reference,
+            # which could differ from the path value and would not be a valid path to extend with the run ID
+            prov = prov_found
+        if isinstance(prov, str):
+            if not prov_found and prov.strip("/") not in ProvenancePathType.types():
+                return default
+            prov = f"/{prov}" if not prov.startswith("/") else prov
+            prov = f"/prov{prov}" if not prov.startswith("/prov") else prov
+            if run_id:
+                if prov.rsplit("/", 1)[-1] in ["run", "inputs", "outputs"]:
+                    prov = f"{prov}/{run_id}"
+                else:
+                    return default
+            return cast("ProvenancePathType", prov)
+        return default
+
+
+class ProvenanceFormat(Constants):
+    """
+    Supported :term:`Provenance` representations and their mapping with corresponding :term:`Media-Types`.
+    """
+    PROV_JSON = "PROV-JSON"
+    PROV_JSONLD = "PROV-JSONLD"
+    PROV_XML = "PROV-XML"
+    PROV_TURTLE = "PROV-TURTLE"
+    PROV_N = "PROV-N"
+    PROV_NT = "PROV-NT"
+
+    # media-types accepted for each PROV format (multiple media-types can refer to the same format)
+    _media_types = {
+        ContentType.APP_JSON: PROV_JSON,
+        ContentType.APP_JSONLD: PROV_JSONLD,
+        ContentType.TEXT_TURTLE: PROV_TURTLE,
+        ContentType.TEXT_PROVN: PROV_N,
+        ContentType.TEXT_XML: PROV_XML,
+        ContentType.APP_XML: PROV_XML,
+        ContentType.APP_NT: PROV_NT,
+    }
+    # reverse lookup of the above (for a format with multiple media-types, the last one listed is retained)
+    _rev_path_types = {_prov_type: _ctype for _ctype, _prov_type in _media_types.items()}
+
+    @classmethod
+    def get(  # pylint: disable=W0221,W0237  # arguments differ/renamed for clarity
+        cls,
+        prov_format,                # type: Optional[AnyProvenanceFormat]
+        default=None,               # type: Optional[Any]
+        allow_media_type=False,     # type: bool
+    ):                              # type: (...) -> Optional["ProvenanceFormat"]
+        """
+        Resolve the PROV format from a defined reference, and optionally from an equivalent media-type.
+
+        :param prov_format: Reference handled by the parent class lookup, or a media-type if allowed.
+        :param default: Value returned when no PROV format could be resolved.
+        :param allow_media_type: Whether to attempt resolving ``prov_format`` as a media-type.
+        :returns: Resolved PROV format, or the default otherwise.
+        """
+        # do not forward 'default' to the parent lookup, otherwise a non-null default
+        # would be returned right away and the media-type would never be considered
+        prov = super().get(prov_format)
+        if prov is None and allow_media_type:
+            prov = cls._media_types.get(prov_format)
+        return default if prov is None else prov
+
+    @classmethod
+    def media_types(cls):
+        # type: () -> List[ContentType]
+        """
+        Obtain all media-types that map to a PROV format.
+        """
+        return list(cls._media_types)
+
+    @classmethod
+    def formats(cls):
+        # type: () -> List["ProvenanceFormat"]
+        """
+        Obtain all defined PROV formats.
+        """
+        return cls.values()
+
+    @classmethod
+    def as_media_type(cls, prov_format):
+        # type: (Optional[AnyProvenanceFormat]) -> Optional[AnyContentType]
+        """
+        Obtain the media-type that corresponds to the PROV format, or ``None`` if there is no match.
+        """
+        return cls._rev_path_types.get(prov_format)
+
+    @classmethod
+    def resolve_compatible_formats(
+        cls,
+        prov,           # type: Optional[Union[ProvenancePathType, str]]
+        prov_format,    # type: Optional[Union[ProvenanceFormat, str]]
+        output_format,  # type: Optional[Union[OutputFormat, str]]
+    ):                  # type: (...) -> Tuple[Optional[ProvenanceFormat], Optional[str]]
+        """
+        Resolves multiple :class:`OutputFormat` and :class:`ProvenanceFormat` combinations for compatible formats.
+
+        Compatible formats depend on the PROV endpoint being requested.
+        If output format is not specified, apply the corresponding PROV format that will work transparently.
+        Otherwise, ensure they are aligned against the expected PROV endpoints and supported :term:`Media-Types`.
+
+        :param prov: PROV endpoint being requested (the main ``/prov`` endpoint is assumed if not resolved).
+        :param prov_format: PROV format explicitly requested, if any.
+        :param output_format: Output format explicitly requested, if any.
+        :returns:
+            Tuple of a resolved PROV format if only the output format was specified,
+            and the relevant error detail if they are incompatible.
+        """
+        prov = ProvenancePathType.get(prov, default=ProvenancePathType.PROV)
+        prov_format = ProvenanceFormat.get(prov_format)
+        default_format = output_format
+        output_format = OutputFormat.get(output_format)
+
+        # if default was originally falsy, it would have been replaced by 'JSON'
+        # ignore it in this case to resolve any explicitly specified PROV format by itself
+        if not output_format or not default_format:
+            if prov == ProvenancePathType.PROV:
+                prov_format = prov_format or ProvenanceFormat.PROV_JSON
+            else:
+                prov_format = None
+            return prov_format, None
+
+        out_fmt = output_format.split("+", 1)[0]
+        err_mismatch = (
+            None,
+            f"output format '{output_format}' conflicts with PROV format '{prov_format}'"
+        )
+
+        # only main PROV endpoint supports alternate formats
+        # all others are plain text only
+        if prov not in [None, ProvenancePathType.PROV]:
+            if out_fmt in [OutputFormat.TEXT, OutputFormat.TXT]:
+                return None, None
+            return err_mismatch
+
+        if out_fmt in [OutputFormat.JSON, OutputFormat.YAML, OutputFormat.YML]:
+            if prov_format not in [None, ProvenanceFormat.PROV_JSON, ProvenanceFormat.PROV_JSONLD]:
+                return err_mismatch
+            if prov_format is None:
+                prov_format = ProvenanceFormat.PROV_JSON
+            return prov_format, None
+
+        if out_fmt in [OutputFormat.XML]:
+            if prov_format not in [None, ProvenanceFormat.PROV_XML]:
+                return err_mismatch
+            if prov_format is None:
+                prov_format = ProvenanceFormat.PROV_XML
+            return prov_format, None
+
+        if out_fmt in [OutputFormat.TEXT, OutputFormat.TXT]:
+            if prov_format not in [None, ProvenanceFormat.PROV_N, ProvenanceFormat.PROV_NT,
+                                   ProvenanceFormat.PROV_TURTLE]:
+                return err_mismatch
+            if prov_format is None:
+                prov_format = ProvenanceFormat.PROV_N
+            return prov_format, None
+
+        return None, f"output format '{output_format}' does not have any PROV equivalent"
+
+
+class WeaverResearchObject(ResearchObject):
+ """
+ Defines extended :term:`Provenance` details with `Weaver` operations and referencing the active server instance.
+ """
+
+ def __init__(self, job, settings, fs_access, temp_prefix_ro="tmp", orcid="", full_name=""):
+ # type: (Job, AnySettingsContainer, StdFsAccess, str, str, str) -> None
+ """
+ Initialize the research object and align its identifiers with the job.
+
+ :param job: Job whose UUID replaces the research object UUID and base URI.
+ :param settings: Application settings used to resolve server metadata.
+ :param fs_access: Forwarded to the parent class initialization.
+ :param temp_prefix_ro: Forwarded to the parent class initialization.
+ :param orcid: Forwarded to the parent class initialization.
+ :param full_name: Forwarded to the parent class initialization.
+ """
+ super(WeaverResearchObject, self).__init__(fs_access, temp_prefix_ro, orcid, full_name)
+
+ # rewrite auto-initialized random UUIDs with Weaver-specific references
+ self.job = job
+ self.ro_uuid = job.uuid
+ self.base_uri = f"arcp://uuid,{self.ro_uuid}/"
+ self.settings = settings
+
+ @staticmethod
+ def sha1_uuid(document, identifier):
+ # type: (ProvDocument, str) -> str
+ """
+ Generate a prefixed SHA1 hash from the identifier value.
+ """
+ # prefix is taken from the namespace registered in the document under the 'DATA' constant
+ sha1_ns = document._namespaces[cwl_prov_const.DATA]
+ # hash is flagged as not used for security ('usedforsecurity=False')
+ sha1_id = f"{sha1_ns.prefix}:{hashlib.sha1(identifier.encode(), usedforsecurity=False).hexdigest()}"
+ return sha1_id
+
+ def initialize_provenance(self, full_name, host_provenance, user_provenance, orcid, fsaccess, run_uuid=None):
+ # type: (str, bool, bool, str, StdFsAccess, Optional[UUID]) -> ProvenanceProfile
+ """
+ Hook `Weaver` metadata onto user provenance step.
+ """
+ # let the parent class create the profile, all arguments are forwarded as-is
+ prov_profile = super(WeaverResearchObject, self).initialize_provenance(
+ full_name=full_name,
+ host_provenance=host_provenance,
+ user_provenance=user_provenance,
+ orcid=orcid,
+ fsaccess=fsaccess,
+ run_uuid=run_uuid,
+ )
+ document = prov_profile.document
+
+ # namespace used below to reference the DOI of the server instance agent
+ doi_ns = document.add_namespace("doi", "https://doi.org/")
+
+ # entity describing the source code repository
+ weaver_full_name = f"crim-ca/weaver:{weaver_version}"
+ weaver_code_url = "https://github.com/crim-ca/weaver"
+ weaver_code_sha1 = self.sha1_uuid(document, weaver_code_url)
+ weaver_code_entity = document.entity(
+ weaver_code_sha1,
+ {
+ prov_const.PROV_TYPE: prov_const.PROV["PrimarySource"],
+ prov_const.PROV_LABEL: "Source code repository",
+ prov_const.PROV_LOCATION: weaver_code_url,
+ },
+ )
+
+ # agent describing the server instance, identified by the hash of its URL
+ weaver_url = get_weaver_url(self.settings)
+ weaver_desc = self.settings.get(
+ "weaver.wps_metadata_identification_abstract",
+ "Weaver OGC API Processes Server"
+ )
+ weaver_instance_sha1 = self.sha1_uuid(document, weaver_url)
+ # list of tuples rather than a mapping, since the label attribute is provided twice
+ weaver_instance_meta = [
+ (prov_const.PROV_TYPE, prov_const.PROV["SoftwareAgent"]),
+ (prov_const.PROV_LOCATION, weaver_url),
+ (prov_const.PROV_LABEL, weaver_desc),
+ (prov_const.PROV_LABEL, weaver_full_name),
+ (prov_const.PROV_ATTR_GENERAL_ENTITY, weaver_code_sha1),
+ (prov_const.PROV_ATTR_SPECIFIC_ENTITY, f"{doi_ns.prefix}:10.5281/zenodo.14210717"), # see CITATION.cff
+ ]
+ weaver_instance_agent = document.agent(weaver_instance_sha1, weaver_instance_meta)
+
+ # entity describing the organization, identified by the hash of its name
+ crim_name = "Computer Research Institute of Montréal"
+ crim_sha1 = self.sha1_uuid(document, crim_name)
+ crim_entity = document.entity(
+ crim_sha1,
+ {
+ prov_const.PROV_TYPE: prov_const.PROV["Organization"],
+ cwl_prov_const.FOAF["name"]: crim_name,
+ cwl_prov_const.SCHEMA["name"]: crim_name,
+ }
+ )
+
+ # optional entity describing the server provider, created only if a name and/or URL is configured
+ server_provider_name = self.settings.get("weaver.wps_metadata_provider_name")
+ server_provider_url = self.settings.get("weaver.wps_metadata_provider_url")
+ server_provider_meta = []
+ server_provider_entity = None
+ if server_provider_name:
+ server_provider_meta.extend([
+ (cwl_prov_const.FOAF["name"], server_provider_name),
+ (cwl_prov_const.SCHEMA["name"], server_provider_name),
+ ])
+ if server_provider_url:
+ server_provider_meta.extend([
+ (prov_const.PROV_LOCATION, server_provider_url),
+ ])
+ if server_provider_meta:
+ # URL is preferred over the name to generate the identifier
+ server_provider_id = server_provider_url or server_provider_name
+ server_provider_sha1 = self.sha1_uuid(document, server_provider_id)
+ server_provider_meta.extend([
+ (prov_const.PROV_TYPE, prov_const.PROV["Organization"]),
+ (prov_const.PROV_LABEL, "Server Provider"),
+ ])
+ server_provider_entity = document.entity(
+ server_provider_sha1,
+ server_provider_meta,
+ )
+
+ # entity describing the job, identified by the URN of its UUID
+ job_entity = document.entity(
+ self.job.uuid.urn,
+ {
+ prov_const.PROV_TYPE: cwl_prov_const.WFDESC["ProcessRun"],
+ prov_const.PROV_LOCATION: self.job.job_url(self.settings),
+ prov_const.PROV_LABEL: "Job Information",
+ }
+ )
+ # entity describing the process (identifier is prefixed by the service when the job refers to one)
+ proc_url = self.job.process_url(self.settings)
+ proc_id = f"{self.job.service}:{self.job.process}" if self.job.service else self.job.process
+ proc_uuid = f"{weaver_instance_sha1}:{proc_id}"
+ proc_entity = document.entity(
+ proc_uuid,
+ {
+ prov_const.PROV_TYPE: cwl_prov_const.WFDESC["Process"],
+ prov_const.PROV_LOCATION: proc_url,
+ prov_const.PROV_LABEL: "Process Description",
+ }
+ )
+
+ # following agents are expected to exist (created by inherited class)
+ cwltool_agent = document.get_record(cwl_prov_const.ACCOUNT_UUID)[0]
+ user_agent = document.get_record(cwl_prov_const.USER_UUID)[0]
+ wf_agent = document.get_record(self.engine_uuid)[0] # current job run aligned with cwl workflow
+
+ # define relationships cross-references: https://wf4ever.github.io/ro/wfprov.owl
+ document.primary_source(weaver_instance_agent, weaver_code_entity)
+ document.actedOnBehalfOf(weaver_instance_agent, user_agent)
+ document.specializationOf(weaver_instance_agent, cwltool_agent)
+ document.attribution(crim_entity, weaver_code_entity)
+ document.wasDerivedFrom(cwltool_agent, weaver_instance_agent)
+ document.wasStartedBy(job_entity, weaver_instance_agent)
+ document.wasStartedBy(wf_agent, job_entity, time=self.job.created)
+ document.specializationOf(wf_agent, job_entity)
+ document.alternateOf(wf_agent, job_entity)
+ document.wasGeneratedBy(job_entity, proc_entity)
+ # relationships of the server provider are added only if it was defined above
+ if server_provider_entity:
+ document.derivation(server_provider_entity, weaver_instance_agent)
+ document.attribution(server_provider_entity, weaver_instance_agent)
+
+ return prov_profile
+
+ def resolve_user(self):
+ # type: () -> Tuple[str, str]
+ """
+ Override :mod:`cwltool` default machine user.
+ """
+ # the same versioned name is returned for both items of the tuple
+ weaver_full_name = f"crim-ca/weaver:{weaver_version}"
+ return weaver_full_name, weaver_full_name
+
+ def resolve_host(self):
+ # type: () -> Tuple[str, str]
+ """
+ Override :mod:`cwltool` default machine host.
+ """
+ weaver_url = get_weaver_url(self.settings)
+ # NOTE(review): 'hostname' is None when the URL has no network location - confirm the URL always has a scheme
+ weaver_fqdn = urlparse(weaver_url).hostname
+ return weaver_fqdn, weaver_url
diff --git a/weaver/typedefs.py b/weaver/typedefs.py
index a41c27a40..08451bd6c 100644
--- a/weaver/typedefs.py
+++ b/weaver/typedefs.py
@@ -88,6 +88,7 @@
from weaver.visibility import AnyVisibility
Path = Union[os.PathLike[str], str, bytes]
+ URL = str
Default = TypeVar("Default") # used for return value that is employed from a provided default value
Params = ParamSpec("Params") # use with 'Callable[Params, Return]', 'Params.args' and 'Params.kwargs'
diff --git a/weaver/wps_restapi/api.py b/weaver/wps_restapi/api.py
index d7cc95640..f0f800e77 100644
--- a/weaver/wps_restapi/api.py
+++ b/weaver/wps_restapi/api.py
@@ -103,6 +103,7 @@ def get_conformance(category, settings):
# ogcapi_proc_part3 = "http://www.opengis.net/spec/ogcapi-processes-3/1.0"
ogcapi_proc_enabled = asbool(settings.get("weaver.wps_restapi", True))
ogcapi_proc_html = asbool(settings.get("weaver.wps_restapi_html", True))
+ ogcapi_proc_prov = asbool(settings.get("weaver.cwl_prov", True))
ogcapi_proc_conformance = ([
f"{ogcapi_common}/conf/core",
f"{ogcapi_common}/per/core/additional-link-relations",
@@ -523,6 +524,7 @@ def get_conformance(category, settings):
# FIXME: support openEO processes (https://github.com/crim-ca/weaver/issues/564)
# f"{ogcapi_proc_part3}/conf/openeo-workflows",
# f"{ogcapi_proc_part3}/req/openeo-workflows",
+ f"{ogcapi_proc_part4}/conf/job-management",
f"{ogcapi_proc_part4}/conf/jm/create/post-op",
f"{ogcapi_proc_part4}/per/job-management/additional-status-codes", # see 'weaver.status.map_status'
f"{ogcapi_proc_part4}/per/job-management/create-body", # Weaver has XML for WPS
@@ -539,7 +541,7 @@ def get_conformance(category, settings):
f"{ogcapi_proc_part4}/req/job-management/create-response-body",
f"{ogcapi_proc_part4}/req/job-management/create-response-jobid",
f"{ogcapi_proc_part4}/req/job-management/create-response-success",
- # FIXME: support Content-Schema and Profile header negociation (https://github.com/crim-ca/weaver/issues/754)
+ # FIXME: support Content-Schema and Profile header negotiation (https://github.com/crim-ca/weaver/issues/754)
# f"{ogcapi_proc_part4}/req/job-management/create-unsupported-schema",
f"{ogcapi_proc_part4}/req/job-management/create-unsupported-media-type",
f"{ogcapi_proc_part4}/req/job-management/definition-get-op",
@@ -552,6 +554,14 @@ def get_conformance(category, settings):
f"{ogcapi_proc_part4}/req/job-management/update-patch-op",
f"{ogcapi_proc_part4}/req/job-management/update-response",
f"{ogcapi_proc_part4}/req/job-management/update-response-locked",
+ ] + ([
+ f"{ogcapi_proc_part4}/req/provenance",
+ f"{ogcapi_proc_part4}/req/provenance/prov-get-op",
+ f"{ogcapi_proc_part4}/req/provenance/prov-response",
+ f"{ogcapi_proc_part4}/req/provenance/prov-content-negotiation",
+ f"{ogcapi_proc_part4}/req/provenance/inputs-get-op",
+ f"{ogcapi_proc_part4}/req/provenance/inputs-response",
+ ] if ogcapi_proc_prov else []) + [
# FIXME: employ 'weaver.wps_restapi.quotation.utils.check_quotation_supported' to add below conditionally
# FIXME: https://github.com/crim-ca/weaver/issues/156 (billing/quotation)
# https://github.com/opengeospatial/ogcapi-processes/tree/master/extensions/billing
@@ -660,6 +670,7 @@ def api_frontpage_body(settings):
weaver_url = get_weaver_url(settings)
weaver_config = get_weaver_configuration(settings)
+ weaver_rtd_url = "https://pavics-weaver.readthedocs.io/en/latest"
weaver_api = asbool(settings.get("weaver.wps_restapi", True))
weaver_api_url = get_wps_restapi_base_url(settings)
weaver_api_oas_ui = weaver_url + sd.api_openapi_ui_service.path if weaver_api else None
@@ -669,12 +680,19 @@ def api_frontpage_body(settings):
weaver_api_ref = settings.get("weaver.wps_restapi_ref", None) if weaver_api else None
weaver_api_html = asbool(settings.get("weaver.wps_restapi_html", True)) and weaver_api
weaver_api_html_url = f"{weaver_api_url}?f={OutputFormat.HTML}"
+ weaver_api_prov = asbool(settings.get("weaver.cwl_prov", True)) and weaver_api
+ weaver_api_prov_doc = f"{weaver_rtd_url}/processes.html#job-provenance"
+ weaver_api_prov_oas = f"{weaver_api_oas_ui}#/Provenance" if weaver_api_prov else None
weaver_wps = asbool(settings.get("weaver.wps"))
weaver_wps_url = get_wps_url(settings) if weaver_wps else None
+    # OpenAPI UI location is undefined when the REST API is disabled, avoid rendering it as "None#/WPS"
+    weaver_wps_oas = f"{weaver_api_oas_ui}#/WPS" if weaver_wps and weaver_api_oas_ui else None
weaver_conform_url = weaver_url + sd.api_conformance_service.path
weaver_process_url = weaver_api_url + sd.processes_service.path
weaver_jobs_url = weaver_api_url + sd.jobs_service.path
weaver_vault = asbool(settings.get("weaver.vault"))
+ weaver_vault_url = f"{weaver_api_url}/vault" if weaver_vault else None
+    # OpenAPI UI location is undefined when the REST API is disabled, avoid rendering it as "None#/Vault"
+    weaver_vault_api = f"{weaver_api_oas_ui}#/Vault" if weaver_vault and weaver_api_oas_ui else None
+ weaver_vault_doc = f"{weaver_rtd_url}/processes.html#vault-upload"
weaver_links = [
{"href": weaver_url, "rel": "self", "type": ContentType.APP_JSON, "title": "This landing page."},
{"href": weaver_conform_url, "rel": "http://www.opengis.net/def/rel/ogc/1.0/conformance",
@@ -778,10 +796,13 @@ def api_frontpage_body(settings):
"description": __meta__.__description__,
"attribution": __meta__.__author__,
"parameters": [
- {"name": "api", "enabled": weaver_api, "url": weaver_api_url, "api": weaver_api_oas_ui},
+ {"name": "api", "enabled": weaver_api, "url": weaver_api_url,
+ "doc": weaver_rtd_url, "api": weaver_api_oas_ui},
{"name": "html", "enabled": weaver_api_html, "url": weaver_api_html_url, "api": weaver_api_oas_ui},
- {"name": "vault", "enabled": weaver_vault},
- {"name": "wps", "enabled": weaver_wps, "url": weaver_wps_url, "api": weaver_api_oas_ui},
+ {"name": "prov", "enabled": weaver_api_prov, "doc": weaver_api_prov_doc, "api": weaver_api_prov_oas},
+ {"name": "vault", "enabled": weaver_vault, "url": weaver_vault_url,
+ "doc": weaver_vault_doc, "api": weaver_vault_api},
+ {"name": "wps", "enabled": weaver_wps, "url": weaver_wps_url, "api": weaver_wps_oas},
],
"links": weaver_links,
}
diff --git a/weaver/wps_restapi/examples/job_prov.json b/weaver/wps_restapi/examples/job_prov.json
new file mode 100644
index 000000000..3543f60a8
--- /dev/null
+++ b/weaver/wps_restapi/examples/job_prov.json
@@ -0,0 +1,427 @@
+{
+ "prefix": {
+ "wfprov": "http://purl.org/wf4ever/wfprov#",
+ "wfdesc": "http://purl.org/wf4ever/wfdesc#",
+ "cwlprov": "https://w3id.org/cwl/prov#",
+ "foaf": "http://xmlns.com/foaf/0.1/",
+ "schema": "http://schema.org/",
+ "orcid": "https://orcid.org/",
+ "id": "urn:uuid:",
+ "data": "urn:hash::sha1:",
+ "sha256": "nih:sha-256;",
+ "researchobject": "arcp://uuid,1c49f085-bbd7-410d-a801-81fd42469e8a/",
+ "metadata": "arcp://uuid,1c49f085-bbd7-410d-a801-81fd42469e8a/metadata/",
+ "provenance": "arcp://uuid,1c49f085-bbd7-410d-a801-81fd42469e8a/metadata/provenance/",
+ "wf": "arcp://uuid,1c49f085-bbd7-410d-a801-81fd42469e8a/workflow/packed.cwl#",
+ "input": "arcp://uuid,1c49f085-bbd7-410d-a801-81fd42469e8a/workflow/primary-job.json#",
+ "doi": "https://doi.org/",
+ "wf4ever": "http://purl.org/wf4ever/wf4ever#"
+ },
+ "agent": {
+ "id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b": [
+ {},
+ {
+ "prov:type": {
+ "$": "foaf:OnlineAccount",
+ "type": "prov:QUALIFIED_NAME"
+ },
+ "prov:location": "http://localhost:4002",
+ "cwlprov:hostname": "localhost"
+ },
+ {
+ "prov:type": {
+ "$": "foaf:OnlineAccount",
+ "type": "prov:QUALIFIED_NAME"
+ },
+ "prov:label": "crim-ca/weaver:6.1.0",
+ "foaf:accountName": "crim-ca/weaver:6.1.0"
+ }
+ ],
+ "id:b3a49ee7-f620-4154-9e4d-d2e948748deb": {
+ "prov:type": [
+ {
+ "$": "schema:Person",
+ "type": "prov:QUALIFIED_NAME"
+ },
+ {
+ "$": "prov:Person",
+ "type": "prov:QUALIFIED_NAME"
+ }
+ ],
+ "prov:label": "crim-ca/weaver:6.1.0",
+ "foaf:name": "crim-ca/weaver:6.1.0",
+ "foaf:account": {
+ "$": "id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b",
+ "type": "prov:QUALIFIED_NAME"
+ },
+ "schema:name": "crim-ca/weaver:6.1.0"
+ },
+ "id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c": {
+ "prov:type": [
+ {
+ "$": "prov:SoftwareAgent",
+ "type": "prov:QUALIFIED_NAME"
+ },
+ {
+ "$": "wfprov:WorkflowEngine",
+ "type": "prov:QUALIFIED_NAME"
+ }
+ ],
+ "prov:label": "cwltool 3.1.20240708091338.dev15+g9c05bb7d"
+ },
+ "data:15401f8d937f5d526951c1bf20dcba16a1271d97": {
+ "prov:type": {
+ "$": "prov:SoftwareAgent",
+ "type": "prov:QUALIFIED_NAME"
+ },
+ "prov:location": "https://example.com/weaver",
+ "prov:label": [
+ "Weaver internal WPS used for demo and testing.",
+ "crim-ca/weaver:6.1.0"
+ ],
+ "prov:generalEntity": "data:644e201526525f62152815a76a2dc773450f3dd9",
+ "prov:specificEntity": "doi:10.5281/zenodo.14210717"
+ },
+ "id:2148aee6-81e4-4bcd-9e48-78ff46a51ff1": {
+ "prov:type": {
+ "$": "prov:SoftwareAgent",
+ "type": "prov:QUALIFIED_NAME"
+ },
+ "cwlprov:image": "debian:stretch-slim",
+ "prov:label": "Container execution of image debian:stretch-slim"
+ }
+ },
+ "actedOnBehalfOf": {
+ "_:id1": {
+ "prov:delegate": "id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b",
+ "prov:responsible": "id:b3a49ee7-f620-4154-9e4d-d2e948748deb"
+ },
+ "_:id6": {
+ "prov:delegate": "data:15401f8d937f5d526951c1bf20dcba16a1271d97",
+ "prov:responsible": "id:b3a49ee7-f620-4154-9e4d-d2e948748deb"
+ }
+ },
+ "wasStartedBy": {
+ "_:id2": {
+ "prov:activity": "id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c",
+ "prov:starter": "id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b",
+ "prov:time": "2024-12-12T09:16:17.843783"
+ },
+ "_:id4": {
+ "prov:activity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a",
+ "prov:starter": "id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c",
+ "prov:time": "2024-12-12T09:16:17.843852"
+ },
+ "_:id10": {
+ "prov:activity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a",
+ "prov:trigger": "data:15401f8d937f5d526951c1bf20dcba16a1271d97"
+ },
+ "_:id11": {
+ "prov:activity": "id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c",
+ "prov:trigger": "id:1c49f085-bbd7-410d-a801-81fd42469e8a",
+ "prov:time": "2024-12-12T14:15:50.834000+00:00"
+ }
+ },
+ "activity": {
+ "id:1c49f085-bbd7-410d-a801-81fd42469e8a": {
+ "prov:startTime": "2024-12-12T09:16:17.843806",
+ "prov:type": {
+ "$": "wfprov:WorkflowRun",
+ "type": "prov:QUALIFIED_NAME"
+ },
+ "prov:label": "Run of workflow/packed.cwl#main"
+ }
+ },
+ "wasAssociatedWith": {
+ "_:id3": {
+ "prov:activity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a",
+ "prov:agent": "id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c",
+ "prov:plan": "wf:main"
+ },
+ "_:id18": {
+ "prov:activity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a",
+ "prov:agent": "id:2148aee6-81e4-4bcd-9e48-78ff46a51ff1"
+ }
+ },
+ "entity": {
+ "data:644e201526525f62152815a76a2dc773450f3dd9": {
+ "prov:type": {
+ "$": "prov:PrimarySource",
+ "type": "prov:QUALIFIED_NAME"
+ },
+ "prov:label": "Source code repository",
+ "prov:location": "https://github.com/crim-ca/weaver"
+ },
+ "data:3102f6d7a018ebae572f457d711ed7e1e7a11bc2": {
+ "prov:type": {
+ "$": "prov:Organization",
+ "type": "prov:QUALIFIED_NAME"
+ },
+ "foaf:name": "Computer Research Institute of Montr\u00e9al",
+ "schema:name": "Computer Research Institute of Montr\u00e9al"
+ },
+ "data:838cdfa4bbf09d1aedd26d79b46bfa8778ede2e0": {
+ "foaf:name": "CRIM",
+ "schema:name": "CRIM",
+ "prov:location": "http://pavics-weaver.readthedocs.org/en/latest/",
+ "prov:type": {
+ "$": "prov:Organization",
+ "type": "prov:QUALIFIED_NAME"
+ },
+ "prov:label": "Server Provider"
+ },
+ "id:1c49f085-bbd7-410d-a801-81fd42469e8a": {
+ "prov:type": {
+ "$": "wfdesc:ProcessRun",
+ "type": "prov:QUALIFIED_NAME"
+ },
+ "prov:location": "https://example.com/weaver/processes/echo/jobs/1c49f085-bbd7-410d-a801-81fd42469e8a",
+ "prov:label": "Job Information"
+ },
+ "data:15401f8d937f5d526951c1bf20dcba16a1271d97:echo": {
+ "prov:type": {
+ "$": "wfdesc:Process",
+ "type": "prov:QUALIFIED_NAME"
+ },
+ "prov:location": "https://example.com/weaver/processes/echo",
+ "prov:label": "Process Description"
+ },
+ "wf:main": {
+ "prov:type": [
+ {
+ "$": "wfdesc:Process",
+ "type": "prov:QUALIFIED_NAME"
+ },
+ {
+ "$": "prov:Plan",
+ "type": "prov:QUALIFIED_NAME"
+ }
+ ],
+ "prov:label": "Prospective provenance"
+ },
+ "data:2ef7bde608ce5404e97d5f042f95f89f1c232871": [
+ {
+ "prov:type": {
+ "$": "wfprov:Artifact",
+ "type": "prov:QUALIFIED_NAME"
+ },
+ "prov:value": "Hello World!"
+ },
+ {
+ "prov:type": {
+ "$": "wfprov:Artifact",
+ "type": "prov:QUALIFIED_NAME"
+ },
+ "prov:value": "Hello World!"
+ }
+ ],
+ "data:a0b65939670bc2c010f4d5d6a0b3e4e4590fb92b": [
+ {
+ "prov:type": {
+ "$": "wfprov:Artifact",
+ "type": "prov:QUALIFIED_NAME"
+ }
+ },
+ {}
+ ],
+ "id:59967079-217e-4bdb-92d7-2ef2f784825c": {
+ "prov:type": [
+ {
+ "$": "wf4ever:File",
+ "type": "prov:QUALIFIED_NAME"
+ },
+ {
+ "$": "wfprov:Artifact",
+ "type": "prov:QUALIFIED_NAME"
+ }
+ ],
+ "cwlprov:basename": "stdout.log",
+ "cwlprov:nameroot": "stdout",
+ "cwlprov:nameext": ".log"
+ },
+ "data:da39a3ee5e6b4b0d3255bfef95601890afd80709": {
+ "prov:type": {
+ "$": "wfprov:Artifact",
+ "type": "prov:QUALIFIED_NAME"
+ }
+ },
+ "id:2ab450c1-7309-4c5b-b65b-b4dfa44f384b": {
+ "prov:type": [
+ {
+ "$": "wf4ever:File",
+ "type": "prov:QUALIFIED_NAME"
+ },
+ {
+ "$": "wfprov:Artifact",
+ "type": "prov:QUALIFIED_NAME"
+ }
+ ],
+ "cwlprov:basename": "stderr.log",
+ "cwlprov:nameroot": "stderr",
+ "cwlprov:nameext": ".log"
+ },
+ "id:6b04550d-c2bd-400b-858b-14e287bbf8c3": {
+ "prov:type": [
+ {
+ "$": "wf4ever:File",
+ "type": "prov:QUALIFIED_NAME"
+ },
+ {
+ "$": "wfprov:Artifact",
+ "type": "prov:QUALIFIED_NAME"
+ }
+ ],
+ "cwlprov:basename": "stdout.log",
+ "cwlprov:nameroot": "stdout",
+ "cwlprov:nameext": ".log"
+ }
+ },
+ "wasDerivedFrom": {
+ "_:id5": {
+ "prov:generatedEntity": "data:15401f8d937f5d526951c1bf20dcba16a1271d97",
+ "prov:usedEntity": "data:644e201526525f62152815a76a2dc773450f3dd9",
+ "prov:type": {
+ "$": "prov:PrimarySource",
+ "type": "prov:QUALIFIED_NAME"
+ }
+ },
+ "_:id9": {
+ "prov:generatedEntity": "id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b",
+ "prov:usedEntity": "data:15401f8d937f5d526951c1bf20dcba16a1271d97"
+ },
+ "_:id15": {
+ "prov:generatedEntity": "data:838cdfa4bbf09d1aedd26d79b46bfa8778ede2e0",
+ "prov:usedEntity": "data:15401f8d937f5d526951c1bf20dcba16a1271d97"
+ }
+ },
+ "specializationOf": {
+ "_:id7": {
+ "prov:specificEntity": "data:15401f8d937f5d526951c1bf20dcba16a1271d97",
+ "prov:generalEntity": "id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b"
+ },
+ "_:id12": {
+ "prov:specificEntity": "id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c",
+ "prov:generalEntity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a"
+ },
+ "_:id20": {
+ "prov:specificEntity": "id:59967079-217e-4bdb-92d7-2ef2f784825c",
+ "prov:generalEntity": "data:a0b65939670bc2c010f4d5d6a0b3e4e4590fb92b"
+ },
+ "_:id22": {
+ "prov:specificEntity": "id:2ab450c1-7309-4c5b-b65b-b4dfa44f384b",
+ "prov:generalEntity": "data:da39a3ee5e6b4b0d3255bfef95601890afd80709"
+ },
+ "_:id24": {
+ "prov:specificEntity": "id:6b04550d-c2bd-400b-858b-14e287bbf8c3",
+ "prov:generalEntity": "data:a0b65939670bc2c010f4d5d6a0b3e4e4590fb92b"
+ }
+ },
+ "wasAttributedTo": {
+ "_:id8": {
+ "prov:entity": "data:3102f6d7a018ebae572f457d711ed7e1e7a11bc2",
+ "prov:agent": "data:644e201526525f62152815a76a2dc773450f3dd9"
+ },
+ "_:id16": {
+ "prov:entity": "data:838cdfa4bbf09d1aedd26d79b46bfa8778ede2e0",
+ "prov:agent": "data:15401f8d937f5d526951c1bf20dcba16a1271d97"
+ }
+ },
+ "alternateOf": {
+ "_:id13": {
+ "prov:alternate1": "id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c",
+ "prov:alternate2": "id:1c49f085-bbd7-410d-a801-81fd42469e8a"
+ }
+ },
+ "wasGeneratedBy": {
+ "_:id14": {
+ "prov:entity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a",
+ "prov:activity": "data:15401f8d937f5d526951c1bf20dcba16a1271d97:echo"
+ },
+ "_:id21": {
+ "prov:entity": "id:59967079-217e-4bdb-92d7-2ef2f784825c",
+ "prov:activity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a",
+ "prov:time": "2024-12-12T09:16:18.867039",
+ "prov:role": {
+ "$": "wf:main/echo/output",
+ "type": "prov:QUALIFIED_NAME"
+ }
+ },
+ "_:id23": {
+ "prov:entity": "id:2ab450c1-7309-4c5b-b65b-b4dfa44f384b",
+ "prov:activity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a",
+ "prov:time": "2024-12-12T09:16:18.867039",
+ "prov:role": {
+ "$": "wf:main/echo/PACKAGE_OUTPUT_HOOK_LOG_c46f5d8d-e599-4152-8ded-f529e19524d4",
+ "type": "prov:QUALIFIED_NAME"
+ }
+ },
+ "_:id25": {
+ "prov:entity": "id:6b04550d-c2bd-400b-858b-14e287bbf8c3",
+ "prov:activity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a",
+ "prov:time": "2024-12-12T09:16:18.867039",
+ "prov:role": {
+ "$": "wf:main/echo/PACKAGE_OUTPUT_HOOK_LOG_606e3f88-0388-4c97-b139-58fed24b37c1",
+ "type": "prov:QUALIFIED_NAME"
+ }
+ },
+ "_:id27": {
+ "prov:entity": "id:59967079-217e-4bdb-92d7-2ef2f784825c",
+ "prov:activity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a",
+ "prov:time": "2024-12-12T09:16:18.869180",
+ "prov:role": {
+ "$": "wf:main/primary/output",
+ "type": "prov:QUALIFIED_NAME"
+ }
+ },
+ "_:id28": {
+ "prov:entity": "id:2ab450c1-7309-4c5b-b65b-b4dfa44f384b",
+ "prov:activity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a",
+ "prov:time": "2024-12-12T09:16:18.869180",
+ "prov:role": {
+ "$": "wf:main/primary/PACKAGE_OUTPUT_HOOK_LOG_c46f5d8d-e599-4152-8ded-f529e19524d4",
+ "type": "prov:QUALIFIED_NAME"
+ }
+ },
+ "_:id29": {
+ "prov:entity": "id:6b04550d-c2bd-400b-858b-14e287bbf8c3",
+ "prov:activity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a",
+ "prov:time": "2024-12-12T09:16:18.869180",
+ "prov:role": {
+ "$": "wf:main/primary/PACKAGE_OUTPUT_HOOK_LOG_606e3f88-0388-4c97-b139-58fed24b37c1",
+ "type": "prov:QUALIFIED_NAME"
+ }
+ }
+ },
+ "used": {
+ "_:id17": {
+ "prov:activity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a",
+ "prov:entity": "data:2ef7bde608ce5404e97d5f042f95f89f1c232871",
+ "prov:time": "2024-12-12T09:16:17.846280",
+ "prov:role": {
+ "$": "wf:main/message",
+ "type": "prov:QUALIFIED_NAME"
+ }
+ },
+ "_:id19": {
+ "prov:activity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a",
+ "prov:entity": "data:2ef7bde608ce5404e97d5f042f95f89f1c232871",
+ "prov:time": "2024-12-12T09:16:17.863017",
+ "prov:role": {
+ "$": "wf:main/echo/message",
+ "type": "prov:QUALIFIED_NAME"
+ }
+ }
+ },
+ "wasEndedBy": {
+ "_:id26": {
+ "prov:activity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a",
+ "prov:ender": "id:1c49f085-bbd7-410d-a801-81fd42469e8a",
+ "prov:time": "2024-12-12T09:16:18.867034"
+ },
+ "_:id30": {
+ "prov:activity": "id:1c49f085-bbd7-410d-a801-81fd42469e8a",
+ "prov:ender": "id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c",
+ "prov:time": "2024-12-12T09:16:18.869299"
+ }
+ }
+}
diff --git a/weaver/wps_restapi/examples/job_prov.txt b/weaver/wps_restapi/examples/job_prov.txt
new file mode 100644
index 000000000..85d8c34c9
--- /dev/null
+++ b/weaver/wps_restapi/examples/job_prov.txt
@@ -0,0 +1,71 @@
+document
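+ prefix wfprov <http://purl.org/wf4ever/wfprov#>
+ prefix wfdesc <http://purl.org/wf4ever/wfdesc#>
+ prefix cwlprov <https://w3id.org/cwl/prov#>
+ prefix foaf <http://xmlns.com/foaf/0.1/>
+ prefix schema <http://schema.org/>
+ prefix orcid <https://orcid.org/>
+ prefix id <urn:uuid:>
+ prefix data <urn:hash::sha1:>
+ prefix sha256 <nih:sha-256;>
+ prefix researchobject <arcp://uuid,1c49f085-bbd7-410d-a801-81fd42469e8a/>
+ prefix metadata <arcp://uuid,1c49f085-bbd7-410d-a801-81fd42469e8a/metadata/>
+ prefix provenance <arcp://uuid,1c49f085-bbd7-410d-a801-81fd42469e8a/metadata/provenance/>
+ prefix wf <arcp://uuid,1c49f085-bbd7-410d-a801-81fd42469e8a/workflow/packed.cwl#>
+ prefix input <arcp://uuid,1c49f085-bbd7-410d-a801-81fd42469e8a/workflow/primary-job.json#>
+ prefix doi <https://doi.org/>
+ prefix wf4ever <http://purl.org/wf4ever/wf4ever#>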
+
+ agent(id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b)
+ agent(id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b, [prov:type='foaf:OnlineAccount', prov:location="http://localhost:4002", cwlprov:hostname="localhost"])
+ agent(id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b, [prov:type='foaf:OnlineAccount', prov:label="crim-ca/weaver:6.0.0", foaf:accountName="crim-ca/weaver:6.0.0"])
+ agent(id:b3a49ee7-f620-4154-9e4d-d2e948748deb, [prov:type='schema:Person', prov:type='prov:Person', prov:label="crim-ca/weaver:6.0.0", foaf:name="crim-ca/weaver:6.0.0", foaf:account='id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b', schema:name="crim-ca/weaver:6.0.0"])
+ actedOnBehalfOf(id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b, id:b3a49ee7-f620-4154-9e4d-d2e948748deb, -)
+ agent(id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c, [prov:type='prov:SoftwareAgent', prov:type='wfprov:WorkflowEngine', prov:label="cwltool 3.1.20240708091338.dev15+g9c05bb7d"])
+ wasStartedBy(id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c, -, id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b, 2024-12-12T09:16:17.843783)
+ activity(id:1c49f085-bbd7-410d-a801-81fd42469e8a, 2024-12-12T09:16:17.843806, -, [prov:type='wfprov:WorkflowRun', prov:label="Run of workflow/packed.cwl#main"])
+ wasAssociatedWith(id:1c49f085-bbd7-410d-a801-81fd42469e8a, id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c, wf:main)
+ wasStartedBy(id:1c49f085-bbd7-410d-a801-81fd42469e8a, -, id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c, 2024-12-12T09:16:17.843852)
+ entity(data:644e201526525f62152815a76a2dc773450f3dd9, [prov:type='prov:PrimarySource', prov:label="Source code repository", prov:location="https://github.com/crim-ca/weaver"])
+ agent(data:15401f8d937f5d526951c1bf20dcba16a1271d97, [prov:type='prov:SoftwareAgent', prov:location="http://localhost:4002", prov:label="Weaver internal WPS used for demo and testing.", prov:label="crim-ca/weaver:6.0.0", prov:generalEntity='data:644e201526525f62152815a76a2dc773450f3dd9', prov:specificEntity='doi:10.5281/zenodo.14210717'])
+ entity(data:3102f6d7a018ebae572f457d711ed7e1e7a11bc2, [prov:type='prov:Organization', foaf:name="Computer Research Institute of Montréal", schema:name="Computer Research Institute of Montréal"])
+ entity(data:838cdfa4bbf09d1aedd26d79b46bfa8778ede2e0, [foaf:name="CRIM", schema:name="CRIM", prov:location="http://pavics-weaver.readthedocs.org/en/latest/", prov:type='prov:Organization', prov:label="Server Provider"])
+ entity(id:1c49f085-bbd7-410d-a801-81fd42469e8a, [prov:type='wfdesc:ProcessRun', prov:location="http://localhost:4002/processes/echo/jobs/1c49f085-bbd7-410d-a801-81fd42469e8a", prov:label="Job Information"])
+ entity(data:15401f8d937f5d526951c1bf20dcba16a1271d97:echo, [prov:type='wfdesc:Process', prov:location="http://localhost:4002/processes/echo", prov:label="Process Description"])
+ wasDerivedFrom(data:15401f8d937f5d526951c1bf20dcba16a1271d97, data:644e201526525f62152815a76a2dc773450f3dd9, -, -, -, [prov:type='prov:PrimarySource'])
+ actedOnBehalfOf(data:15401f8d937f5d526951c1bf20dcba16a1271d97, id:b3a49ee7-f620-4154-9e4d-d2e948748deb, -)
+ specializationOf(data:15401f8d937f5d526951c1bf20dcba16a1271d97, id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b)
+ wasAttributedTo(data:3102f6d7a018ebae572f457d711ed7e1e7a11bc2, data:644e201526525f62152815a76a2dc773450f3dd9)
+ wasDerivedFrom(id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b, data:15401f8d937f5d526951c1bf20dcba16a1271d97, -, -, -)
+ wasStartedBy(id:1c49f085-bbd7-410d-a801-81fd42469e8a, data:15401f8d937f5d526951c1bf20dcba16a1271d97, -, -)
+ wasStartedBy(id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c, id:1c49f085-bbd7-410d-a801-81fd42469e8a, -, 2024-12-12T14:15:50.834000+00:00)
+ specializationOf(id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c, id:1c49f085-bbd7-410d-a801-81fd42469e8a)
+ alternateOf(id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c, id:1c49f085-bbd7-410d-a801-81fd42469e8a)
+ wasGeneratedBy(id:1c49f085-bbd7-410d-a801-81fd42469e8a, data:15401f8d937f5d526951c1bf20dcba16a1271d97:echo, -)
+ wasDerivedFrom(data:838cdfa4bbf09d1aedd26d79b46bfa8778ede2e0, data:15401f8d937f5d526951c1bf20dcba16a1271d97, -, -, -)
+ wasAttributedTo(data:838cdfa4bbf09d1aedd26d79b46bfa8778ede2e0, data:15401f8d937f5d526951c1bf20dcba16a1271d97)
+ entity(wf:main, [prov:type='wfdesc:Process', prov:type='prov:Plan', prov:label="Prospective provenance"])
+ entity(data:2ef7bde608ce5404e97d5f042f95f89f1c232871, [prov:type='wfprov:Artifact', prov:value="Hello World!"])
+ used(id:1c49f085-bbd7-410d-a801-81fd42469e8a, data:2ef7bde608ce5404e97d5f042f95f89f1c232871, 2024-12-12T09:16:17.846280, [prov:role='wf:main/message'])
+ agent(id:2148aee6-81e4-4bcd-9e48-78ff46a51ff1, [prov:type='prov:SoftwareAgent', cwlprov:image="debian:stretch-slim", prov:label="Container execution of image debian:stretch-slim"])
+ wasAssociatedWith(id:1c49f085-bbd7-410d-a801-81fd42469e8a, id:2148aee6-81e4-4bcd-9e48-78ff46a51ff1, -)
+ entity(data:2ef7bde608ce5404e97d5f042f95f89f1c232871, [prov:type='wfprov:Artifact', prov:value="Hello World!"])
+ used(id:1c49f085-bbd7-410d-a801-81fd42469e8a, data:2ef7bde608ce5404e97d5f042f95f89f1c232871, 2024-12-12T09:16:17.863017, [prov:role='wf:main/echo/message'])
+ entity(data:a0b65939670bc2c010f4d5d6a0b3e4e4590fb92b, [prov:type='wfprov:Artifact'])
+ entity(id:59967079-217e-4bdb-92d7-2ef2f784825c, [prov:type='wf4ever:File', prov:type='wfprov:Artifact', cwlprov:basename="stdout.log", cwlprov:nameroot="stdout", cwlprov:nameext=".log"])
+ specializationOf(id:59967079-217e-4bdb-92d7-2ef2f784825c, data:a0b65939670bc2c010f4d5d6a0b3e4e4590fb92b)
+ wasGeneratedBy(id:59967079-217e-4bdb-92d7-2ef2f784825c, id:1c49f085-bbd7-410d-a801-81fd42469e8a, 2024-12-12T09:16:18.867039, [prov:role='wf:main/echo/output'])
+ entity(data:da39a3ee5e6b4b0d3255bfef95601890afd80709, [prov:type='wfprov:Artifact'])
+ entity(id:2ab450c1-7309-4c5b-b65b-b4dfa44f384b, [prov:type='wf4ever:File', prov:type='wfprov:Artifact', cwlprov:basename="stderr.log", cwlprov:nameroot="stderr", cwlprov:nameext=".log"])
+ specializationOf(id:2ab450c1-7309-4c5b-b65b-b4dfa44f384b, data:da39a3ee5e6b4b0d3255bfef95601890afd80709)
+ wasGeneratedBy(id:2ab450c1-7309-4c5b-b65b-b4dfa44f384b, id:1c49f085-bbd7-410d-a801-81fd42469e8a, 2024-12-12T09:16:18.867039, [prov:role='wf:main/echo/PACKAGE_OUTPUT_HOOK_LOG_c46f5d8d-e599-4152-8ded-f529e19524d4'])
+ entity(data:a0b65939670bc2c010f4d5d6a0b3e4e4590fb92b)
+ entity(id:6b04550d-c2bd-400b-858b-14e287bbf8c3, [prov:type='wf4ever:File', prov:type='wfprov:Artifact', cwlprov:basename="stdout.log", cwlprov:nameroot="stdout", cwlprov:nameext=".log"])
+ specializationOf(id:6b04550d-c2bd-400b-858b-14e287bbf8c3, data:a0b65939670bc2c010f4d5d6a0b3e4e4590fb92b)
+ wasGeneratedBy(id:6b04550d-c2bd-400b-858b-14e287bbf8c3, id:1c49f085-bbd7-410d-a801-81fd42469e8a, 2024-12-12T09:16:18.867039, [prov:role='wf:main/echo/PACKAGE_OUTPUT_HOOK_LOG_606e3f88-0388-4c97-b139-58fed24b37c1'])
+ wasEndedBy(id:1c49f085-bbd7-410d-a801-81fd42469e8a, -, id:1c49f085-bbd7-410d-a801-81fd42469e8a, 2024-12-12T09:16:18.867034)
+ wasGeneratedBy(id:59967079-217e-4bdb-92d7-2ef2f784825c, id:1c49f085-bbd7-410d-a801-81fd42469e8a, 2024-12-12T09:16:18.869180, [prov:role='wf:main/primary/output'])
+ wasGeneratedBy(id:2ab450c1-7309-4c5b-b65b-b4dfa44f384b, id:1c49f085-bbd7-410d-a801-81fd42469e8a, 2024-12-12T09:16:18.869180, [prov:role='wf:main/primary/PACKAGE_OUTPUT_HOOK_LOG_c46f5d8d-e599-4152-8ded-f529e19524d4'])
+ wasGeneratedBy(id:6b04550d-c2bd-400b-858b-14e287bbf8c3, id:1c49f085-bbd7-410d-a801-81fd42469e8a, 2024-12-12T09:16:18.869180, [prov:role='wf:main/primary/PACKAGE_OUTPUT_HOOK_LOG_606e3f88-0388-4c97-b139-58fed24b37c1'])
+ wasEndedBy(id:1c49f085-bbd7-410d-a801-81fd42469e8a, -, id:eea8f699-1c08-47b2-8aed-8ad30e4eda4c, 2024-12-12T09:16:18.869299)
+endDocument
diff --git a/weaver/wps_restapi/examples/job_prov.xml b/weaver/wps_restapi/examples/job_prov.xml
new file mode 100644
index 000000000..c25e44535
--- /dev/null
+++ b/weaver/wps_restapi/examples/job_prov.xml
@@ -0,0 +1,267 @@
+
+
+
+
+ http://localhost:4002
+ foaf:OnlineAccount
+ localhost
+
+
+ crim-ca/weaver:6.0.0
+ foaf:OnlineAccount
+ crim-ca/weaver:6.0.0
+
+
+ crim-ca/weaver:6.0.0
+ schema:Person
+ id:53a6e2b3-6ee3-4f37-a2e5-72af3e97c70b
+ crim-ca/weaver:6.0.0
+ crim-ca/weaver:6.0.0
+
+
+
+
+
+
+ cwltool 3.1.20240708091338.dev15+g9c05bb7d
+ wfprov:WorkflowEngine
+
+
+
+
+ 2024-12-12T09:16:17.843783
+
+
+ 2024-12-12T09:16:17.843806
+ Run of workflow/packed.cwl#main
+ wfprov:WorkflowRun
+
+
+
+
+
+
+
+
+
+ 2024-12-12T09:16:17.843852
+
+
+ Source code repository
+ https://github.com/crim-ca/weaver
+
+
+ Weaver internal WPS used for demo and testing.
+ crim-ca/weaver:6.0.0
+ http://localhost:4002
+
+
+
+
+ Computer Research Institute of Montréal
+ Computer Research Institute of Montréal
+
+
+ Server Provider
+ http://pavics-weaver.readthedocs.org/en/latest/
+ CRIM
+ CRIM
+
+
+ Job Information
+ http://localhost:4002/processes/echo/jobs/1c49f085-bbd7-410d-a801-81fd42469e8a
+ wfdesc:ProcessRun
+
+
+ Process Description
+ http://localhost:4002/processes/echo
+ wfdesc:Process
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 2024-12-12T14:15:50.834000+00:00
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Prospective provenance
+ wfdesc:Process
+
+
+ wfprov:Artifact
+ Hello World!
+
+
+
+
+ 2024-12-12T09:16:17.846280
+ wf:main/message
+
+
+ Container execution of image debian:stretch-slim
+ debian:stretch-slim
+
+
+
+
+
+
+ wfprov:Artifact
+ Hello World!
+
+
+
+
+ 2024-12-12T09:16:17.863017
+ wf:main/echo/message
+
+
+ wfprov:Artifact
+
+
+ wf4ever:File
+ wfprov:Artifact
+ stdout.log
+ .log
+ stdout
+
+
+
+
+
+
+
+
+ 2024-12-12T09:16:18.867039
+ wf:main/echo/output
+
+
+ wfprov:Artifact
+
+
+ wf4ever:File
+ wfprov:Artifact
+ stderr.log
+ .log
+ stderr
+
+
+
+
+
+
+
+
+ 2024-12-12T09:16:18.867039
+ wf:main/echo/PACKAGE_OUTPUT_HOOK_LOG_c46f5d8d-e599-4152-8ded-f529e19524d4
+
+
+
+ wf4ever:File
+ wfprov:Artifact
+ stdout.log
+ .log
+ stdout
+
+
+
+
+
+
+
+
+ 2024-12-12T09:16:18.867039
+ wf:main/echo/PACKAGE_OUTPUT_HOOK_LOG_606e3f88-0388-4c97-b139-58fed24b37c1
+
+
+
+
+ 2024-12-12T09:16:18.867034
+
+
+
+
+ 2024-12-12T09:16:18.869180
+ wf:main/primary/output
+
+
+
+
+ 2024-12-12T09:16:18.869180
+ wf:main/primary/PACKAGE_OUTPUT_HOOK_LOG_c46f5d8d-e599-4152-8ded-f529e19524d4
+
+
+
+
+ 2024-12-12T09:16:18.869180
+ wf:main/primary/PACKAGE_OUTPUT_HOOK_LOG_606e3f88-0388-4c97-b139-58fed24b37c1
+
+
+
+
+ 2024-12-12T09:16:18.869299
+
+
diff --git a/weaver/wps_restapi/examples/job_prov_info.txt b/weaver/wps_restapi/examples/job_prov_info.txt
new file mode 100644
index 000000000..b353a3018
--- /dev/null
+++ b/weaver/wps_restapi/examples/job_prov_info.txt
@@ -0,0 +1,5 @@
+Research Object of CWL workflow run
+Research Object ID: arcp://uuid,1c49f085-bbd7-410d-a801-81fd42469e8a/
+Profile: https://w3id.org/cwl/prov/0.6.0
+Workflow run ID: urn:uuid:1c49f085-bbd7-410d-a801-81fd42469e8a
+Packaged: 2024-12-12
diff --git a/weaver/wps_restapi/examples/job_prov_run.txt b/weaver/wps_restapi/examples/job_prov_run.txt
new file mode 100644
index 000000000..cdb34242c
--- /dev/null
+++ b/weaver/wps_restapi/examples/job_prov_run.txt
@@ -0,0 +1,10 @@
+2024-12-12 09:16:17.843852 Flow 1c49f085-bbd7-410d-a801-81fd42469e8a [ Run of workflow/packed.cwl#main
+2024-12-12 09:16:17.846280 In 2ef7bde608ce5404e97d5f042f95f89f1c232871 < wf:main/message
+2024-12-12 09:16:17.863017 In 2ef7bde608ce5404e97d5f042f95f89f1c232871 < wf:main/echo/message
+ 2024-12-12 09:16:18.867039 Out 59967079-217e-4bdb-92d7-2ef2f784825c > wf:main/echo/output
+ 2024-12-12 09:16:18.867039 Out 2ab450c1-7309-4c5b-b65b-b4dfa44f384b > wf:main/echo/PACKAGE_OUTPUT_HOOK_LOG_c46f5d8d-e599-4152-8ded-f529e19524d4
+ 2024-12-12 09:16:18.867039 Out 6b04550d-c2bd-400b-858b-14e287bbf8c3 > wf:main/echo/PACKAGE_OUTPUT_HOOK_LOG_606e3f88-0388-4c97-b139-58fed24b37c1
+ 2024-12-12 09:16:18.869180 Out 59967079-217e-4bdb-92d7-2ef2f784825c > wf:main/primary/output
+ 2024-12-12 09:16:18.869180 Out 2ab450c1-7309-4c5b-b65b-b4dfa44f384b > wf:main/primary/PACKAGE_OUTPUT_HOOK_LOG_c46f5d8d-e599-4152-8ded-f529e19524d4
+ 2024-12-12 09:16:18.869180 Out 6b04550d-c2bd-400b-858b-14e287bbf8c3 > wf:main/primary/PACKAGE_OUTPUT_HOOK_LOG_606e3f88-0388-4c97-b139-58fed24b37c1
+ 2024-12-12 09:16:18.867034 Flow 1c49f085-bbd7-410d-a801-81fd42469e8a ] Run of workflow/packed.cwl#main (0:00:01.023182)
diff --git a/weaver/wps_restapi/examples/job_prov_who.txt b/weaver/wps_restapi/examples/job_prov_who.txt
new file mode 100644
index 000000000..a20e771f4
--- /dev/null
+++ b/weaver/wps_restapi/examples/job_prov_who.txt
@@ -0,0 +1,2 @@
+Packaged By: cwltool 3.1.20240708091338.dev15+g9c05bb7d
+Executed By: crim-ca/weaver:6.1.0
diff --git a/weaver/wps_restapi/jobs/__init__.py b/weaver/wps_restapi/jobs/__init__.py
index d9df9ee7b..e28ecc3ac 100644
--- a/weaver/wps_restapi/jobs/__init__.py
+++ b/weaver/wps_restapi/jobs/__init__.py
@@ -1,6 +1,10 @@
import logging
from typing import TYPE_CHECKING
+from pyramid.settings import asbool
+
+from weaver.utils import get_settings
+
if TYPE_CHECKING:
from pyramid.config import Configurator
@@ -11,3 +15,14 @@ def includeme(config):
# type: (Configurator) -> None
LOGGER.info("Adding WPS REST API jobs...")
config.include("weaver.wps_restapi.jobs.jobs")
+
+ settings = get_settings(config)
+ weaver_cwl_prov = asbool(settings.get("weaver.cwl_prov", True))
+ if not weaver_cwl_prov:
+ LOGGER.warning(
+ "Skipping Weaver PROV views [weaver.cwl_prov=false]. "
+ "Job Provenance endpoints will not be available."
+ )
+ else:
+ LOGGER.info("Adding Weaver REST API Job Provenance....")
+ config.include("weaver.wps_restapi.jobs.prov")
diff --git a/weaver/wps_restapi/jobs/jobs.py b/weaver/wps_restapi/jobs/jobs.py
index e5a17e953..e507e2826 100644
--- a/weaver/wps_restapi/jobs/jobs.py
+++ b/weaver/wps_restapi/jobs/jobs.py
@@ -71,7 +71,7 @@
accept=ContentType.TEXT_HTML,
renderer="weaver.wps_restapi:templates/responses/job_listing.mako",
response_schemas=sd.derive_responses(
- sd.get_prov_all_jobs_responses,
+ sd.get_provider_all_jobs_responses,
sd.GenericHTMLResponse(name="HTMLProviderJobListing", description="Listing of jobs.")
),
)
@@ -80,7 +80,7 @@
schema=sd.GetProviderJobsEndpoint(),
accept=ContentType.APP_JSON,
renderer=OutputFormat.JSON,
- response_schemas=sd.get_prov_all_jobs_responses,
+ response_schemas=sd.get_provider_all_jobs_responses,
)
@sd.process_jobs_service.get(
tags=[sd.TAG_JOBS, sd.TAG_PROCESSES],
@@ -325,7 +325,7 @@ def trigger_job_execution(request):
for profile in JobStatusSchema.values()
],
renderer=OutputFormat.JSON,
- response_schemas=sd.get_prov_single_job_status_responses,
+ response_schemas=sd.get_provider_single_job_status_responses,
)
@sd.process_job_service.get(
tags=[sd.TAG_PROCESSES, sd.TAG_JOBS, sd.TAG_STATUS],
@@ -401,7 +401,7 @@ def update_pending_job(request):
schema=sd.DeleteProviderJobEndpoint(),
accept=ContentType.APP_JSON,
renderer=OutputFormat.JSON,
- response_schemas=sd.delete_prov_job_responses,
+ response_schemas=sd.delete_provider_job_responses,
)
@sd.process_job_service.delete(
tags=[sd.TAG_JOBS, sd.TAG_DISMISS, sd.TAG_PROCESSES],
@@ -499,7 +499,7 @@ def cancel_job_batch(request):
schema=sd.ProviderInputsEndpoint(),
accept=ContentType.APP_JSON,
renderer=OutputFormat.JSON,
- response_schemas=sd.get_prov_inputs_responses,
+ response_schemas=sd.get_provider_inputs_responses,
)
@sd.process_inputs_service.get(
tags=[sd.TAG_JOBS, sd.TAG_RESULTS, sd.TAG_PROCESSES],
@@ -555,7 +555,7 @@ def get_job_inputs(request):
schema=sd.ProviderOutputsEndpoint(),
accept=ContentType.APP_JSON,
renderer=OutputFormat.JSON,
- response_schemas=sd.get_prov_outputs_responses,
+ response_schemas=sd.get_provider_outputs_responses,
)
@sd.process_outputs_service.get(
tags=[sd.TAG_JOBS, sd.TAG_RESULTS, sd.TAG_PROCESSES],
@@ -593,7 +593,7 @@ def get_job_outputs(request):
schema=sd.ProviderResultsEndpoint(),
accept=ContentType.APP_JSON,
renderer=OutputFormat.JSON,
- response_schemas=sd.get_prov_results_responses,
+ response_schemas=sd.get_provider_results_responses,
)
@sd.process_results_service.get(
tags=[sd.TAG_JOBS, sd.TAG_RESULTS, sd.TAG_PROCESSES],
@@ -625,21 +625,21 @@ def get_job_results(request):
schema=sd.ProviderExceptionsEndpoint(),
accept=ContentType.APP_JSON,
renderer=OutputFormat.JSON,
- response_schemas=sd.get_prov_exceptions_responses,
+ response_schemas=sd.get_provider_exceptions_responses,
)
@sd.process_exceptions_service.get(
tags=[sd.TAG_JOBS, sd.TAG_EXCEPTIONS, sd.TAG_PROCESSES],
schema=sd.ProcessExceptionsEndpoint(),
accept=ContentType.APP_JSON,
renderer=OutputFormat.JSON,
- response_schemas=sd.get_exceptions_responses,
+ response_schemas=sd.get_job_exceptions_responses,
)
@sd.job_exceptions_service.get(
tags=[sd.TAG_JOBS, sd.TAG_EXCEPTIONS],
schema=sd.JobExceptionsEndpoint(),
accept=ContentType.APP_JSON,
renderer=OutputFormat.JSON,
- response_schemas=sd.get_exceptions_responses,
+ response_schemas=sd.get_job_exceptions_responses,
)
@log_unhandled_exceptions(logger=LOGGER, message=sd.InternalServerErrorResponseSchema.description)
def get_job_exceptions(request):
@@ -664,7 +664,7 @@ def get_job_exceptions(request):
ContentType.TEXT_PLAIN,
],
renderer=OutputFormat.JSON,
- response_schemas=sd.get_prov_logs_responses,
+ response_schemas=sd.get_provider_logs_responses,
)
@sd.process_logs_service.get(
tags=[sd.TAG_JOBS, sd.TAG_LOGS, sd.TAG_PROCESSES],
@@ -677,7 +677,7 @@ def get_job_exceptions(request):
ContentType.TEXT_PLAIN,
],
renderer=OutputFormat.JSON,
- response_schemas=sd.get_logs_responses,
+ response_schemas=sd.get_job_logs_responses,
)
@sd.job_logs_service.get(
tags=[sd.TAG_JOBS, sd.TAG_LOGS],
@@ -690,7 +690,7 @@ def get_job_exceptions(request):
ContentType.TEXT_PLAIN,
],
renderer=OutputFormat.JSON,
- response_schemas=sd.get_logs_responses,
+ response_schemas=sd.get_job_logs_responses,
)
@log_unhandled_exceptions(logger=LOGGER, message=sd.InternalServerErrorResponseSchema.description)
def get_job_logs(request):
@@ -717,21 +717,21 @@ def get_job_logs(request):
schema=sd.ProviderJobStatisticsEndpoint(),
accept=ContentType.APP_JSON,
renderer=OutputFormat.JSON,
- response_schemas=sd.get_prov_stats_responses,
+ response_schemas=sd.get_provider_stats_responses,
)
@sd.process_stats_service.get(
tags=[sd.TAG_JOBS, sd.TAG_STATISTICS, sd.TAG_PROCESSES],
schema=sd.ProcessJobStatisticsEndpoint(),
accept=ContentType.APP_JSON,
renderer=OutputFormat.JSON,
- response_schemas=sd.get_stats_responses,
+ response_schemas=sd.get_job_stats_responses,
)
@sd.job_stats_service.get(
tags=[sd.TAG_JOBS, sd.TAG_STATISTICS],
schema=sd.JobStatisticsEndpoint(),
accept=ContentType.APP_JSON,
renderer=OutputFormat.JSON,
- response_schemas=sd.get_stats_responses,
+ response_schemas=sd.get_job_stats_responses,
)
@log_unhandled_exceptions(logger=LOGGER, message=sd.InternalServerErrorResponseSchema.description)
def get_job_stats(request):
@@ -805,14 +805,6 @@ def includeme(config):
config.add_cornice_service(sd.job_exceptions_service)
config.add_cornice_service(sd.job_logs_service)
config.add_cornice_service(sd.job_stats_service)
- config.add_cornice_service(sd.provider_job_service)
- config.add_cornice_service(sd.provider_jobs_service)
- config.add_cornice_service(sd.provider_results_service)
- config.add_cornice_service(sd.provider_outputs_service)
- config.add_cornice_service(sd.provider_inputs_service)
- config.add_cornice_service(sd.provider_exceptions_service)
- config.add_cornice_service(sd.provider_logs_service)
- config.add_cornice_service(sd.provider_stats_service)
config.add_cornice_service(sd.process_jobs_service)
config.add_cornice_service(sd.process_job_service)
config.add_cornice_service(sd.process_results_service)
@@ -821,6 +813,14 @@ def includeme(config):
config.add_cornice_service(sd.process_exceptions_service)
config.add_cornice_service(sd.process_logs_service)
config.add_cornice_service(sd.process_stats_service)
+ config.add_cornice_service(sd.provider_job_service)
+ config.add_cornice_service(sd.provider_jobs_service)
+ config.add_cornice_service(sd.provider_results_service)
+ config.add_cornice_service(sd.provider_outputs_service)
+ config.add_cornice_service(sd.provider_inputs_service)
+ config.add_cornice_service(sd.provider_exceptions_service)
+ config.add_cornice_service(sd.provider_logs_service)
+ config.add_cornice_service(sd.provider_stats_service)
# backward compatibility routes (deprecated)
config.add_cornice_service(sd.job_result_service)
diff --git a/weaver/wps_restapi/jobs/prov.py b/weaver/wps_restapi/jobs/prov.py
new file mode 100644
index 000000000..c79db2c5b
--- /dev/null
+++ b/weaver/wps_restapi/jobs/prov.py
@@ -0,0 +1,238 @@
+import logging
+from typing import TYPE_CHECKING
+
+from weaver.exceptions import log_unhandled_exceptions
+from weaver.formats import ContentType
+from weaver.wps_restapi import swagger_definitions as sd
+from weaver.wps_restapi.jobs.utils import get_job_prov_response
+
+if TYPE_CHECKING:
+ from pyramid.config import Configurator
+
+ from weaver.typedefs import AnyResponseType, PyramidRequest
+
+LOGGER = logging.getLogger(__name__)
+
+
+@sd.provider_prov_service.get(
+ tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROVIDERS],
+ schema=sd.ProviderJobProvEndpoint(),
+ accept=sd.JobProvAcceptHeader.validator.choices,
+ response_schemas=sd.get_job_prov_responses,
+)
+@sd.process_prov_service.get(
+ tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROCESSES],
+ schema=sd.ProcessJobProvEndpoint(),
+ accept=sd.JobProvAcceptHeader.validator.choices,
+ response_schemas=sd.get_job_prov_responses,
+)
+@sd.job_prov_service.get(
+ tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE],
+ schema=sd.JobProvEndpoint(),
+ accept=sd.JobProvAcceptHeader.validator.choices,
+ response_schemas=sd.get_job_prov_responses,
+)
+@sd.provider_prov_info_service.get(
+ tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROVIDERS],
+ schema=sd.ProviderJobProvMetadataEndpoint(),
+ accept=ContentType.TEXT_PLAIN,
+ response_schemas=sd.get_job_prov_metadata_responses,
+)
+@sd.process_prov_info_service.get(
+ tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROCESSES],
+ schema=sd.ProcessJobProvMetadataEndpoint(),
+ accept=ContentType.TEXT_PLAIN,
+ response_schemas=sd.get_job_prov_metadata_responses,
+)
+@sd.job_prov_info_service.get(
+ tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE],
+ schema=sd.JobProvMetadataEndpoint(),
+ accept=ContentType.TEXT_PLAIN,
+ response_schemas=sd.get_job_prov_metadata_responses,
+)
+@sd.provider_prov_who_service.get(
+ tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROVIDERS],
+ schema=sd.ProviderJobProvMetadataEndpoint(),
+ accept=ContentType.TEXT_PLAIN,
+ response_schemas=sd.get_job_prov_metadata_responses,
+)
+@sd.process_prov_who_service.get(
+ tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROCESSES],
+ schema=sd.ProcessJobProvMetadataEndpoint(),
+ accept=ContentType.TEXT_PLAIN,
+ response_schemas=sd.get_job_prov_metadata_responses,
+)
+@sd.job_prov_who_service.get(
+ tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE],
+ schema=sd.JobProvMetadataEndpoint(),
+ accept=ContentType.TEXT_PLAIN,
+ response_schemas=sd.get_job_prov_metadata_responses,
+)
+@sd.provider_prov_inputs_service.get(
+ tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROVIDERS],
+ schema=sd.ProviderJobProvMetadataEndpoint(),
+ accept=ContentType.TEXT_PLAIN,
+ response_schemas=sd.get_job_prov_metadata_responses,
+)
+@sd.process_prov_inputs_service.get(
+ tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROCESSES],
+ schema=sd.ProcessJobProvMetadataEndpoint(),
+ accept=ContentType.TEXT_PLAIN,
+ response_schemas=sd.get_job_prov_metadata_responses,
+)
+@sd.job_prov_inputs_service.get(
+ tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE],
+ schema=sd.JobProvMetadataEndpoint(),
+ accept=ContentType.TEXT_PLAIN,
+ response_schemas=sd.get_job_prov_metadata_responses,
+)
+@sd.provider_prov_inputs_run_service.get(
+ tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROVIDERS],
+ schema=sd.ProviderJobProvMetadataEndpoint(),
+ accept=ContentType.TEXT_PLAIN,
+ response_schemas=sd.get_job_prov_metadata_responses,
+)
+@sd.process_prov_inputs_run_service.get(
+ tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROCESSES],
+ schema=sd.ProcessJobProvMetadataEndpoint(),
+ accept=ContentType.TEXT_PLAIN,
+ response_schemas=sd.get_job_prov_metadata_responses,
+)
+@sd.job_prov_inputs_run_service.get(
+ tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE],
+ schema=sd.JobProvMetadataEndpoint(),
+ accept=ContentType.TEXT_PLAIN,
+ response_schemas=sd.get_job_prov_metadata_responses,
+)
+@sd.provider_prov_outputs_service.get(
+ tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROVIDERS],
+ schema=sd.ProviderJobProvMetadataEndpoint(),
+ accept=ContentType.TEXT_PLAIN,
+ response_schemas=sd.get_job_prov_metadata_responses,
+)
+@sd.process_prov_outputs_service.get(
+ tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROCESSES],
+ schema=sd.ProcessJobProvMetadataEndpoint(),
+ accept=ContentType.TEXT_PLAIN,
+ response_schemas=sd.get_job_prov_metadata_responses,
+)
+@sd.job_prov_outputs_service.get(
+ tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE],
+ schema=sd.JobProvMetadataEndpoint(),
+ accept=ContentType.TEXT_PLAIN,
+ response_schemas=sd.get_job_prov_metadata_responses,
+)
+@sd.provider_prov_outputs_run_service.get(
+ tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROVIDERS],
+ schema=sd.ProviderJobProvMetadataEndpoint(),
+ accept=ContentType.TEXT_PLAIN,
+ response_schemas=sd.get_job_prov_metadata_responses,
+)
+@sd.process_prov_outputs_run_service.get(
+ tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROCESSES],
+ schema=sd.ProcessJobProvMetadataEndpoint(),
+ accept=ContentType.TEXT_PLAIN,
+ response_schemas=sd.get_job_prov_metadata_responses,
+)
+@sd.job_prov_outputs_run_service.get(
+    tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE],
+    schema=sd.JobProvMetadataEndpoint(),
+    accept=ContentType.TEXT_PLAIN,
+    response_schemas=sd.get_job_prov_metadata_responses,
+)
+@sd.provider_prov_run_service.get(
+ tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROVIDERS],
+ schema=sd.ProviderJobProvMetadataEndpoint(),
+ accept=ContentType.TEXT_PLAIN,
+ response_schemas=sd.get_job_prov_metadata_responses,
+)
+@sd.process_prov_run_service.get(
+ tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROCESSES],
+ schema=sd.ProcessJobProvMetadataEndpoint(),
+ accept=ContentType.TEXT_PLAIN,
+ response_schemas=sd.get_job_prov_metadata_responses,
+)
+@sd.job_prov_run_service.get(
+ tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE],
+ schema=sd.JobProvMetadataEndpoint(),
+ accept=ContentType.TEXT_PLAIN,
+ response_schemas=sd.get_job_prov_metadata_responses,
+)
+@sd.provider_prov_run_id_service.get(
+ tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROVIDERS],
+ schema=sd.ProviderJobProvMetadataEndpoint(),
+ accept=ContentType.TEXT_PLAIN,
+ response_schemas=sd.get_job_prov_metadata_responses,
+)
+@sd.process_prov_run_id_service.get(
+ tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROCESSES],
+ schema=sd.ProcessJobProvMetadataEndpoint(),
+ accept=ContentType.TEXT_PLAIN,
+ response_schemas=sd.get_job_prov_metadata_responses,
+)
+@sd.job_prov_run_id_service.get(
+ tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE],
+ schema=sd.JobProvMetadataEndpoint(),
+ accept=ContentType.TEXT_PLAIN,
+ response_schemas=sd.get_job_prov_metadata_responses,
+)
+@sd.provider_prov_runs_service.get(
+ tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROVIDERS],
+ schema=sd.ProviderJobProvMetadataEndpoint(),
+ accept=ContentType.TEXT_PLAIN,
+ response_schemas=sd.get_job_prov_metadata_responses,
+)
+@sd.process_prov_runs_service.get(
+ tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE, sd.TAG_PROCESSES],
+ schema=sd.ProcessJobProvMetadataEndpoint(),
+ accept=ContentType.TEXT_PLAIN,
+ response_schemas=sd.get_job_prov_metadata_responses,
+)
+@sd.job_prov_runs_service.get(
+ tags=[sd.TAG_JOBS, sd.TAG_PROVENANCE],
+ schema=sd.JobProvMetadataEndpoint(),
+ accept=ContentType.TEXT_PLAIN,
+ response_schemas=sd.get_job_prov_metadata_responses,
+)
+@log_unhandled_exceptions(logger=LOGGER, message=sd.InternalServerErrorResponseSchema.description)
+def get_job_prov(request):
+ # type: (PyramidRequest) -> AnyResponseType
+ """
+ Retrieve the provenance details of a job based on the contextual request path.
+ """
+ return get_job_prov_response(request)
+
+
+def includeme(config):
+ # type: (Configurator) -> None
+ LOGGER.info("Adding WPS REST API jobs PROV views...")
+ config.add_cornice_service(sd.job_prov_service)
+ config.add_cornice_service(sd.job_prov_info_service)
+ config.add_cornice_service(sd.job_prov_who_service)
+ config.add_cornice_service(sd.job_prov_inputs_service)
+ config.add_cornice_service(sd.job_prov_inputs_run_service)
+ config.add_cornice_service(sd.job_prov_outputs_service)
+ config.add_cornice_service(sd.job_prov_outputs_run_service)
+ config.add_cornice_service(sd.job_prov_run_service)
+ config.add_cornice_service(sd.job_prov_run_id_service)
+ config.add_cornice_service(sd.job_prov_runs_service)
+ config.add_cornice_service(sd.process_prov_service)
+ config.add_cornice_service(sd.process_prov_info_service)
+ config.add_cornice_service(sd.process_prov_who_service)
+ config.add_cornice_service(sd.process_prov_inputs_service)
+ config.add_cornice_service(sd.process_prov_inputs_run_service)
+ config.add_cornice_service(sd.process_prov_outputs_service)
+ config.add_cornice_service(sd.process_prov_outputs_run_service)
+ config.add_cornice_service(sd.process_prov_run_service)
+ config.add_cornice_service(sd.process_prov_run_id_service)
+ config.add_cornice_service(sd.process_prov_runs_service)
+ config.add_cornice_service(sd.provider_prov_service)
+ config.add_cornice_service(sd.provider_prov_info_service)
+ config.add_cornice_service(sd.provider_prov_who_service)
+ config.add_cornice_service(sd.provider_prov_inputs_service)
+ config.add_cornice_service(sd.provider_prov_inputs_run_service)
+ config.add_cornice_service(sd.provider_prov_outputs_service)
+ config.add_cornice_service(sd.provider_prov_outputs_run_service)
+ config.add_cornice_service(sd.provider_prov_run_service)
+ config.add_cornice_service(sd.provider_prov_run_id_service)
+ config.add_cornice_service(sd.provider_prov_runs_service)
diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py
index 3f4ee4673..e63fac5bf 100644
--- a/weaver/wps_restapi/jobs/utils.py
+++ b/weaver/wps_restapi/jobs/utils.py
@@ -14,6 +14,7 @@
HTTPInternalServerError,
HTTPLocked,
HTTPNoContent,
+ HTTPNotAcceptable,
HTTPNotFound,
HTTPOk
)
@@ -40,7 +41,14 @@
parse_prefer_header_return,
update_preference_applied_return_header
)
-from weaver.formats import ContentEncoding, ContentType, clean_media_type_format, get_format, repr_json
+from weaver.formats import (
+ ContentEncoding,
+ ContentType,
+ clean_media_type_format,
+ get_format,
+ guess_target_format,
+ repr_json
+)
from weaver.owsexceptions import OWSNoApplicableCode, OWSNotFound
from weaver.processes.constants import JobInputsOutputsSchema, JobStatusSchema
from weaver.processes.convert import any2wps_literal_datatype, convert_output_params_schema, get_field
@@ -1410,3 +1418,41 @@ def dismiss_job_task(job, container):
job.status = map_status(Status.DISMISSED)
job = store.update_job(job)
return job
+
+
+def get_job_prov_response(request):
+ # type: (PyramidRequest) -> AnyResponseType
+ """
+ Retrieve specific :term:`Provenance` contents of a :term:`Job` based on the request.
+
+ The specific request path is redirected to the relevant command from :mod:`cwlprov`.
+ If applicable, request :term:`Media-Type` specifiers are considered to return alternate representations.
+ """
+ job = get_job(request)
+ raise_job_dismissed(job, request)
+ raise_job_bad_status_success(job, request)
+
+ prov_type = guess_target_format(request, override_user_agent=True, default=ContentType.APP_JSON)
+ prov_path = request.path.rsplit("/prov", 1)[-1]
+ prov_path = f"/prov{prov_path}"
+ prov_data, prov_type = job.prov_data(request, prov_path, prov_type)
+ if not prov_data:
+ prov_dir = job.prov_path(request)
+ prov_exists = os.path.isdir(prov_dir)
+ prov_err = HTTPNotAcceptable if prov_exists else JobGone
+ prov_body = {
+ "title": "NoJobProvenance",
+ "type": "no-job-provenance", # unofficial
+ "detail": "Job provenance could not be retrieved for the specified job.",
+ "cause": "Missing or invalid provenance details."
+ }
+ if prov_exists and "run_id" in request.matchdict:
+ prov_err = JobNotFound
+ prov_body["error"] = "No such run ID for specified job provenance."
+ prov_body["value"] = {"run_id": str(request.matchdict["run_id"])}
+ prov_body["status"] = prov_err.code
+ return prov_err(json=prov_body, headers={"Content-Type": ContentType.APP_JSON})
+ links = job.links(container=request, self_link="provenance")
+ headers = [("Link", make_link_header(link)) for link in links]
+ headers.append(("Content-Type", prov_type))
+ return HTTPOk(body=prov_data, headers=headers)
diff --git a/weaver/wps_restapi/swagger_definitions.py b/weaver/wps_restapi/swagger_definitions.py
index 21475c1c2..0076e3cc1 100644
--- a/weaver/wps_restapi/swagger_definitions.py
+++ b/weaver/wps_restapi/swagger_definitions.py
@@ -117,6 +117,7 @@
JobStatusSchema,
ProcessSchema
)
+from weaver.provenance import ProvenanceFormat
from weaver.quotation.status import QuoteStatus
from weaver.sort import Sort, SortMethods
from weaver.status import JOB_STATUS_CODE_API, JOB_STATUS_SEARCH_API, Status
@@ -334,6 +335,7 @@
TAG_EXCEPTIONS = "Exceptions"
TAG_LOGS = "Logs"
TAG_STATISTICS = "Statistics"
+TAG_PROVENANCE = "Provenance"
TAG_VAULT = "Vault"
TAG_WPS = "WPS"
TAG_DEPRECATED = "Deprecated Endpoints"
@@ -359,11 +361,21 @@
jobs_service = Service(name="jobs", path="/jobs")
job_service = Service(name="job", path=f"{jobs_service.path}/{{job_id}}")
job_results_service = Service(name="job_results", path=f"{job_service.path}/results")
-job_exceptions_service = Service(name="job_exceptions", path=f"{job_service.path}/exceptions")
job_outputs_service = Service(name="job_outputs", path=f"{job_service.path}/outputs")
job_inputs_service = Service(name="job_inputs", path=f"{job_service.path}/inputs")
+job_exceptions_service = Service(name="job_exceptions", path=f"{job_service.path}/exceptions")
job_logs_service = Service(name="job_logs", path=f"{job_service.path}/logs")
job_stats_service = Service(name="job_stats", path=f"{job_service.path}/statistics")
+job_prov_service = Service(name="job_prov", path=f"{job_service.path}/prov")
+job_prov_info_service = Service(name="job_prov_info", path=f"{job_prov_service.path}/info")
+job_prov_who_service = Service(name="job_prov_who", path=f"{job_prov_service.path}/who")
+job_prov_inputs_service = Service(name="job_prov_inputs", path=f"{job_prov_service.path}/inputs")
+job_prov_inputs_run_service = Service(name="job_prov_inputs_run", path=f"{job_prov_service.path}/inputs/{{run_id}}")
+job_prov_outputs_service = Service(name="job_prov_outputs", path=f"{job_prov_service.path}/outputs")
+job_prov_outputs_run_service = Service(name="job_prov_outputs_run", path=f"{job_prov_service.path}/outputs/{{run_id}}")
+job_prov_run_service = Service(name="job_prov_run", path=f"{job_prov_service.path}/run")
+job_prov_run_id_service = Service(name="job_prov_run_id", path=f"{job_prov_service.path}/run/{{run_id}}")
+job_prov_runs_service = Service(name="job_prov_runs", path=f"{job_prov_service.path}/runs")
processes_service = Service(name="processes", path="/processes")
process_service = Service(name="process", path=f"{processes_service.path}/{{process_id}}")
@@ -373,6 +385,7 @@
process_visibility_service = Service(name="process_visibility", path=f"{process_service.path}/visibility")
process_package_service = Service(name="process_package", path=f"{process_service.path}/package")
process_payload_service = Service(name="process_payload", path=f"{process_service.path}/payload")
+process_execution_service = Service(name="process_execution", path=f"{process_service.path}/execution")
process_jobs_service = Service(name="process_jobs", path=process_service.path + jobs_service.path)
process_job_service = Service(name="process_job", path=process_service.path + job_service.path)
process_results_service = Service(name="process_results", path=process_service.path + job_results_service.path)
@@ -381,23 +394,92 @@
process_exceptions_service = Service(name="process_exceptions", path=process_service.path + job_exceptions_service.path)
process_logs_service = Service(name="process_logs", path=process_service.path + job_logs_service.path)
process_stats_service = Service(name="process_stats", path=process_service.path + job_stats_service.path)
-process_execution_service = Service(name="process_execution", path=f"{process_service.path}/execution")
+process_prov_service = Service(name="process_prov", path=process_service.path + job_prov_service.path)
+process_prov_info_service = Service(name="process_prov_info", path=process_service.path + job_prov_info_service.path)
+process_prov_who_service = Service(name="process_prov_who", path=process_service.path + job_prov_who_service.path)
+process_prov_inputs_service = Service(
+ name="process_prov_inputs",
+ path=process_service.path + job_prov_inputs_service.path,
+)
+process_prov_inputs_run_service = Service(
+ name="process_prov_inputs_run",
+ path=process_service.path + job_prov_inputs_run_service.path,
+)
+process_prov_outputs_service = Service(
+ name="process_prov_outputs",
+ path=process_service.path + job_prov_outputs_service.path,
+)
+process_prov_outputs_run_service = Service(
+ name="process_prov_outputs_run",
+ path=process_service.path + job_prov_outputs_run_service.path,
+)
+process_prov_run_service = Service(
+ name="process_prov_run",
+ path=process_service.path + job_prov_run_service.path,
+)
+process_prov_run_id_service = Service(
+ name="process_prov_run_id",
+ path=process_service.path + job_prov_run_id_service.path,
+)
+process_prov_runs_service = Service(
+ name="process_prov_runs",
+ path=process_service.path + job_prov_runs_service.path,
+)
providers_service = Service(name="providers", path="/providers")
provider_service = Service(name="provider", path=f"{providers_service.path}/{{provider_id}}")
provider_processes_service = Service(name="provider_processes", path=provider_service.path + processes_service.path)
provider_process_service = Service(name="provider_process", path=provider_service.path + process_service.path)
provider_process_package_service = Service(name="provider_process_pkg", path=f"{provider_process_service.path}/package")
+provider_execution_service = Service(name="provider_execution", path=f"{provider_process_service.path}/execution")
provider_jobs_service = Service(name="provider_jobs", path=provider_service.path + process_jobs_service.path)
provider_job_service = Service(name="provider_job", path=provider_service.path + process_job_service.path)
provider_results_service = Service(name="provider_results", path=provider_service.path + process_results_service.path)
provider_inputs_service = Service(name="provider_inputs", path=provider_service.path + process_inputs_service.path)
provider_outputs_service = Service(name="provider_outputs", path=provider_service.path + process_outputs_service.path)
+provider_exceptions_service = Service(
+ name="provider_exceptions",
+ path=provider_service.path + process_exceptions_service.path,
+)
provider_logs_service = Service(name="provider_logs", path=provider_service.path + process_logs_service.path)
provider_stats_service = Service(name="provider_stats", path=provider_service.path + process_stats_service.path)
-provider_exceptions_service = Service(name="provider_exceptions",
- path=provider_service.path + process_exceptions_service.path)
-provider_execution_service = Service(name="provider_execution", path=f"{provider_process_service.path}/execution")
+provider_prov_service = Service(name="provider_prov", path=provider_service.path + process_prov_service.path)
+provider_prov_info_service = Service(
+ name="provider_prov_info",
+ path=provider_service.path + process_prov_info_service.path,
+)
+provider_prov_who_service = Service(
+ name="provider_prov_who",
+ path=provider_service.path + process_prov_who_service.path,
+)
+provider_prov_inputs_service = Service(
+ name="provider_prov_inputs",
+ path=provider_service.path + process_prov_inputs_service.path,
+)
+provider_prov_inputs_run_service = Service(
+ name="provider_prov_inputs_run",
+ path=provider_service.path + process_prov_inputs_run_service.path,
+)
+provider_prov_outputs_service = Service(
+ name="provider_prov_outputs",
+ path=provider_service.path + process_prov_outputs_service.path,
+)
+provider_prov_outputs_run_service = Service(
+ name="provider_prov_outputs_run",
+ path=provider_service.path + process_prov_outputs_run_service.path,
+)
+provider_prov_run_service = Service(
+ name="provider_prov_run",
+ path=provider_service.path + process_prov_run_service.path,
+)
+provider_prov_run_id_service = Service(
+ name="provider_prov_run_id",
+ path=provider_service.path + process_prov_run_id_service.path,
+)
+provider_prov_runs_service = Service(
+ name="provider_prov_runs",
+ path=provider_service.path + process_prov_runs_service.path,
+)
# backward compatibility deprecated routes
job_result_service = Service(name="job_result", path=f"{job_service.path}/result")
@@ -715,6 +797,12 @@ class ResponseContentTypeHeader(ContentTypeHeader):
])
+class ResponseContentTypePlainTextHeader(ContentTypeHeader):
+ example = ContentType.TEXT_PLAIN
+ default = ContentType.TEXT_PLAIN
+ validator = OneOf([ContentType.TEXT_PLAIN])
+
+
class PreferHeader(ExtendedSchemaNode):
summary = "Header that describes job execution parameters."
description = (
@@ -743,6 +831,13 @@ class ResponseHeaders(ExtendedMappingSchema):
content_type = ResponseContentTypeHeader()
+class ResponsePlainTextHeaders(ResponseHeaders):
+ """
+ Headers describing resulting response.
+ """
+ content_type = ResponseContentTypePlainTextHeader()
+
+
class RedirectHeaders(ResponseHeaders):
Location = URL(example="https://job/123/result", description="Redirect resource location.")
@@ -5921,7 +6016,7 @@ class CWLTool(PermissiveMappingSchema):
class CWLWorkflowStepRunDefinition(AnyOfKeywordSchema):
_any_of = [
AnyIdentifier(
- title="CWLWorkflowStepRunID",
+            title="CWLWorkflowStepRunID",
description="Reference to local process ID, with or without '.cwl' extension."
),
CWLFileName(),
@@ -6398,8 +6493,23 @@ class JobStatisticsSchema(ExtendedMappingSchema):
class FrontpageParameterSchema(ExtendedMappingSchema):
name = ExtendedSchemaNode(String(), example="api")
enabled = ExtendedSchemaNode(Boolean(), example=True)
- url = URL(description="Referenced parameter endpoint.", example="https://weaver-host", missing=drop)
- doc = ExtendedSchemaNode(String(), example="https://weaver-host/api", missing=drop)
+ url = URL(
+ description="Referenced parameter endpoint. Root URL when the functionality implies multiple endpoints.",
+ example="https://weaver-host",
+ missing=drop,
+ )
+ doc = ExtendedSchemaNode(
+ String(),
+ description="Endpoint where additional documentation can be found about the functionality.",
+ example="https://weaver-host/api",
+ missing=drop
+ )
+ api = URL(
+ String(),
+ description="OpenAPI documentation endpoint about the functionality.",
+ example="https://weaver-host/api",
+ missing=drop,
+ )
class FrontpageParameters(ExtendedSequenceSchema):
@@ -7677,6 +7787,82 @@ class GoneJobResponseSchema(ExtendedMappingSchema):
body = ErrorJsonResponseBodySchema()
+class JobProvAcceptHeader(AcceptHeader):
+ validator = OneOf(ProvenanceFormat.media_types())
+
+
+class JobProvRequestHeaders(RequestHeaders):
+ accept = JobProvAcceptHeader()
+
+
+class JobProvEndpoint(JobPath):
+ header = JobProvRequestHeaders()
+ querystring = FormatQuery()
+
+
+class ProcessJobProvEndpoint(JobProvEndpoint, LocalProcessPath):
+ pass
+
+
+class ProviderJobProvEndpoint(JobProvEndpoint, ProviderProcessPath):
+ pass
+
+
+class OkGetJobProvResponseHeaders(ResponseHeaders):
+ content_type = JobProvAcceptHeader()
+
+
+class OkGetJobProvResponse(ExtendedMappingSchema):
+ description = "Job provenance details in the requested format."
+ header = OkGetJobProvResponseHeaders()
+
+
+class JobProvMetadataRequestHeaders(ExtendedMappingSchema):
+ accept = ResponseContentTypePlainTextHeader()
+
+
+class JobProvMetadataEndpoint(JobPath):
+ header = JobProvMetadataRequestHeaders()
+
+
+class ProcessJobProvMetadataEndpoint(JobProvMetadataEndpoint, LocalProcessPath):
+ pass
+
+
+class ProviderJobProvMetadataEndpoint(JobProvMetadataEndpoint, ProviderProcessPath):
+ pass
+
+
+class JobProvMetadataResponseBody(ExtendedSchemaNode):
+    schema_type = String
+    description = "Plain text provenance metadata of the job execution relative to the contextual request path."
+
+
+class OkGetJobProvMetadataResponse(ExtendedMappingSchema):
+ description = "Job execution provenance metadata relative to the contextual request path."
+ header = ResponsePlainTextHeaders()
+ body = JobProvMetadataResponseBody()
+
+
+class NotFoundJobProvResponseSchema(NotFoundResponseSchema):
+ description = (
+ "Job reference UUID cannot be found, or a specified provenance "
+ "run UUID cannot be retrieved from the Workflow execution steps."
+ )
+ header = ResponseHeaders()
+ body = ErrorJsonResponseBodySchema()
+
+
+class GoneJobProvResponseSchema(ExtendedMappingSchema):
+ description = (
+ "Job reference contents have been removed or does not contain PROV metadata. "
+ "This could be because the job was created before provenance support was enabled, "
+ "or because a job retention period deleted the contents."
+ )
+ header = ResponseHeaders()
+ body = ErrorJsonResponseBodySchema()
+
+
class OkGetJobInputsResponse(ExtendedMappingSchema):
header = ResponseHeaders()
body = JobInputsBody()
@@ -8196,8 +8382,8 @@ class GoneVaultFileDownloadResponse(ExtendedMappingSchema):
"406": NotAcceptableErrorResponseSchema(),
"422": UnprocessableEntityResponseSchema(),
}
-get_prov_all_jobs_responses = copy(get_all_jobs_responses)
-get_prov_all_jobs_responses.update({
+get_provider_all_jobs_responses = copy(get_all_jobs_responses)
+get_provider_all_jobs_responses.update({
"403": ForbiddenProviderLocalResponseSchema(),
})
get_single_job_status_responses = {
@@ -8217,8 +8403,8 @@ class GoneVaultFileDownloadResponse(ExtendedMappingSchema):
"406": NotAcceptableErrorResponseSchema(),
"500": InternalServerErrorResponseSchema(),
}
-get_prov_single_job_status_responses = copy(get_single_job_status_responses)
-get_prov_single_job_status_responses.update({
+get_provider_single_job_status_responses = copy(get_single_job_status_responses)
+get_provider_single_job_status_responses.update({
"403": ForbiddenProviderLocalResponseSchema(),
})
patch_job_responses = {
@@ -8246,8 +8432,8 @@ class GoneVaultFileDownloadResponse(ExtendedMappingSchema):
"410": GoneJobResponseSchema(),
"500": InternalServerErrorResponseSchema(),
}
-delete_prov_job_responses = copy(delete_job_responses)
-delete_prov_job_responses.update({
+delete_provider_job_responses = copy(delete_job_responses)
+delete_provider_job_responses.update({
"403": ForbiddenProviderLocalResponseSchema(),
})
get_job_inputs_responses = {
@@ -8263,8 +8449,8 @@ class GoneVaultFileDownloadResponse(ExtendedMappingSchema):
"406": NotAcceptableErrorResponseSchema(),
"500": InternalServerErrorResponseSchema(),
}
-get_prov_inputs_responses = copy(get_job_inputs_responses)
-get_prov_inputs_responses.update({
+get_provider_inputs_responses = copy(get_job_inputs_responses)
+get_provider_inputs_responses.update({
"403": ForbiddenProviderLocalResponseSchema(),
})
get_job_outputs_responses = {
@@ -8281,8 +8467,8 @@ class GoneVaultFileDownloadResponse(ExtendedMappingSchema):
"410": GoneJobResponseSchema(),
"500": InternalServerErrorResponseSchema(),
}
-get_prov_outputs_responses = copy(get_job_outputs_responses)
-get_prov_outputs_responses.update({
+get_provider_outputs_responses = copy(get_job_outputs_responses)
+get_provider_outputs_responses.update({
"403": ForbiddenProviderLocalResponseSchema(),
})
get_result_redirect_responses = {
@@ -8303,11 +8489,11 @@ class GoneVaultFileDownloadResponse(ExtendedMappingSchema):
"410": GoneJobResponseSchema(),
"500": InternalServerErrorResponseSchema(),
}
-get_prov_results_responses = copy(get_job_results_responses)
-get_prov_results_responses.update({
+get_provider_results_responses = copy(get_job_results_responses)
+get_provider_results_responses.update({
"403": ForbiddenProviderLocalResponseSchema(),
})
-get_exceptions_responses = {
+get_job_exceptions_responses = {
"200": OkGetJobExceptionsResponse(description="success", examples={
"JobExceptions": {
"summary": "Job exceptions that occurred during failing process execution.",
@@ -8321,11 +8507,11 @@ class GoneVaultFileDownloadResponse(ExtendedMappingSchema):
"410": GoneJobResponseSchema(),
"500": InternalServerErrorResponseSchema(),
}
-get_prov_exceptions_responses = copy(get_exceptions_responses)
-get_prov_exceptions_responses.update({
+get_provider_exceptions_responses = copy(get_job_exceptions_responses)
+get_provider_exceptions_responses.update({
"403": ForbiddenProviderLocalResponseSchema(),
})
-get_logs_responses = {
+get_job_logs_responses = {
"200": OkGetJobLogsResponse(description="success", examples={
"JobLogs": {
"summary": "Job logs registered and captured throughout process execution.",
@@ -8339,11 +8525,11 @@ class GoneVaultFileDownloadResponse(ExtendedMappingSchema):
"410": GoneJobResponseSchema(),
"500": InternalServerErrorResponseSchema(),
}
-get_prov_logs_responses = copy(get_logs_responses)
-get_prov_logs_responses.update({
+get_provider_logs_responses = copy(get_job_logs_responses)
+get_provider_logs_responses.update({
"403": ForbiddenProviderLocalResponseSchema(),
})
-get_stats_responses = {
+get_job_stats_responses = {
"200": OkGetJobStatsResponse(description="success", examples={
"JobStatistics": {
"summary": "Job statistics collected following process execution.",
@@ -8357,10 +8543,58 @@ class GoneVaultFileDownloadResponse(ExtendedMappingSchema):
"410": GoneJobResponseSchema(),
"500": InternalServerErrorResponseSchema(),
}
-get_prov_stats_responses = copy(get_stats_responses)
-get_prov_stats_responses.update({
+get_provider_stats_responses = copy(get_job_stats_responses)
+get_provider_stats_responses.update({
"403": ForbiddenProviderLocalResponseSchema(),
})
+get_job_prov_responses = {
+ "200": OkGetJobProvResponse(
+ description="Successful job PROV details.",
+ examples={
+ "PROV-JSON": {
+ "summary": "Provenance details returned in PROV-JSON format.",
+ "value": EXAMPLES["job_prov.json"],
+ },
+ "PROV-N": {
+ "summary": "Provenance details returned in PROV-N format.",
+ "value": EXAMPLES["job_prov.txt"],
+ },
+ "PROV-XML": {
+ "summary": "Provenance details returned in PROV-XML format.",
+ "value": EXAMPLES["job_prov.xml"],
+ }
+ }
+ ),
+ "400": InvalidJobResponseSchema(),
+ "404": NotFoundJobProvResponseSchema(),
+ "406": NotAcceptableErrorResponseSchema(),
+ "410": GoneJobProvResponseSchema(),
+ "500": InternalServerErrorResponseSchema(),
+}
+get_job_prov_metadata_responses = {
+ "200": OkGetJobProvMetadataResponse(
+ description="Successful job PROV metadata retrieval.",
+ examples={
+ "PROV run": {
+ "summary": "Provenance metadata of the run execution.",
+ "value": EXAMPLES["job_prov_run.txt"],
+ },
+ "PROV who": {
+ "summary": "Provenance metadata of who ran the job.",
+ "value": EXAMPLES["job_prov_who.txt"],
+ },
+ "PROV info": {
+ "summary": "Provenance metadata about the Research Object packaging information.",
+ "value": EXAMPLES["job_prov_info.txt"],
+ }
+ }
+ ),
+ "400": InvalidJobResponseSchema(),
+ "404": NotFoundJobProvResponseSchema(),
+ "406": NotAcceptableErrorResponseSchema(),
+ "410": GoneJobProvResponseSchema(),
+ "500": InternalServerErrorResponseSchema(),
+}
get_quote_list_responses = {
"200": OkGetQuoteListResponse(description="success"),
"405": MethodNotAllowedErrorResponseSchema(),
diff --git a/weaver/wps_restapi/templates/responses/frontpage.mako b/weaver/wps_restapi/templates/responses/frontpage.mako
index 572d17a5b..858325de9 100644
--- a/weaver/wps_restapi/templates/responses/frontpage.mako
+++ b/weaver/wps_restapi/templates/responses/frontpage.mako
@@ -80,6 +80,22 @@
Enabled:
${util.render_bool(param.enabled)}
+ %if "api" in param:
+
+ %endif
+ %if "doc" in param:
+
+ %endif
%if "url" in param: