crim-ca · fmigneault · Dec 21, 2024 · Dec 21, 2024 · Dec 21, 2024 · Dec 21, 2024
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -16,7 +16,9 @@ Changes:
 
 Fixes:
 ------
-- No change.
+- Fix ``PROV`` endpoints returning multiple ``Content-Type`` headers
+  (default ``text/html`` inserted by the ``webob.response.Response`` class on top of the explicitly specified one),
+  leading to inconsistent response parsing and rendering across clients.
 
 .. _changes_6.1.0:
 

diff --git a/tests/functional/test_cli.py b/tests/functional/test_cli.py
@@ -43,7 +43,7 @@
 from weaver.config import WeaverConfiguration
 from weaver.datatype import DockerAuthentication, Service
 from weaver.execute import ExecuteReturnPreference
-from weaver.formats import ContentType, OutputFormat, get_cwl_file_format, repr_json
+from weaver.formats import ContentType, OutputFormat, clean_media_type_format, get_cwl_file_format, repr_json
 from weaver.notify import decrypt_email
 from weaver.processes.constants import CWL_REQUIREMENT_APP_DOCKER, ProcessSchema
 from weaver.processes.types import ProcessType
@@ -2581,7 +2581,8 @@ def setUp(self):
     def test_prov(self):
         result = mocked_sub_requests(self.app, self.client.prov, self.job_url)
         assert result.success
-        assert result.headers["Content-Type"] == ContentType.APP_JSON
+        ctype = clean_media_type_format(result.headers["Content-Type"], strip_parameters=True)
+        assert ctype == ContentType.APP_JSON
         assert isinstance(result.body, dict), "body should be the PROV-JSON"
         assert "actedOnBehalfOf" in result.body
         assert "agent" in result.body
@@ -2591,7 +2592,8 @@ def test_prov(self):
     def test_prov_yaml_by_output_format(self):
         result = mocked_sub_requests(self.app, self.client.prov, self.job_url, output_format=OutputFormat.YAML)
         assert result.success
-        assert result.headers["Content-Type"] == ContentType.APP_JSON, "original type should still be JSON (from API)"
+        ctype = clean_media_type_format(result.headers["Content-Type"], strip_parameters=True)
+        assert ctype == ContentType.APP_JSON, "original type should still be JSON (from API)"
         assert isinstance(result.body, dict), "response body should still be the original PROV-JSON"
         assert isinstance(result.text, str), "text property should be the PROV-JSON represented as YAML string"
         assert yaml.safe_load(result.text) == result.body, "PROV-JSON contents should be identical in YAML format"
@@ -2603,7 +2605,8 @@ def test_prov_yaml_by_output_format(self):
     def test_prov_xml_by_prov_format(self):
         result = mocked_sub_requests(self.app, self.client.prov, self.job_url, prov_format=ProvenanceFormat.PROV_XML)
         assert result.success
-        assert result.headers["Content-Type"] == ContentType.APP_XML, "original type should still be XML (from API)"
+        ctype = clean_media_type_format(result.headers["Content-Type"], strip_parameters=True)
+        assert ctype == ContentType.APP_XML, "original type should still be XML (from API)"
         assert isinstance(result.body, str), "body should be the PROV-XML representation"
         assert "actedOnBehalfOf" in result.body
         assert "agent" in result.body
@@ -2613,14 +2616,16 @@ def test_prov_xml_by_prov_format(self):
     def test_prov_info(self):
         result = mocked_sub_requests(self.app, self.client.prov, self.job_url, prov=ProvenancePathType.PROV_INFO)
         assert result.success
-        assert result.headers["Content-Type"] == ContentType.TEXT_PLAIN
+        ctype = clean_media_type_format(result.headers["Content-Type"], strip_parameters=True)
+        assert ctype == ContentType.TEXT_PLAIN
         assert "Research Object of CWL workflow run" in result.text
         assert self.job_id in result.text
 
     def test_prov_run(self):
         result = mocked_sub_requests(self.app, self.client.prov, self.job_url, prov=ProvenancePathType.PROV_RUN)
         assert result.success
-        assert result.headers["Content-Type"] == ContentType.TEXT_PLAIN
+        ctype = clean_media_type_format(result.headers["Content-Type"], strip_parameters=True)
+        assert ctype == ContentType.TEXT_PLAIN
         assert self.proc_id in result.text
         assert self.job_id in result.text
         assert "< wf:main/message" in result.text, (
@@ -2639,7 +2644,8 @@ def test_prov_run_with_id(self):
             prov_run_id=self.job_id,  # redundant in this case, but test that parameter is parsed and resolves
         )
         assert result.success
-        assert result.headers["Content-Type"] == ContentType.TEXT_PLAIN
+        ctype = clean_media_type_format(result.headers["Content-Type"], strip_parameters=True)
+        assert ctype == ContentType.TEXT_PLAIN
         assert self.proc_id in result.text
         assert self.job_id in result.text
         assert "< wf:main/message" in result.text, (

diff --git a/tests/functional/test_job_provenance.py b/tests/functional/test_job_provenance.py
@@ -99,6 +99,7 @@ def test_job_prov_json(self, queries, headers):
         prov_url = f"{self.job_url}/prov"
         resp = self.app.get(prov_url, params=queries, headers=headers)
         assert resp.status_code == 200
+        assert len(list(filter(lambda header: header[0] == "Content-Type", resp.headerlist))) == 1
         assert resp.content_type == ContentType.APP_JSON
         prov = resp.json
         assert "prefix" in prov
@@ -113,6 +114,7 @@ def test_job_prov_xml(self, queries, headers):
         prov_url = f"{self.job_url}/prov"
         resp = self.app.get(prov_url, params=queries, headers=headers)
         assert resp.status_code == 200
+        assert len(list(filter(lambda header: header[0] == "Content-Type", resp.headerlist))) == 1
         assert resp.content_type in ContentType.ANY_XML
         prov = resp.text
         assert "<prov:document xmlns:wfprov" in prov
@@ -121,6 +123,7 @@ def test_job_prov_ttl(self):
         prov_url = f"{self.job_url}/prov"
         resp = self.app.get(prov_url, headers={"Accept": ContentType.TEXT_TURTLE})
         assert resp.status_code == 200
+        assert len(list(filter(lambda header: header[0] == "Content-Type", resp.headerlist))) == 1
         assert resp.content_type == ContentType.TEXT_TURTLE
         prov = resp.text
         assert "@prefix cwlprov: " in prov
@@ -129,6 +132,7 @@ def test_job_prov_nt(self):
         prov_url = f"{self.job_url}/prov"
         resp = self.app.get(prov_url, headers={"Accept": ContentType.APP_NT})
         assert resp.status_code == 200
+        assert len(list(filter(lambda header: header[0] == "Content-Type", resp.headerlist))) == 1
         assert resp.content_type == ContentType.APP_NT
         prov = resp.text
         assert "_:N" in prov
@@ -138,6 +142,7 @@ def test_job_prov_provn(self):
         prov_url = f"{self.job_url}/prov"
         resp = self.app.get(prov_url, headers={"Accept": ContentType.TEXT_PROVN})
         assert resp.status_code == 200
+        assert len(list(filter(lambda header: header[0] == "Content-Type", resp.headerlist))) == 1
         assert resp.content_type == ContentType.TEXT_PROVN
         prov = resp.text
         assert "prov:type='wfprov:WorkflowEngine'" in prov
@@ -147,6 +152,7 @@ def test_job_prov_info_text(self):
         job_id = self.job_url.rsplit("/", 1)[-1]
         resp = self.app.get(prov_url, headers={"Accept": ContentType.TEXT_PLAIN})
         assert resp.status_code == 200
+        assert len(list(filter(lambda header: header[0] == "Content-Type", resp.headerlist))) == 1
         assert resp.content_type == ContentType.TEXT_PLAIN
         prov = resp.text
         assert f"Workflow run ID: urn:uuid:{job_id}" in prov
@@ -161,6 +167,7 @@ def test_job_prov_info_not_acceptable(self):
         headers = self.json_headers  # note: this is the test, while only plain text is supported
         resp = self.app.get(f"{prov_url}/info", headers=headers, expect_errors=True)
         assert resp.status_code == 406
+        assert len(list(filter(lambda header: header[0] == "Content-Type", resp.headerlist))) == 1
         assert resp.content_type == ContentType.APP_JSON, (
             "error should be in JSON regardless of Accept header or the normal contents media-type"
         )
@@ -176,6 +183,8 @@ def test_job_prov_commands(self, path, cmd):
         proc_url = f"/{path}/{self.proc_id}" if path == "processes" else ""
         prov_url = f"{proc_url}/jobs/{job_id}/prov/{cmd}"
         resp = self.app.get(prov_url, headers={"Accept": ContentType.TEXT_PLAIN})
+        assert resp.status_code == 200
+        assert len(list(filter(lambda header: header[0] == "Content-Type", resp.headerlist))) == 1
         assert resp.content_type == ContentType.TEXT_PLAIN
         assert resp.text != ""
 
@@ -194,6 +203,8 @@ def test_job_prov_run_id(self, path):
         job_id = self.job_url.rsplit("/", 1)[-1]
         prov_url = f"{self.job_url}/prov/{path}/{job_id}"
         resp = self.app.get(prov_url, headers={"Accept": ContentType.TEXT_PLAIN})
+        assert resp.status_code == 200
+        assert len(list(filter(lambda header: header[0] == "Content-Type", resp.headerlist))) == 1
         assert resp.content_type == ContentType.TEXT_PLAIN
         assert resp.text != ""
 

diff --git a/weaver/datatype.py b/weaver/datatype.py
@@ -1507,7 +1507,7 @@ def prov_path(self, container=None, extra_path=None, prov_format=None):
     def prov_data(
         self,
         container=None,     # type: Optional[AnySettingsContainer]
-        extra_path=None,    # type: Optional[ProvenancePathType]
+        extra_path=None,    # type: Optional[Union[ProvenancePathType, str]]
         prov_format=None,   # type: AnyContentType
     ):                      # type: (...) -> Tuple[Optional[str], Optional[AnyContentType]]
         """

diff --git a/weaver/wps_restapi/jobs/utils.py b/weaver/wps_restapi/jobs/utils.py
@@ -1451,8 +1451,7 @@ def get_job_prov_response(request):
             prov_body["error"] = "No such run ID for specified job provenance."
             prov_body["value"] = {"run_id": str(request.matchdict["run_id"])}
         prov_body["status"] = prov_err.code
-        return prov_err(json=prov_body, headers={"Content-Type": ContentType.APP_JSON})
+        return prov_err(json=prov_body, content_type=ContentType.APP_JSON)
     links = job.links(container=request, self_link="provenance")
     headers = [("Link", make_link_header(link)) for link in links]
-    headers.append(("Content-Type", prov_type))
-    return HTTPOk(body=prov_data, headers=headers)
+    return HTTPOk(body=prov_data, headers=headers, content_type=prov_type, charset="utf-8")