From e65418472e0830ff4d8a53bef43733e3419faf05 Mon Sep 17 00:00:00 2001
From: Francis Charette Migneault <francis.charette.migneault@gmail.com>
Date: Wed, 20 Sep 2023 23:39:33 -0400
Subject: [PATCH] [wip] fix/patch some test cases

---
 tests/functional/test_cli.py         |  4 +-
 tests/functional/test_wps_package.py |  8 +--
 tests/processes/test_convert.py      | 72 ---------------------------
 tests/processes/test_wps_package.py  | 74 ++++++++++++++++++++++++++++
 weaver/processes/convert.py          | 21 +++++---
 weaver/processes/wps_package.py      |  5 +-
 weaver/processes/wps_workflow.py     |  1 -
 7 files changed, 98 insertions(+), 87 deletions(-)

diff --git a/tests/functional/test_cli.py b/tests/functional/test_cli.py
index 2a3d0801d..337420fbb 100644
--- a/tests/functional/test_cli.py
+++ b/tests/functional/test_cli.py
@@ -492,7 +492,7 @@ def test_execute_inputs_invalid(self):
         ]:
             self.run_execute_inputs_schema_variant(invalid_inputs_schema, expect_success=False)
 
-    @pytest.mark.flaky(reruns=2, reruns_delay=1)
+    @pytest.mark.flaky(reruns=2, reruns_delay=3)
     def test_execute_manual_monitor_status_and_download_results(self):
         """
         Test a typical case of :term:`Job` execution, result retrieval and download, but with manual monitoring.
@@ -873,7 +873,7 @@ def add_docker_pull_ref(cwl, ref):
         cwl["requirements"][CWL_REQUIREMENT_APP_DOCKER] = {"dockerPull": ref}
         return cwl
 
-    @pytest.mark.flaky(reruns=2, reruns_delay=1)
+    @pytest.mark.flaky(reruns=2, reruns_delay=3)
     def test_deploy_docker_auth_username_password_valid(self):
         """
         Test that username and password arguments can be provided simultaneously for docker login.
diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py
index 4677779f3..1730e5c02 100644
--- a/tests/functional/test_wps_package.py
+++ b/tests/functional/test_wps_package.py
@@ -2318,7 +2318,7 @@ def test_execute_with_browsable_directory(self):
             assert all(file.startswith(cwl_stage_dir) for file in output_listing)
             assert all(any(file.endswith(dir_file) for file in output_listing) for dir_file in expect_http_files)
 
-    @pytest.mark.flaky(reruns=2, reruns_delay=1)
+    @pytest.mark.flaky(reruns=2, reruns_delay=3)
     def test_execute_with_json_listing_directory(self):
         """
         Test that HTTP returning JSON list of directory contents retrieves children files for the process.
@@ -3151,6 +3151,7 @@ def test_execute_cwl_enum_schema_combined_type_single_array_from_cwl(self):
         body = self.retrieve_payload(proc, "deploy", local=True)
         pkg = self.retrieve_payload(proc, "package", local=True)
         body["executionUnit"] = [{"unit": pkg}]
+        body["processDescription"]["process"]["id"] = self._testMethodName
         self.deploy_process(body, describe_schema=ProcessSchema.OGC)
 
         data = self.retrieve_payload(proc, "execute", local=True)
@@ -3162,7 +3163,7 @@ def test_execute_cwl_enum_schema_combined_type_single_array_from_cwl(self):
         with contextlib.ExitStack() as stack:
             for mock_exec in mocked_execute_celery():
                 stack.enter_context(mock_exec)
-            proc_url = f"/processes/{proc}/jobs"
+            proc_url = f"/processes/{self._testMethodName}/jobs"
             resp = mocked_sub_requests(self.app, "post_json", proc_url, timeout=5,
                                        data=exec_body, headers=self.json_headers, only_local=True)
             assert resp.status_code in [200, 201], f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}"
@@ -3203,6 +3204,7 @@ def test_execute_cwl_enum_schema_combined_type_single_array_from_wps(self, mock_
             version="1.0.0"
         )
         body["executionUnit"] = [{"href": wps}]
+        body["processDescription"]["process"]["id"] = self._testMethodName
         self.deploy_process(body, describe_schema=ProcessSchema.OGC)
 
         data = self.retrieve_payload(proc, "execute", local=True)
@@ -3234,7 +3236,7 @@ def test_execute_cwl_enum_schema_combined_type_single_array_from_wps(self, mock_
             mock_responses.add("GET", output_log_url, body="log", headers={"Content-Type": ContentType.TEXT_PLAIN})
             mock_responses.add("GET", output_zip_url, body="zip", headers={"Content-Type": ContentType.APP_ZIP})
 
-            proc_url = f"/processes/{proc}/jobs"
+            proc_url = f"/processes/{self._testMethodName}/jobs"
             resp = mocked_sub_requests(self.app, "post_json", proc_url, timeout=5,
                                        data=exec_body, headers=self.json_headers, only_local=True)
             assert resp.status_code in [200, 201], f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}"
diff --git a/tests/processes/test_convert.py b/tests/processes/test_convert.py
index 1f7b335e2..ae43288b5 100644
--- a/tests/processes/test_convert.py
+++ b/tests/processes/test_convert.py
@@ -649,78 +649,6 @@ def test_any2cwl_io_enum_validate(test_io, test_input, expect_valid):
             tool(**inputs)
 
 
-def test_any2cwl_io_enum_schema_name():
-    """
-    Ensure that :term:`CWL` ``Enum`` contains a ``name`` to avoid false-positive conflicting schemas.
-
-    When an ``Enum`` is reused multiple times to define an I/O, omitting the ``name`` makes the duplicate definition
-    to be considered a conflict, since :mod:`cwltool` will automatically apply an auto-generated ``name`` for that
-    schema.
-
-    .. seealso::
-        - https://github.com/common-workflow-language/cwltool/issues/1908
-    """
-    test_symbols = [str(i) for i in range(100)]
-    test_input = {
-        "id": "test",
-        "data_type": "string",
-        "allowed_values": test_symbols,
-        "any_value": False,
-        "min_occurs": 0,
-        "max_occurs": 3,
-    }
-    cwl_expect = {
-        "id": "test",
-        "type": [
-            "null",
-            {
-                "type": "enum",
-                "symbols": test_symbols,
-            },
-            {
-                "type": "array",
-                "items": {
-                    "type": "enum",
-                    "symbols": test_symbols,
-                },
-            },
-        ]
-    }
-    cwl_input, _ = any2cwl_io(test_input, IO_INPUT)
-    assert cwl_input == cwl_expect
-
-    # test it against an actual definition
-    cwl = {
-        "cwlVersion": "v1.2",
-        "class": "CommandLineTool",
-        "baseCommand": "echo",
-        "inputs": {
-            "test": cwl_input,
-        },
-        "outputs": {
-            "output": "stdout",
-        }
-    }
-    cwl_without_name = deepcopy(cwl)
-    cwl_without_name["inputs"]["test"].pop("name", None)  # FIXME: no None since 'name' required
-
-    factory = CWLFactory()
-    with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8") as tmp_file:
-        json.dump(cwl_without_name, tmp_file)
-        tmp_file.flush()
-        with pytest.raises(WorkflowException):
-            tool = factory.make(f"file://{tmp_file.name}")
-            tool(test=test_symbols[0])
-
-        tmp_file.seek(0)
-        json.dump(cwl, tmp_file)
-        tmp_file.flush()
-        tool = factory.make(f"file://{tmp_file.name}")
-        tool(test=None)
-        tool(test=test_symbols[0])
-        tool(test=[test_symbols[0]])
-
-
 @pytest.mark.parametrize(
     ["test_io", "expect"],
     [
diff --git a/tests/processes/test_wps_package.py b/tests/processes/test_wps_package.py
index a8ad1a9aa..d2e3d737c 100644
--- a/tests/processes/test_wps_package.py
+++ b/tests/processes/test_wps_package.py
@@ -4,9 +4,12 @@
 .. seealso::
     - :mod:`tests.functional.wps_package`.
 """
+import warnings
+
 import contextlib
 import copy
 import io
+import json
 import logging
 import os
 import re
@@ -18,6 +21,9 @@
 import cwltool.process
 import mock
 import pytest
+from _pytest.outcomes import Failed
+from cwltool.errors import WorkflowException
+from cwltool.factory import Factory as CWLFactory
 
 from tests.utils import assert_equal_any_order
 from weaver.datatype import Process
@@ -486,3 +492,71 @@ def test_cwl_extension_requirements_no_error():
         f"Error message must contain all following items: {valid_msg}. "
         f"Some items were missing in: \n{message}"
     )
+
+
+def test_cwl_enum_schema_name_patched():
+    """
+    Ensure that :term:`CWL` ``Enum`` contains a ``name`` to avoid false-positive conflicting schemas.
+
+    When an ``Enum`` is reused multiple times to define an I/O, omitting the ``name`` makes the duplicate definition
+    to be considered a conflict, since :mod:`cwltool` will automatically apply an auto-generated ``name`` for that
+    schema.
+
+    .. seealso::
+        - https://github.com/common-workflow-language/cwltool/issues/1908
+        - :meth:`weaver.processes.wps_package.WpsPackage.update_cwl_schema_names`
+    """
+    test_symbols = [str(i) for i in range(100)]
+    cwl_input_without_name = {
+        "type": [
+            "null",
+            {
+                "type": "enum",
+                "symbols": test_symbols,
+            },
+            {
+                "type": "array",
+                "items": {
+                    "type": "enum",
+                    "symbols": test_symbols,
+                },
+            },
+        ]
+    }
+    cwl_without_name = {
+        "cwlVersion": "v1.2",
+        "class": "CommandLineTool",
+        "baseCommand": "echo",
+        "inputs": {
+            "test": cwl_input_without_name,
+        },
+        "outputs": {
+            "output": {"type": "stdout"},
+        }
+    }  # type: CWL
+
+    factory = CWLFactory()
+    with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8") as tmp_file:
+        json.dump(cwl_without_name, tmp_file)
+        tmp_file.flush()
+        try:
+            with pytest.raises(WorkflowException):
+                tool = factory.make(f"file://{tmp_file.name}")
+                tool(test=test_symbols[0])
+        except Failed:
+            # WARNING:
+            #   CWL tool schema-salad validator seems to inconsistently raise in some situations and not others (?)
+            #   (see https://github.com/common-workflow-language/cwltool/issues/1908)
+            #   Ignore if it raises since it is not breaking for our test and implementation.
+            warnings.warn("CWL nested enums without 'name' did not raise, but not breaking...")
+
+    # our implementation that eventually gets called goes through 'update_cwl_schema_names', that one MUST NOT raise
+    pkg = WpsPackage(package=cwl_without_name, identifier="test", title="test")
+    pkg.update_cwl_schema_names()
+    with tempfile.NamedTemporaryFile(mode="w", encoding="utf-8") as tmp_file:
+        json.dump(pkg.package, tmp_file)
+        tmp_file.flush()
+        tool = factory.make(f"file://{tmp_file.name}")
+        tool(test=None)
+        tool(test=test_symbols[0])
+        tool(test=[test_symbols[0]])
diff --git a/weaver/processes/convert.py b/weaver/processes/convert.py
index 7861a8f5d..6298fd71e 100644
--- a/weaver/processes/convert.py
+++ b/weaver/processes/convert.py
@@ -528,9 +528,11 @@ def _convert_any2cwl_io_complex(cwl_io, cwl_ns, wps_io, io_select):
     # outputs are allowed to define only one 'applied' format
     for field in WPS_FIELD_FORMAT:
         fmt = get_field(wps_io, field, search_variations=True)
+        if not fmt:
+            continue
         if isinstance(fmt, (list, tuple)) and len(fmt) == 1:
             fmt = fmt[0]
-        if isinstance(fmt, dict):
+        if not isinstance(fmt, (list, tuple)):  # could be 'dict', 'Format' or any other 'object' holder
             cwl_io_ref, cwl_io_fmt, cwl_io_ext = _get_cwl_fmt_details(fmt)
             if cwl_io_ref and cwl_io_fmt:
                 cwl_ns.update(cwl_io_ref)
@@ -1193,15 +1195,21 @@ def resolve_cwl_io_type_schema(io_info, cwl_schema_names=None):
         - :meth:`weaver.processes.wps_package.WpsPackage.update_cwl_schema_names`
     """
     if not isinstance(io_info, dict) or not cwl_schema_names:
-        return io_info
+        return get_cwl_io_type_name(io_info)
     io_type = io_info.get("type")
     io_item = io_info.get("items")
-    if io_type == "array" and isinstance(io_item, str) and io_item in cwl_schema_names:
+    if io_type == PACKAGE_ARRAY_BASE and isinstance(io_item, str):
         io_info = io_info.copy()  # avoid undoing CWL tool parsing/resolution
-        io_info["items"] = cwl_schema_names[io_item]._props
-    elif isinstance(io_type, str) and io_type in cwl_schema_names:
+        io_name = get_cwl_io_type_name(io_item)  # avoid mapping back to File/Directory records in CWL schema names
+        if io_name in cwl_schema_names:
+            io_name = cwl_schema_names[io_item]._props
+        io_info["items"] = io_name
+    elif isinstance(io_type, str):
         io_info = io_info.copy()  # avoid undoing CWL tool parsing/resolution
-        io_info["type"] = cwl_schema_names[io_type]._props
+        io_name = get_cwl_io_type_name(io_type)  # avoid mapping back to File/Directory records in CWL schema names
+        if io_name in cwl_schema_names:
+            io_name = cwl_schema_names[io_type]._props
+        io_info["type"] = io_name
     return io_info
 
 
@@ -1346,7 +1354,6 @@ def get_cwl_io_type(io_info, strict=True, cwl_schema_names=None):
             io_type_many = set()
             io_base_type = None
             for i, typ in enumerate(io_type, start=int(is_null)):
-                typ = get_cwl_io_type_name(typ)
                 typ = resolve_cwl_io_type_schema(typ, cwl_schema_names)
                 io_name = io_info["name"]
                 sub_type = {"type": typ, "name": f"{io_name}[{i}]"}  # type: CWL_IO_Type
diff --git a/weaver/processes/wps_package.py b/weaver/processes/wps_package.py
index 6eb92567f..191fbe1ab 100644
--- a/weaver/processes/wps_package.py
+++ b/weaver/processes/wps_package.py
@@ -145,7 +145,6 @@
     from weaver.processes.constants import IO_Select_Type
     from weaver.processes.convert import (
         ANY_IO_Type,
-        CWL_Input_Type,
         JSON_IO_Type,
         PKG_IO_Type,
         WPS_Input_Type,
@@ -157,6 +156,7 @@
         AnyValueType,
         CWL,
         CWL_AnyRequirements,
+        CWL_Input_Type,
         CWL_IO_ComplexType,
         CWL_IO_Type,
         CWL_Requirement,
@@ -1545,6 +1545,7 @@ def update_requirements(self):
                 self.package["baseCommand"] = os.path.join(active_python_path, "python")
 
     def update_cwl_schema_names(self):
+        # type: () -> None
         """
         Detect duplicate :term:`CWL` schema types not referred by name to provide one and avoid resolution failure.
 
@@ -1883,7 +1884,7 @@ def must_fetch(self, input_ref, input_type):
     def make_inputs(self,
                     wps_inputs,         # type: Dict[str, Deque[WPS_Input_Type]]
                     cwl_inputs_info,    # type: Dict[str, CWL_Input_Type]
-                    cwl_schema_names,   # type: Dict[str, CWL_SchemaNames]
+                    cwl_schema_names,   # type: CWL_SchemaNames
                     ):                  # type: (...) -> Dict[str, ValueType]
         """
         Converts :term:`WPS` input values to corresponding :term:`CWL` input values for processing by the package.
diff --git a/weaver/processes/wps_workflow.py b/weaver/processes/wps_workflow.py
index cd11b2251..bd893bef9 100644
--- a/weaver/processes/wps_workflow.py
+++ b/weaver/processes/wps_workflow.py
@@ -1,6 +1,5 @@
 import collections.abc
 import logging
-import os
 import tempfile
 from functools import partial
 from typing import TYPE_CHECKING, cast  # these are actually used in the code