Implement a subset of the Common Workflow Language #12909

Draft · wants to merge 59 commits into base: dev

Commits (59)
b271c54
[WIP] Implement records - heterogenous dataset collections.
jmchilton May 18, 2020
dc1499e
Deal with workflow definitions without position fields.
jmchilton Nov 18, 2019
e3b8904
Implement subset of the Common Workflow Language tool and workflow fo…
jmchilton Nov 5, 2020
51fcd60
CWL Testing and Runner Improvements.
jmchilton Nov 6, 2018
e00bcad
Swap default for beta formats (do not include in Galaxy).
jmchilton Nov 20, 2019
6ed11a4
WIP: Work toward Galaxy-flavored CWL tools.
jmchilton Apr 20, 2018
f4d395d
[WIP] Implement client UI for field parameter type for CWL.
jmchilton Nov 17, 2019
baa6758
WORKAROUND TO GET TAR TO DIRECTORY WORKING AGAIN.
jmchilton Nov 7, 2020
423dfc9
Add `fields` parameter to create collection schema
mvdbeek Nov 9, 2021
bc14413
Fix non_data_connection workflows
mvdbeek Nov 9, 2021
55462af
Fix handling of uploaded_file_name
mvdbeek Nov 10, 2021
cc60a5e
Fix directory location tests
mvdbeek Nov 10, 2021
d70561a
start documenting state of CWL support
mr-c Nov 11, 2021
eee325d
Add sentinel value workaround for GALAXY_SLOTS hack
mvdbeek Nov 11, 2021
7b2a5c1
Assert length of input connections, instead of inputs when disconnect…
mvdbeek Nov 12, 2021
fc39e3e
Fix type hints
mr-c Nov 13, 2021
7313a3b
Disable cheetah in configfiles, env vars for cwl tools
mvdbeek Dec 6, 2021
dee46b0
Drop test_deserialize_cwl_tool, already testing that more accurately …
mvdbeek Dec 6, 2021
6d1e386
Fix wrong resolution of Any type when re-using CWL tools
mvdbeek Dec 7, 2021
f5db79f
Coerce discovered optional files to data
mvdbeek Dec 7, 2021
8901180
Fix complex types via record collection type
mvdbeek Dec 8, 2021
380c846
Fix handle_known_output for nested output records
mvdbeek Dec 8, 2021
fbe35ee
Skip staging inputs for outputs
mvdbeek Dec 8, 2021
98af90c
Fix packed document support if main/#main is tool instead of workflow
mvdbeek Dec 8, 2021
b5be7d4
Fix tool-provided metadata for CONVERTER_tar_to_directory
nsoranzo Dec 9, 2021
e45e3fa
Implement rough mapping between EDAM formats and datatypes
mvdbeek Dec 9, 2021
14232f6
Support uploading directory literals
mvdbeek Dec 10, 2021
5f1d220
Keep directory parameters in job parameters
mvdbeek Dec 11, 2021
52c77fc
Merge subworkflow input logic?
mvdbeek Sep 4, 2023
3da8f9b
Drop divergent to_cwl/from_cwl, factor out extra_step_state building
mvdbeek Sep 5, 2023
e83d002
TreeDict fix
mvdbeek Sep 5, 2023
f463f3e
Use regular staging for CWL tests instead of allow_path_paste, which …
mvdbeek Sep 5, 2023
fd84b57
Fix directory file upload request
mvdbeek Sep 6, 2023
90a8367
Record unnamed_outputs as job outputs, wait for job outputs in stagin…
mvdbeek Sep 6, 2023
ec22d89
Fix directory uploads
mvdbeek Sep 6, 2023
00a38ce
Download complex outputs
mvdbeek Sep 25, 2023
ffec166
Download secondary files as well
mvdbeek Sep 25, 2023
c4bcf04
Implement downloading directory archive
mvdbeek Oct 30, 2023
071ffc2
Quickfix for moving away tool working directory
mvdbeek Oct 30, 2023
863b117
Various fixes for stricter cwltool and cwltest
mvdbeek Oct 31, 2023
9ed193b
Fix up ontology to datatype mapping for __FETCH_DATA__
mvdbeek Oct 31, 2023
d194dfa
Shortcut param_dict building for CWL tools
mvdbeek Oct 31, 2023
de2be2d
WIP: untar directory to extra_files_path
mvdbeek Nov 1, 2023
7bc997c
Add test for workflow default file overrides tool default file
mvdbeek Nov 3, 2023
8b61ed2
WIP:CWL default file value_from work
mvdbeek Nov 4, 2023
da71e29
Into split trans to app
mvdbeek Nov 5, 2023
5ed28f1
Separate and fix value_from overriding default
mvdbeek Nov 5, 2023
e7f60a7
Ensure that expression tool null values are treated as null values wh…
mvdbeek Nov 5, 2023
22dced3
Hack: default files in FieldTypeToolParameter
mvdbeek Nov 5, 2023
bb262a9
Fix literal value if field type
mvdbeek Nov 5, 2023
b89fb07
Replace file location with URL ...
mvdbeek Nov 5, 2023
6d2d73c
Pack workflow
mvdbeek Nov 5, 2023
cd30963
Update list of new failing 1.2 tests
mvdbeek Nov 6, 2023
82cfe43
Drop now passing red tests
mvdbeek Nov 6, 2023
afa2cbb
Exclude red and required 1.0 tests from github matrix
mvdbeek Nov 6, 2023
1eb0267
Fix workflow cancellation exceptions
mvdbeek Nov 6, 2023
34e052c
Fix output addition to history if input name is same as output name
mvdbeek Nov 7, 2023
81696e3
Fix directory unpacking
mvdbeek Nov 8, 2023
4801063
Fix ``test_composite_datatype_stage_upload1`` test
nsoranzo Apr 2, 2024
6 changes: 4 additions & 2 deletions .github/workflows/cwl_conformance.yaml
@@ -18,15 +18,17 @@ concurrency:
jobs:
test:
name: Test
if: ${{ false }}
runs-on: ubuntu-latest
continue-on-error: ${{ startsWith(matrix.marker, 'red') }}
strategy:
fail-fast: false
matrix:
python-version: ['3.8']
marker: ['green', 'red and required', 'red and not required']
conformance-version: ['cwl_conformance_v1_0'] #, 'cwl_conformance_v1_1', 'cwl_conformance_v1_2']
conformance-version: ['cwl_conformance_v1_0', 'cwl_conformance_v1_1', 'cwl_conformance_v1_2']
exclude:
- marker: red and required
conformance-version: cwl_conformance_v1_0
services:
postgres:
image: postgres:13
1 change: 1 addition & 0 deletions client/src/api/datasets.ts
@@ -67,6 +67,7 @@ export async function copyDataset(
// TODO: Investigate. These should be optional, but the API requires explicit null values?
type,
copy_elements: null,
fields: null,
hide_source_items: null,
instance_type: null,
},
19 changes: 19 additions & 0 deletions client/src/api/schema/schema.ts
@@ -6955,6 +6955,12 @@ export interface components {
* @description List of elements that should be in the new collection.
*/
element_identifiers?: components["schemas"]["CollectionElementIdentifier"][] | null;
/**
* Fields
* @description List of fields to create for this collection. Set to 'auto' to guess fields from identifiers.
* @default []
*/
fields: string | components["schemas"]["FieldDict"][] | null;
/**
* Folder Id
* @description The ID of the library folder that will contain the collection. Required if `instance_type=library`.
@@ -7147,6 +7153,12 @@ export interface components {
* @description List of elements that should be in the new collection.
*/
element_identifiers?: components["schemas"]["CollectionElementIdentifier"][] | null;
/**
* Fields
* @description List of fields to create for this collection. Set to 'auto' to guess fields from identifiers.
* @default []
*/
fields: string | components["schemas"]["FieldDict"][] | null;
/**
* Folder Id
* @description The ID of the library folder that will contain the collection. Required if `instance_type=library`.
@@ -9097,6 +9109,13 @@ export interface components {
/** Hash Value */
hash_value: string;
};
/** FieldDict */
FieldDict: {
/** Name */
name: string;
/** Type */
type: string;
};
/** FileDataElement */
FileDataElement: {
/** Md5 */
1 change: 1 addition & 0 deletions client/src/components/History/model/queries.ts
@@ -86,6 +86,7 @@ export async function createDatasetCollection(history: HistorySummary, inputs =
copy_elements: true,
name: "list",
element_identifiers: [],
fields: "auto",
hide_source_items: true,
};
const payload = Object.assign({}, defaults, inputs);
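Taken together, the schema and client changes above thread a new `fields` option through collection creation, with `"auto"` inferring fields from the element identifiers. A rough sketch of what a request using it might look like from Python; the endpoint path, payload keys, and IDs here are assumptions extrapolated from the schema diff rather than confirmed by this PR:

```python
# Sketch only: creating a CWL-style "record" collection with explicit fields.
# Endpoint, payload keys, and the IDs below are assumptions inferred from the
# schema diff above.
import requests

payload = {
    "history_id": "1cd8e2f6b131e891",  # hypothetical encoded history ID
    "instance_type": "history",
    "collection_type": "record",
    "name": "paired reads record",
    "element_identifiers": [
        {"name": "forward", "src": "hda", "id": "f2db41e1fa331b3e"},
        {"name": "reverse", "src": "hda", "id": "f597429621d6eb2b"},
    ],
    # Either an explicit list of FieldDict objects ({"name", "type"}), or the
    # string "auto" to guess fields from the element identifiers.
    "fields": [
        {"name": "forward", "type": "File"},
        {"name": "reverse", "type": "File"},
    ],
}
response = requests.post(
    "https://galaxy.example.org/api/dataset_collections",
    json=payload,
    headers={"x-api-key": "<your API key>"},
)
response.raise_for_status()
```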
24 changes: 24 additions & 0 deletions doc/source/dev/cwl.md
@@ -0,0 +1,24 @@
CWL import in Galaxy
====================

What is supported
-----------------

What is not supported
---------------------

Some CWL expressions or parameter references that do math on `$(resources.cores)`
or similar will likely not work.

How to enable it?
-----------------

1. List the paths to CWL tools in `tool_conf.xml`.
2. Set the following in `galaxy.yml`:

```yaml
enable_beta_tool_formats: true
enable_beta_workflow_modules: true
check_upload_content: false
strict_cwl_validation: false
```
3 changes: 3 additions & 0 deletions lib/galaxy/config/__init__.py
@@ -931,6 +931,9 @@ def _process_config(self, kwargs: Dict[str, Any]) -> None:
else None
)

# TODO: migrate to schema.
# Should CWL artifacts be loaded with strict validation enabled?
self.strict_cwl_validation = string_as_bool(kwargs.get("strict_cwl_validation", "True"))
# These are not even beta - just experiments - don't use them unless
# you want your tools to be broken in the future.
self.enable_beta_tool_formats = string_as_bool(kwargs.get("enable_beta_tool_formats", "False"))
2 changes: 1 addition & 1 deletion lib/galaxy/config/sample/datatypes_conf.xml.sample
@@ -301,7 +301,7 @@
<datatype extension="tar" auto_compressed_types="gz,bz2" type="galaxy.datatypes.binary:CompressedArchive" subclass="true" display_in_upload="true">
<converter file="archive_to_directory.xml" target_datatype="directory"/>
</datatype>
<datatype extension="directory" type="galaxy.datatypes.data:Directory"/>
<datatype extension="directory" type="galaxy.datatypes.data:Directory" display_in_upload="true"/>
<datatype extension="zarr" type="galaxy.datatypes.data:ZarrDirectory" />
<datatype extension="ome_zarr" type="galaxy.datatypes.images:OMEZarr" />
<datatype extension="yaml" type="galaxy.datatypes.text:Yaml" display_in_upload="true" />
23 changes: 14 additions & 9 deletions lib/galaxy/datatypes/converters/tar_to_directory.xml
@@ -1,25 +1,30 @@
<tool id="CONVERTER_tar_to_directory" name="Convert tar to directory" version="1.0.1" profile="17.05">
<tool id="CONVERTER_tar_to_directory" name="Convert tar to directory" version="1.0.1" profile="21.09">
<!-- Don't use tar directly so we can verify safety of results - tar -xzf '$input1'; -->
<requirements>
<requirement type="package" version="23.2.1">galaxy-util</requirement>
</requirements>
<command>
mkdir '$output1.files_path';
cd '$output1.files_path';
python -c "from galaxy.util.compression_utils import CompressedFile; CompressedFile('$input1').extract('.');"
</command>
<command detect_errors="exit_code"><![CDATA[
cp '$provided_metadata' 'galaxy.json' &&
mkdir '$output1.files_path' &&
cd '$output1.files_path' &&
python -c "from galaxy.util.compression_utils import CompressedFile; CompressedFile('$input1').extract('.');"
]]></command>
<configfiles>
<configfile name="provided_metadata">{"output1": {"created_from_basename": "${input1.created_from_basename}"}}
</configfile>
</configfiles>
<inputs>
<param format="tar" name="input1" type="data"/>
</inputs>
<outputs>
<data format="directory" name="output1"/>
<data format="directory" name="output1" metadata_source="input1" />
</outputs>
<tests>
<test>
<param name="input1" ftype="tar" value="testdir1.tar"/>
<output name="output1" ftype="directory" value="testdir1.tar.directory"/>
</test>
</tests>
<help>
</help>
<help><![CDATA[
]]></help>
</tool>
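The converter's command line boils down to a safe archive extraction through galaxy-util. A minimal standalone sketch of that step (the input path is hypothetical; `CompressedFile` is the same helper the tool invokes above):

```python
# Sketch of the converter's core step: extract an archive with galaxy-util's
# CompressedFile rather than invoking `tar -xzf` directly, so the safety of
# the results (e.g. no path traversal outside the target) can be verified.
from galaxy.util.compression_utils import CompressedFile

CompressedFile("/tmp/testdir1.tar").extract(".")  # hypothetical input path
```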
30 changes: 29 additions & 1 deletion lib/galaxy/datatypes/registry.py
@@ -72,6 +72,7 @@
self.config = config
self.edam = edam
self.datatypes_by_extension: Dict[str, Data] = {}
self.datatypes_by_format = {}
self.datatypes_by_suffix_inferences = {}
self.mimetypes_by_extension = {}
self.datatype_converters = {}
@@ -269,13 +270,25 @@
upload_warning_template = Template(upload_warning_el.text or "")
datatype_instance = datatype_class()
self.datatypes_by_extension[extension] = datatype_instance
if not datatype_class.is_subclass:
edam_format = datatype_class.edam_format
prefixed_format = f"edam:{edam_format}"
if prefixed_format not in self.datatypes_by_format:
register_datatype_by_format = True
for super_klass in datatype_class.__mro__[1:-1]:
super_edam_format = getattr(super_klass, "edam_format", None)
if super_edam_format == edam_format:
register_datatype_by_format = False
break
if register_datatype_by_format:
self.datatypes_by_format[prefixed_format] = datatype_instance
if mimetype is None:
# Use default mimetype per datatype specification.
mimetype = self.datatypes_by_extension[extension].get_mime()
self.mimetypes_by_extension[extension] = mimetype
if datatype_class.track_type:
self.available_tracks.append(extension)
if display_in_upload and extension not in self.upload_file_formats:
if display_in_upload:
self.upload_file_formats.append(extension)
# Max file size cut off for setting optional metadata.
self.datatypes_by_extension[extension].max_optional_metadata_filesize = elem.get(
@@ -413,6 +426,7 @@
override=override,
compressed_sniffers=compressed_sniffers,
)
self.upload_file_formats = list(set(self.upload_file_formats))
self.upload_file_formats.sort()
# Load build sites
if use_build_sites:
@@ -613,6 +627,20 @@
"""Returns a datatype object based on an extension"""
return self.datatypes_by_extension.get(ext, None)

def get_datatype_by_format_ontology(self, ontology: str):
"""Returns a datatype by format ontology"""
if "edamontology.org/" in ontology:

Check failure from Code scanning / CodeQL: Incomplete URL substring sanitization (High). The string `edamontology.org/` may be at an arbitrary position in the sanitized URL.
ontology = f"edam:{ontology.split('edamontology.org/')[1]}"
return self.datatypes_by_format.get(ontology)

def get_datatype_ext_by_format_ontology(self, ontology: str, only_uploadable: bool = False) -> Optional[str]:
"""Returns a datatype by format ontology"""
datatype = self.get_datatype_by_format_ontology(ontology)
if datatype:
if not only_uploadable or datatype.file_ext in self.upload_file_formats:
return datatype.file_ext
return None

def change_datatype(self, data, ext):
if data.extension != ext:
data.extension = ext
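A usage sketch for the new ontology lookups; how the registry is constructed and loaded here is assumed for illustration and is not part of this diff:

```python
# Sketch: resolving an EDAM format IRI to a Galaxy datatype extension via the
# new lookup methods. Registry construction/loading details are assumptions.
from galaxy.datatypes.registry import Registry

registry = Registry()
registry.load_datatypes(root_dir="lib/galaxy/config", config="sample/datatypes_conf.xml.sample")

# Full IRIs are normalized to the "edam:<id>" keys kept in datatypes_by_format.
ext = registry.get_datatype_ext_by_format_ontology(
    "http://edamontology.org/format_3752", only_uploadable=True
)
print(ext)  # e.g. "csv" if a datatype registered edam_format="format_3752"
```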
7 changes: 4 additions & 3 deletions lib/galaxy/jobs/__init__.py
@@ -1144,7 +1144,7 @@ def can_split(self):

@property
def is_cwl_job(self):
return self.tool.tool_type == "cwl"
return self.tool.tool_type in ["galactic_cwl", "cwl"]

def get_job_runner_url(self):
log.warning(f"({self.job_id}) Job runner URLs are deprecated, use destinations instead.")
@@ -1776,8 +1776,9 @@ def _finish_dataset(
dataset.mark_unhidden()
elif not purged:
# If the tool was expected to set the extension, attempt to retrieve it
if dataset.ext == "auto":
dataset.extension = context.get("ext", "data")
context_ext = context.get("ext", "data")
if dataset.ext == "auto" or (dataset.ext == "data" and context_ext != "data"):
dataset.extension = context_ext
dataset.init_meta(copy_from=dataset)
# if a dataset was copied, it won't appear in our dictionary:
# either use the metadata from originating output dataset, or call set_meta on the copies
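The extension fallback introduced in this hunk can be summarized as a small pure function (an illustration of the rule, not code from the PR):

```python
# Illustration of the extension-resolution rule above: tool-provided metadata
# wins while the dataset's extension is still unresolved ("auto") or generic
# ("data") and the tool reported something more specific.
def resolve_extension(dataset_ext: str, context_ext: str = "data") -> str:
    if dataset_ext == "auto" or (dataset_ext == "data" and context_ext != "data"):
        return context_ext
    return dataset_ext

assert resolve_extension("auto") == "data"
assert resolve_extension("data", "directory") == "directory"
assert resolve_extension("txt") == "txt"  # an already-set extension is kept
```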
35 changes: 24 additions & 11 deletions lib/galaxy/jobs/command_factory.py
@@ -100,19 +100,26 @@ def build_command(
external_command_shell = container.shell
else:
external_command_shell = shell
externalized_commands = __externalize_commands(
job_wrapper, external_command_shell, commands_builder, remote_command_params, container=container
)
if container and modify_command_for_container:
# Stop now and build command before handling metadata and copying
# working directory files back. These should always happen outside
# of docker container - no security implications when generating
# metadata and means no need for Galaxy to be available to container
# and not copying workdir outputs back means one can be more restrictive
# of where container can write to in some circumstances.
run_in_container_command = container.containerize_command(externalized_commands)
if job_wrapper.tool and not job_wrapper.tool.may_use_container_entry_point:
externalized_commands = __externalize_commands(
job_wrapper, external_command_shell, commands_builder, remote_command_params, container=container
)
# Stop now and build command before handling metadata and copying
# working directory files back. These should always happen outside
# of docker container - no security implications when generating
# metadata and means no need for Galaxy to be available to container
# and not copying workdir outputs back means one can be more restrictive
# of where container can write to in some circumstances.
run_in_container_command = container.containerize_command(externalized_commands)
else:
tool_commands = commands_builder.build()
run_in_container_command = container.containerize_command(tool_commands)
commands_builder = CommandsBuilder(run_in_container_command)
else:
externalized_commands = __externalize_commands(
job_wrapper, external_command_shell, commands_builder, remote_command_params, container=container
)
commands_builder = CommandsBuilder(externalized_commands)

# Galaxy writes I/O files to outputs, Pulsar uses metadata. metadata seems like
@@ -130,7 +137,13 @@

# Copy working and outputs before job submission so that these can be restored on resubmission
# xref https://github.com/galaxyproject/galaxy/issues/3289
commands_builder.prepend_command(PREPARE_DIRS)
if not job_wrapper.is_cwl_job:
commands_builder.prepend_command(PREPARE_DIRS)
else:
# Can't do the rm -rf working for CWL jobs since we may have staged outputs
# into that directory. This does mean CWL is incompatible with job manager triggered
# retries - what can we do with that information?
commands_builder.prepend_command("mkdir -p outputs; cd working")

__handle_remote_command_line_building(commands_builder, job_wrapper, for_pulsar=for_pulsar)

12 changes: 11 additions & 1 deletion lib/galaxy/jobs/runners/local.py
@@ -4,6 +4,7 @@

import datetime
import logging
import math
import os
import subprocess
import tempfile
@@ -67,7 +68,16 @@ def _command_line(self, job_wrapper: "MinimalJobWrapper") -> Tuple[str, str]:
if slots:
slots_statement = f'GALAXY_SLOTS="{int(slots)}"; export GALAXY_SLOTS; GALAXY_SLOTS_CONFIGURED="1"; export GALAXY_SLOTS_CONFIGURED;'
else:
slots_statement = 'GALAXY_SLOTS="1"; export GALAXY_SLOTS;'
cores_min = 1
if job_wrapper.tool:
try:
# In CWL 1.2 it can be a float that can be rounded to the next whole number
cores_min = math.ceil(float(job_wrapper.tool.cores_min))
except ValueError:
# TODO: in CWL this can be an expression referencing runtime
# parameters, e.g. `$(inputs.special_file.size)`
pass
slots_statement = f'GALAXY_SLOTS="{cores_min}"; export GALAXY_SLOTS;'

job_id = job_wrapper.get_id_tag()
job_file = JobState.default_job_file(job_wrapper.working_directory, job_id)
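In isolation, the fallback above behaves roughly as follows (a sketch of the rounding rule, matching the `except ValueError` in the diff):

```python
# Sketch of the GALAXY_SLOTS fallback above: CWL 1.2 allows a fractional
# coresMin, which is rounded up to the next whole number of slots, while an
# unevaluated runtime expression falls back to a single slot.
import math

def slots_for(cores_min) -> int:
    try:
        return math.ceil(float(cores_min))
    except ValueError:
        # cores_min may still be an expression, e.g. "$(inputs.special_file.size)"
        return 1

assert slots_for(1) == 1
assert slots_for(1.5) == 2  # rounded up per CWL 1.2
assert slots_for("$(inputs.special_file.size)") == 1
```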
17 changes: 14 additions & 3 deletions lib/galaxy/managers/collections.py
@@ -175,6 +175,7 @@ def create(
flush=True,
completed_job=None,
output_name=None,
fields=None,
):
"""
PRECONDITION: security checks on ability to add to parent
@@ -199,6 +200,7 @@
hide_source_items=hide_source_items,
copy_elements=copy_elements,
history=history,
fields=fields,
)

implicit_inputs = []
@@ -242,8 +244,11 @@ def _create_instance_for_collection(
name=name,
)
assert isinstance(dataset_collection_instance, model.HistoryDatasetCollectionAssociation)

if implicit_inputs:
for input_name, input_collection in implicit_inputs:
if getattr(input_collection, "ephemeral", False):
input_collection = input_collection.persistent_object
dataset_collection_instance.add_implicit_input_collection(input_name, input_collection)

if implicit_output_name:
@@ -285,17 +290,20 @@ def create_dataset_collection(
hide_source_items=None,
copy_elements=False,
history=None,
fields=None,
):
# Make sure at least one of these is None.
assert element_identifiers is None or elements is None

if element_identifiers is None and elements is None:
raise RequestParameterInvalidException(ERROR_INVALID_ELEMENTS_SPECIFICATION)
if not collection_type:
raise RequestParameterInvalidException(ERROR_NO_COLLECTION_TYPE)

collection_type_description = self.collection_type_descriptions.for_collection_type(collection_type)
collection_type_description = self.collection_type_descriptions.for_collection_type(
collection_type, fields=fields
)
has_subcollections = collection_type_description.has_subcollections()

# If we have elements, this is an internal request, don't need to load
# objects from identifiers.
if elements is None:
@@ -319,8 +327,9 @@

if elements is not self.ELEMENTS_UNINITIALIZED:
type_plugin = collection_type_description.rank_type_plugin()
dataset_collection = builder.build_collection(type_plugin, elements)
dataset_collection = builder.build_collection(type_plugin, elements, fields=fields)
else:
# TODO: Pass fields here - need test case first.
dataset_collection = model.DatasetCollection(populated=False)
dataset_collection.collection_type = collection_type
return dataset_collection
@@ -400,6 +409,8 @@ def _append_tags(self, dataset_collection_instance, implicit_inputs=None, tags=N
tags = tags or {}
implicit_inputs = implicit_inputs or []
for _, v in implicit_inputs:
if getattr(v, "ephemeral", False):
v = v.persistent_object
for tag in v.auto_propagated_tags:
tags[tag.value] = tag
for _, tag in tags.items():