diff --git a/.github/config/wordlist.txt b/.github/config/wordlist.txt index 039d4ab0..1f9f8642 100644 --- a/.github/config/wordlist.txt +++ b/.github/config/wordlist.txt @@ -25,6 +25,7 @@ chilton cisphobia classname cmd +colonnelli commandinputarrayschema commandinputenumschema commandinputparameter @@ -54,6 +55,11 @@ cryptographic ctbrown curii cwl +cwlarrayschema +cwlinputfile +cwlobjecttype +cwlrecordfield +cwlrecordschema cwltest cwltool cwltype @@ -108,6 +114,7 @@ gmail gunzip hervé https +iacopo iana ietf implementer @@ -122,6 +129,7 @@ inplaceupdaterequirement inputarrayschema inputenumschema inputbinding +inputfile inputformat inputrecordfield inputrecordschema @@ -270,6 +278,7 @@ tijanić timelimit toc tooltimelimit +torino transcode txt typedef diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0928beaf..8266c80b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,9 +6,9 @@ name: CI on: # Triggers the workflow on push or pull request events but only for the main branch push: - branches: [ main ] + branches: [ main, codegen ] pull_request: - branches: [ main, 1.2.1_proposed ] + branches: [ main, codegen ] # Allows you to run this workflow manually from the Actions tab workflow_dispatch: diff --git a/.gitignore b/.gitignore index 2a25de39..21f3238d 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,4 @@ *.egg-info *egg[s] __pycache__ -.python_history +.python_history \ No newline at end of file diff --git a/Base.yml b/Base.yml new file mode 100644 index 00000000..c2ca0d5b --- /dev/null +++ b/Base.yml @@ -0,0 +1,498 @@ +$base: "https://w3id.org/cwl/cwl#" + +$namespaces: + cwl: "https://w3id.org/cwl/cwl#" + sld: "https://w3id.org/cwl/salad#" + +$graph: + +- name: CWLType + type: enum + extends: "sld:PrimitiveType" + symbols: + - cwl:File + - cwl:Directory + doc: + - "Extends primitive types with the concept of a file and directory as a builtin type." 
+ - "File: A File object" + - "Directory: A Directory object" + +- name: CWLArraySchema + type: record + extends: "sld:ArraySchema" + fields: + items: + type: + - PrimitiveType + - CWLRecordSchema + - EnumSchema + - CWLArraySchema + - string + - type: array + items: + - PrimitiveType + - CWLRecordSchema + - EnumSchema + - CWLArraySchema + - string + jsonldPredicate: + _id: "sld:items" + _type: "@vocab" + refScope: 2 + doc: "Defines the type of the array elements." + +- name: CWLRecordField + type: record + extends: "sld:RecordField" + fields: + - name: type + type: + - PrimitiveType + - CWLRecordSchema + - EnumSchema + - CWLArraySchema + - string + - type: array + items: + - PrimitiveType + - CWLRecordSchema + - EnumSchema + - CWLArraySchema + - string + jsonldPredicate: + _id: sld:type + _type: "@vocab" + typeDSL: true + refScope: 2 + doc: | + The field type + +- name: CWLRecordSchema + type: record + extends: "sld:RecordSchema" + fields: + fields: + type: CWLRecordField[]? + jsonldPredicate: + _id: sld:fields + mapSubject: name + mapPredicate: type + doc: "Defines the fields of the record." + +- name: File + type: record + docParent: "#CWLType" + doc: | + Represents a file (or group of files when `secondaryFiles` is provided) that + will be accessible by tools using standard POSIX file system call API such as + open(2) and read(2). + + Files are represented as objects with `class` of `File`. File objects have + a number of properties that provide metadata about the file. + + The `location` property of a File is a IRI that uniquely identifies the + file. Implementations must support the `file://` IRI scheme and may support + other schemes such as `http://` and `https://`. The value of `location` may also be a + relative reference, in which case it must be resolved relative to the IRI + of the document it appears in. 
Alternately to `location`, implementations + must also accept the `path` property on File, which must be a filesystem + path available on the same host as the CWL runner (for inputs) or the + runtime environment of a command line tool execution (for command line tool + outputs). + + If no `location` or `path` is specified, a file object must specify + `contents` with the UTF-8 text content of the file. This is a "file + literal". File literals do not correspond to external resources, but are + created on disk with `contents` when needed for executing a tool. + Where appropriate, expressions can return file literals to define new files + on a runtime. The maximum size of `contents` is 64 kilobytes. + + The `basename` property defines the filename on disk where the file is + staged. This may differ from the resource name. If not provided, + `basename` must be computed from the last path part of `location` and made + available to expressions. + + The `secondaryFiles` property is a list of File or Directory objects that + must be staged in the same directory as the primary file. It is an error + for file names to be duplicated in `secondaryFiles`. + + The `size` property is the size in bytes of the File. It must be computed + from the resource and made available to expressions. The `checksum` field + contains a cryptographic hash of the file content for use in verifying file + contents. Implementations may, at user option, enable or disable + computation of the `checksum` field for performance or other reasons. + However, the ability to compute output checksums is required to pass the + CWL conformance test suite. + + When executing a CommandLineTool, the files and secondary files may be + staged to an arbitrary directory, but must use the value of `basename` for + the filename. The `path` property must be a file path in the context of the + tool execution runtime (local to the compute node, or within the executing + container).
All computed properties should be available to expressions. + File literals also must be staged and `path` must be set. + + When collecting CommandLineTool outputs, `glob` matching returns file paths + (with the `path` property) and the derived properties. This can all be + modified by `outputEval`. Alternately, if the file `cwl.output.json` is + present in the output, `outputBinding` is ignored. + + File objects in the output must provide either a `location` IRI or a `path` + property in the context of the tool execution runtime (local to the compute + node, or within the executing container). + + When evaluating an ExpressionTool, file objects must be referenced via + `location` (the expression tool does not have access to files on disk so + `path` is meaningless) or as file literals. It is legal to return a file + object with an existing `location` but a different `basename`. The + `loadContents` field of ExpressionTool inputs behaves the same as on + CommandLineTool inputs, however it is not meaningful on the outputs. + + An ExpressionTool may forward file references from input to output by using + the same value for `location`. + + fields: + - name: class + type: + type: enum + name: File_class + symbols: + - cwl:File + jsonldPredicate: + _id: "@type" + _type: "@vocab" + doc: Must be `File` to indicate this object describes a file. + - name: location + type: string? + doc: | + An IRI that identifies the file resource. This may be a relative + reference, in which case it must be resolved using the base IRI of the + document. The location may refer to a local or remote resource; the + implementation must use the IRI to retrieve file content. If an + implementation is unable to retrieve the file content stored at a + remote resource (due to unsupported protocol, access denied, or other + issue) it must signal an error. + + If the `location` field is not provided, the `contents` field must be + provided. 
The implementation must assign a unique identifier for + the `location` field. + + If the `path` field is provided but the `location` field is not, an + implementation may assign the value of the `path` field to `location`, + then follow the rules above. + jsonldPredicate: + _id: "@id" + _type: "@id" + - name: path + type: string? + doc: | + The local host path where the File is available when a CommandLineTool is + executed. This field must be set by the implementation. The final + path component must match the value of `basename`. This field + must not be used in any other context. The command line tool being + executed must be able to access the file at `path` using the POSIX + `open(2)` syscall. + + As a special case, if the `path` field is provided but the `location` + field is not, an implementation may assign the value of the `path` + field to `location`, and remove the `path` field. + + If the `path` contains [POSIX shell metacharacters](http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02) + (`|`,`&`, `;`, `<`, `>`, `(`,`)`, `$`,`` ` ``, `\`, `"`, `'`, + `<space>`, `<tab>`, and `<newline>`) or characters + [not allowed](http://www.iana.org/assignments/idna-tables-6.3.0/idna-tables-6.3.0.xhtml) + for [Internationalized Domain Names for Applications](https://tools.ietf.org/html/rfc6452) + then implementations may terminate the process with a + `permanentFailure`. + jsonldPredicate: + "_id": "cwl:path" + "_type": "@id" + - name: basename + type: string? + doc: | + The base name of the file, that is, the name of the file without any + leading directory path. The base name must not contain a slash `/`. + + If not provided, the implementation must set this field based on the + `location` field by taking the final path component after parsing + `location` as an IRI. If `basename` is provided, it is not required to + match the value from `location`. + + When this file is made available to a CommandLineTool, it must be named + with `basename`, i.e.
the final component of the `path` field must match + `basename`. + jsonldPredicate: "cwl:basename" + - name: dirname + type: string? + doc: | + The name of the directory containing file, that is, the path leading up + to the final slash in the path such that `dirname + '/' + basename == + path`. + + The implementation must set this field based on the value of `path` + prior to evaluating parameter references or expressions in a + CommandLineTool document. This field must not be used in any other + context. + - name: nameroot + type: string? + doc: | + The basename root such that `nameroot + nameext == basename`, and + `nameext` is empty or begins with a period and contains at most one + period. For the purposes of path splitting leading periods on the + basename are ignored; a basename of `.cshrc` will have a nameroot of + `.cshrc`. + + The implementation must set this field automatically based on the value + of `basename` prior to evaluating parameter references or expressions. + - name: nameext + type: string? + doc: | + The basename extension such that `nameroot + nameext == basename`, and + `nameext` is empty or begins with a period and contains at most one + period. Leading periods on the basename are ignored; a basename of + `.cshrc` will have an empty `nameext`. + + The implementation must set this field automatically based on the value + of `basename` prior to evaluating parameter references or expressions. + - name: checksum + type: string? + doc: | + Optional hash code for validating file integrity. Currently, must be in the form + "sha1$ + hexadecimal string" using the SHA-1 algorithm. 
+ - name: size + type: + - "null" + - int + - long + doc: Optional file size (in bytes) + - name: "secondaryFiles" + type: + - "null" + - type: array + items: [File, Directory] + jsonldPredicate: + _id: "cwl:secondaryFiles" + secondaryFilesDSL: true + doc: | + A list of additional files or directories that are associated with the + primary file and must be transferred alongside the primary file. + Examples include indexes of the primary file, or external references + which must be included when loading primary document. A file object + listed in `secondaryFiles` may itself include `secondaryFiles` for + which the same rules apply. + - name: format + type: string? + jsonldPredicate: + _id: cwl:format + _type: "@id" + identity: true + noLinkCheck: true + doc: | + The format of the file: this must be an IRI of a concept node that + represents the file format, preferably defined within an ontology. + If no ontology is available, file formats may be tested by exact match. + + Reasoning about format compatibility must be done by checking that an + input file format is the same, `owl:equivalentClass` or + `rdfs:subClassOf` the format required by the input parameter. + `owl:equivalentClass` is transitive with `rdfs:subClassOf`, e.g. if + `<B> owl:equivalentClass <C>` and `<B> owl:subclassOf <A>` then infer + `<C> owl:subclassOf <A>`. + + File format ontologies may be provided in the "$schemas" metadata at the + root of the document. If no ontologies are specified in `$schemas`, the + runtime may perform exact file format matches. + - name: contents + type: string? + doc: | + File contents literal. + + If neither `location` nor `path` is provided, `contents` must be + non-null. The implementation must assign a unique identifier for the + `location` field. When the file is staged as input to CommandLineTool, + the value of `contents` must be written to a file.
+ + If `contents` is set as a result of a Javascript expression, + an `entry` in `InitialWorkDirRequirement`, or read in from + `cwl.output.json`, there is no specified upper limit on the + size of `contents`. Implementations may have practical limits + on the size of `contents` based on memory and storage + available to the workflow runner or other factors. + + If the `loadContents` field of an `InputParameter` or + `OutputParameter` is true, and the input or output File object + `location` is valid, the file must be a UTF-8 text file 64 KiB + or smaller, and the implementation must read the entire + contents of the file and place it in the `contents` field. If + the size of the file is greater than 64 KiB, the + implementation must raise a fatal error. + + +- name: Directory + type: record + docAfter: "#File" + doc: | + Represents a directory to present to a command line tool. + + Directories are represented as objects with `class` of `Directory`. Directory objects have + a number of properties that provide metadata about the directory. + + The `location` property of a Directory is a IRI that uniquely identifies + the directory. Implementations must support the file:// IRI scheme and may + support other schemes such as http://. Alternately to `location`, + implementations must also accept the `path` property on Directory, which + must be a filesystem path available on the same host as the CWL runner (for + inputs) or the runtime environment of a command line tool execution (for + command line tool outputs). + + A Directory object may have a `listing` field. This is a list of File and + Directory objects that are contained in the Directory. For each entry in + `listing`, the `basename` property defines the name of the File or + Subdirectory when staged to disk. If `listing` is not provided, the + implementation must have some way of fetching the Directory listing at + runtime based on the `location` field. 
+ + If a Directory does not have `location`, it is a Directory literal. A + Directory literal must provide `listing`. Directory literals must be + created on disk at runtime as needed. + + The resources in a Directory literal do not need to have any implied + relationship in their `location`. For example, a Directory listing may + contain two files located on different hosts. It is the responsibility of + the runtime to ensure that those files are staged to disk appropriately. + Secondary files associated with files in `listing` must also be staged to + the same Directory. + + When executing a CommandLineTool, Directories must be recursively staged + first and have local values of `path` assigned. + + Directory objects in CommandLineTool output must provide either a + `location` IRI or a `path` property in the context of the tool execution + runtime (local to the compute node, or within the executing container). + + An ExpressionTool may forward file references from input to output by using + the same value for `location`. + + Name conflicts (the same `basename` appearing multiple times in `listing` + or in any entry in `secondaryFiles` in the listing) is a fatal error. + + fields: + - name: class + type: + type: enum + name: Directory_class + symbols: + - cwl:Directory + jsonldPredicate: + _id: "@type" + _type: "@vocab" + doc: Must be `Directory` to indicate this object describes a Directory. + - name: location + type: string? + doc: | + An IRI that identifies the directory resource. This may be a relative + reference, in which case it must be resolved using the base IRI of the + document. The location may refer to a local or remote resource. If + the `listing` field is not set, the implementation must use the + location IRI to retrieve directory listing. If an implementation is + unable to retrieve the directory listing stored at a remote resource (due to + unsupported protocol, access denied, or other issue) it must signal an + error. 
+ + If the `location` field is not provided, the `listing` field must be + provided. The implementation must assign a unique identifier for + the `location` field. + + If the `path` field is provided but the `location` field is not, an + implementation may assign the value of the `path` field to `location`, + then follow the rules above. + jsonldPredicate: + _id: "@id" + _type: "@id" + - name: path + type: string? + doc: | + The local path where the Directory is made available prior to executing a + CommandLineTool. This must be set by the implementation. This field + must not be used in any other context. The command line tool being + executed must be able to access the directory at `path` using the POSIX + `opendir(2)` syscall. + + If the `path` contains [POSIX shell metacharacters](http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02) + (`|`,`&`, `;`, `<`, `>`, `(`,`)`, `$`,`` ` ``, `\`, `"`, `'`, + `<space>`, `<tab>`, and `<newline>`) or characters + [not allowed](http://www.iana.org/assignments/idna-tables-6.3.0/idna-tables-6.3.0.xhtml) + for [Internationalized Domain Names for Applications](https://tools.ietf.org/html/rfc6452) + then implementations may terminate the process with a + `permanentFailure`. + jsonldPredicate: + _id: "cwl:path" + _type: "@id" + - name: basename + type: string? + doc: | + The base name of the directory, that is, the name of the file without any + leading directory path. The base name must not contain a slash `/`. + + If not provided, the implementation must set this field based on the + `location` field by taking the final path component after parsing + `location` as an IRI. If `basename` is provided, it is not required to + match the value from `location`. + + When this file is made available to a CommandLineTool, it must be named + with `basename`, i.e. the final component of the `path` field must match + `basename`.
+ jsonldPredicate: "cwl:basename" + - name: listing + type: + - "null" + - type: array + items: [File, Directory] + doc: | + List of files or subdirectories contained in this directory. The name + of each file or subdirectory is determined by the `basename` field of + each `File` or `Directory` object. It is an error if a `File` shares a + `basename` with any other entry in `listing`. If two or more + `Directory` objects share the same `basename`, this must be treated as + equivalent to a single subdirectory with the listings recursively + merged. + jsonldPredicate: + _id: "cwl:listing" + + +- name: CWLObjectType + type: union + names: + - boolean + - int + - long + - float + - double + - string + - File + - Directory + - type: array + items: + - "null" + - CWLObjectType + - type: map + values: + - "null" + - CWLObjectType + doc: | + Generic type representing a valid CWL object. It is used to represent + `default` values passed to CWL `InputParameter` and `WorkflowStepInput` + record fields. + +- name: CWLInputFile + type: map + values: + - "null" + - type: array + items: ProcessRequirement + - CWLObjectType + doc: | + Type representing a valid CWL input file as a `map<string, union<array<ProcessRequirement>, CWLObjectType>>`. + jsonldPredicate: + _id: "cwl:inputfile" + _container: "@list" + noLinkCheck: true diff --git a/Process.yml b/Process.yml index 41d24a2d..acc77c07 100644 --- a/Process.yml +++ b/Process.yml @@ -14,6 +14,8 @@ $graph: - $import: "salad/schema_salad/metaschema/metaschema_base.yml" +- $import: Base.yml + - name: BaseTypesDoc type: documentation doc: | @@ -47,395 +49,6 @@ $graph: - cwl:v1.2.0-dev5 - cwl:v1.2 -- name: CWLType - type: enum - extends: "sld:PrimitiveType" - symbols: - - cwl:File - - cwl:Directory - doc: - - "Extends primitive types with the concept of a file and directory as a builtin type."
- - "File: A File object" - - "Directory: A Directory object" - -- name: File - type: record - docParent: "#CWLType" - doc: | - Represents a file (or group of files when `secondaryFiles` is provided) that - will be accessible by tools using standard POSIX file system call API such as - open(2) and read(2). - - Files are represented as objects with `class` of `File`. File objects have - a number of properties that provide metadata about the file. - - The `location` property of a File is a IRI that uniquely identifies the - file. Implementations must support the `file://` IRI scheme and may support - other schemes such as `http://` and `https://`. The value of `location` may also be a - relative reference, in which case it must be resolved relative to the IRI - of the document it appears in. Alternately to `location`, implementations - must also accept the `path` property on File, which must be a filesystem - path available on the same host as the CWL runner (for inputs) or the - runtime environment of a command line tool execution (for command line tool - outputs). - - If no `location` or `path` is specified, a file object must specify - `contents` with the UTF-8 text content of the file. This is a "file - literal". File literals do not correspond to external resources, but are - created on disk with `contents` with when needed for executing a tool. - Where appropriate, expressions can return file literals to define new files - on a runtime. The maximum size of `contents` is 64 kilobytes. - - The `basename` property defines the filename on disk where the file is - staged. This may differ from the resource name. If not provided, - `basename` must be computed from the last path part of `location` and made - available to expressions. - - The `secondaryFiles` property is a list of File or Directory objects that - must be staged in the same directory as the primary file. It is an error - for file names to be duplicated in `secondaryFiles`. 
- - The `size` property is the size in bytes of the File. It must be computed - from the resource and made available to expressions. The `checksum` field - contains a cryptographic hash of the file content for use it verifying file - contents. Implementations may, at user option, enable or disable - computation of the `checksum` field for performance or other reasons. - However, the ability to compute output checksums is required to pass the - CWL conformance test suite. - - When executing a CommandLineTool, the files and secondary files may be - staged to an arbitrary directory, but must use the value of `basename` for - the filename. The `path` property must be file path in the context of the - tool execution runtime (local to the compute node, or within the executing - container). All computed properties should be available to expressions. - File literals also must be staged and `path` must be set. - - When collecting CommandLineTool outputs, `glob` matching returns file paths - (with the `path` property) and the derived properties. This can all be - modified by `outputEval`. Alternately, if the file `cwl.output.json` is - present in the output, `outputBinding` is ignored. - - File objects in the output must provide either a `location` IRI or a `path` - property in the context of the tool execution runtime (local to the compute - node, or within the executing container). - - When evaluating an ExpressionTool, file objects must be referenced via - `location` (the expression tool does not have access to files on disk so - `path` is meaningless) or as file literals. It is legal to return a file - object with an existing `location` but a different `basename`. The - `loadContents` field of ExpressionTool inputs behaves the same as on - CommandLineTool inputs, however it is not meaningful on the outputs. - - An ExpressionTool may forward file references from input to output by using - the same value for `location`. 
- - fields: - - name: class - type: - type: enum - name: File_class - symbols: - - cwl:File - jsonldPredicate: - _id: "@type" - _type: "@vocab" - doc: Must be `File` to indicate this object describes a file. - - name: location - type: string? - doc: | - An IRI that identifies the file resource. This may be a relative - reference, in which case it must be resolved using the base IRI of the - document. The location may refer to a local or remote resource; the - implementation must use the IRI to retrieve file content. If an - implementation is unable to retrieve the file content stored at a - remote resource (due to unsupported protocol, access denied, or other - issue) it must signal an error. - - If the `location` field is not provided, the `contents` field must be - provided. The implementation must assign a unique identifier for - the `location` field. - - If the `path` field is provided but the `location` field is not, an - implementation may assign the value of the `path` field to `location`, - then follow the rules above. - jsonldPredicate: - _id: "@id" - _type: "@id" - - name: path - type: string? - doc: | - The local host path where the File is available when a CommandLineTool is - executed. This field must be set by the implementation. The final - path component must match the value of `basename`. This field - must not be used in any other context. The command line tool being - executed must be able to access the file at `path` using the POSIX - `open(2)` syscall. - - As a special case, if the `path` field is provided but the `location` - field is not, an implementation may assign the value of the `path` - field to `location`, and remove the `path` field. 
- - If the `path` contains [POSIX shell metacharacters](http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02) - (`|`,`&`, `;`, `<`, `>`, `(`,`)`, `$`,`` ` ``, `\`, `"`, `'`, - `<space>`, `<tab>`, and `<newline>`) or characters - [not allowed](http://www.iana.org/assignments/idna-tables-6.3.0/idna-tables-6.3.0.xhtml) - for [Internationalized Domain Names for Applications](https://tools.ietf.org/html/rfc6452) - then implementations may terminate the process with a - `permanentFailure`. - jsonldPredicate: - "_id": "cwl:path" - "_type": "@id" - - name: basename - type: string? - doc: | - The base name of the file, that is, the name of the file without any - leading directory path. The base name must not contain a slash `/`. - - If not provided, the implementation must set this field based on the - `location` field by taking the final path component after parsing - `location` as an IRI. If `basename` is provided, it is not required to - match the value from `location`. - - When this file is made available to a CommandLineTool, it must be named - with `basename`, i.e. the final component of the `path` field must match - `basename`. - jsonldPredicate: "cwl:basename" - - name: dirname - type: string? - doc: | - The name of the directory containing file, that is, the path leading up - to the final slash in the path such that `dirname + '/' + basename == - path`. - - The implementation must set this field based on the value of `path` - prior to evaluating parameter references or expressions in a - CommandLineTool document. This field must not be used in any other - context. - - name: nameroot - type: string? - doc: | - The basename root such that `nameroot + nameext == basename`, and - `nameext` is empty or begins with a period and contains at most one - period. For the purposes of path splitting leading periods on the - basename are ignored; a basename of `.cshrc` will have a nameroot of - `.cshrc`.
- - The implementation must set this field automatically based on the value - of `basename` prior to evaluating parameter references or expressions. - - name: nameext - type: string? - doc: | - The basename extension such that `nameroot + nameext == basename`, and - `nameext` is empty or begins with a period and contains at most one - period. Leading periods on the basename are ignored; a basename of - `.cshrc` will have an empty `nameext`. - - The implementation must set this field automatically based on the value - of `basename` prior to evaluating parameter references or expressions. - - name: checksum - type: string? - doc: | - Optional hash code for validating file integrity. Currently, must be in the form - "sha1$ + hexadecimal string" using the SHA-1 algorithm. - - name: size - type: - - "null" - - int - - long - doc: Optional file size (in bytes) - - name: "secondaryFiles" - type: - - "null" - - type: array - items: [File, Directory] - jsonldPredicate: - _id: "cwl:secondaryFiles" - secondaryFilesDSL: true - doc: | - A list of additional files or directories that are associated with the - primary file and must be transferred alongside the primary file. - Examples include indexes of the primary file, or external references - which must be included when loading primary document. A file object - listed in `secondaryFiles` may itself include `secondaryFiles` for - which the same rules apply. - - name: format - type: string? - jsonldPredicate: - _id: cwl:format - _type: "@id" - identity: true - doc: | - The format of the file: this must be an IRI of a concept node that - represents the file format, preferably defined within an ontology. - If no ontology is available, file formats may be tested by exact match. - - Reasoning about format compatibility must be done by checking that an - input file format is the same, `owl:equivalentClass` or - `rdfs:subClassOf` the format required by the input parameter. 
- `owl:equivalentClass` is transitive with `rdfs:subClassOf`, e.g. if - `<B> owl:equivalentClass <C>` and `<B> owl:subclassOf <A>` then infer - `<C> owl:subclassOf <A>`. - - File format ontologies may be provided in the "$schemas" metadata at the - root of the document. If no ontologies are specified in `$schemas`, the - runtime may perform exact file format matches. - - name: contents - type: string? - doc: | - File contents literal. - - If neither `location` nor `path` is provided, `contents` must be - non-null. The implementation must assign a unique identifier for the - `location` field. When the file is staged as input to CommandLineTool, - the value of `contents` must be written to a file. - - If `contents` is set as a result of a Javascript expression, - an `entry` in `InitialWorkDirRequirement`, or read in from - `cwl.output.json`, there is no specified upper limit on the - size of `contents`. Implementations may have practical limits - on the size of `contents` based on memory and storage - available to the workflow runner or other factors. - - If the `loadContents` field of an `InputParameter` or - `OutputParameter` is true, and the input or output File object - `location` is valid, the file must be a UTF-8 text file 64 KiB - or smaller, and the implementation must read the entire - contents of the file and place it in the `contents` field. If - the size of the file is greater than 64 KiB, the - implementation must raise a fatal error. - - -- name: Directory - type: record - docAfter: "#File" - doc: | - Represents a directory to present to a command line tool. - - Directories are represented as objects with `class` of `Directory`. Directory objects have - a number of properties that provide metadata about the directory. - - The `location` property of a Directory is a IRI that uniquely identifies - the directory. Implementations must support the file:// IRI scheme and may - support other schemes such as http://.
Alternately to `location`, - implementations must also accept the `path` property on Directory, which - must be a filesystem path available on the same host as the CWL runner (for - inputs) or the runtime environment of a command line tool execution (for - command line tool outputs). - - A Directory object may have a `listing` field. This is a list of File and - Directory objects that are contained in the Directory. For each entry in - `listing`, the `basename` property defines the name of the File or - Subdirectory when staged to disk. If `listing` is not provided, the - implementation must have some way of fetching the Directory listing at - runtime based on the `location` field. - - If a Directory does not have `location`, it is a Directory literal. A - Directory literal must provide `listing`. Directory literals must be - created on disk at runtime as needed. - - The resources in a Directory literal do not need to have any implied - relationship in their `location`. For example, a Directory listing may - contain two files located on different hosts. It is the responsibility of - the runtime to ensure that those files are staged to disk appropriately. - Secondary files associated with files in `listing` must also be staged to - the same Directory. - - When executing a CommandLineTool, Directories must be recursively staged - first and have local values of `path` assigned. - - Directory objects in CommandLineTool output must provide either a - `location` IRI or a `path` property in the context of the tool execution - runtime (local to the compute node, or within the executing container). - - An ExpressionTool may forward file references from input to output by using - the same value for `location`. - - Name conflicts (the same `basename` appearing multiple times in `listing` - or in any entry in `secondaryFiles` in the listing) is a fatal error. 
- - fields: - - name: class - type: - type: enum - name: Directory_class - symbols: - - cwl:Directory - jsonldPredicate: - _id: "@type" - _type: "@vocab" - doc: Must be `Directory` to indicate this object describes a Directory. - - name: location - type: string? - doc: | - An IRI that identifies the directory resource. This may be a relative - reference, in which case it must be resolved using the base IRI of the - document. The location may refer to a local or remote resource. If - the `listing` field is not set, the implementation must use the - location IRI to retrieve directory listing. If an implementation is - unable to retrieve the directory listing stored at a remote resource (due to - unsupported protocol, access denied, or other issue) it must signal an - error. - - If the `location` field is not provided, the `listing` field must be - provided. The implementation must assign a unique identifier for - the `location` field. - - If the `path` field is provided but the `location` field is not, an - implementation may assign the value of the `path` field to `location`, - then follow the rules above. - jsonldPredicate: - _id: "@id" - _type: "@id" - - name: path - type: string? - doc: | - The local path where the Directory is made available prior to executing a - CommandLineTool. This must be set by the implementation. This field - must not be used in any other context. The command line tool being - executed must be able to access the directory at `path` using the POSIX - `opendir(2)` syscall. 
- - If the `path` contains [POSIX shell metacharacters](http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02) - (`|`,`&`, `;`, `<`, `>`, `(`,`)`, `$`,`` ` ``, `\`, `"`, `'`, - `<space>`, `<tab>`, and `<newline>`) or characters - [not allowed](http://www.iana.org/assignments/idna-tables-6.3.0/idna-tables-6.3.0.xhtml) - for [Internationalized Domain Names for Applications](https://tools.ietf.org/html/rfc6452) - then implementations may terminate the process with a - `permanentFailure`. - jsonldPredicate: - _id: "cwl:path" - _type: "@id" - - name: basename - type: string? - doc: | - The base name of the directory, that is, the name of the file without any - leading directory path. The base name must not contain a slash `/`. - - If not provided, the implementation must set this field based on the - `location` field by taking the final path component after parsing - `location` as an IRI. If `basename` is provided, it is not required to - match the value from `location`. - - When this file is made available to a CommandLineTool, it must be named - with `basename`, i.e. the final component of the `path` field must match - `basename`. - jsonldPredicate: "cwl:basename" - - name: listing - type: - - "null" - - type: array - items: [File, Directory] - doc: | - List of files or subdirectories contained in this directory. The name - of each file or subdirectory is determined by the `basename` field of - each `File` or `Directory` object. It is an error if a `File` shares a - `basename` with any other entry in `listing`. If two or more - `Directory` object share the same `basename`, this must be treated as - equivalent to a single subdirectory with the listings recursively - merged. - jsonldPredicate: - _id: "cwl:listing" - - name: Labeled type: record abstract: true @@ -586,6 +199,7 @@ $graph: _id: cwl:format _type: "@id" identity: true + noLinkCheck: true doc: | Only valid when `type: File` or is an array of `items: File`.
@@ -608,6 +222,7 @@ $graph: _id: cwl:format _type: "@id" identity: true + noLinkCheck: true doc: | Only valid when `type: File` or is an array of `items: File`. @@ -670,13 +285,13 @@ $graph: - name: InputRecordField type: record - extends: [sld:RecordField, FieldBase, InputFormat, LoadContents] + extends: [CWLRecordField, FieldBase, InputFormat, LoadContents] specialize: - - specializeFrom: "sld:RecordSchema" + - specializeFrom: CWLRecordSchema specializeTo: InputRecordSchema - specializeFrom: "sld:EnumSchema" specializeTo: InputEnumSchema - - specializeFrom: "sld:ArraySchema" + - specializeFrom: CWLArraySchema specializeTo: InputArraySchema - specializeFrom: "sld:PrimitiveType" specializeTo: CWLType @@ -684,9 +299,9 @@ $graph: - name: InputRecordSchema type: record - extends: ["sld:RecordSchema", InputSchema] + extends: [CWLRecordSchema, InputSchema] specialize: - - specializeFrom: "sld:RecordField" + - specializeFrom: CWLRecordField specializeTo: InputRecordField @@ -697,13 +312,13 @@ $graph: - name: InputArraySchema type: record - extends: ["sld:ArraySchema", InputSchema] + extends: [CWLArraySchema, InputSchema] specialize: - - specializeFrom: "sld:RecordSchema" + - specializeFrom: CWLRecordSchema specializeTo: InputRecordSchema - specializeFrom: "sld:EnumSchema" specializeTo: InputEnumSchema - - specializeFrom: "sld:ArraySchema" + - specializeFrom: CWLArraySchema specializeTo: InputArraySchema - specializeFrom: "sld:PrimitiveType" specializeTo: CWLType @@ -711,13 +326,13 @@ $graph: - name: OutputRecordField type: record - extends: [sld:RecordField, FieldBase, OutputFormat] + extends: [CWLRecordField, FieldBase, OutputFormat] specialize: - - specializeFrom: "sld:RecordSchema" + - specializeFrom: CWLRecordSchema specializeTo: OutputRecordSchema - specializeFrom: "sld:EnumSchema" specializeTo: OutputEnumSchema - - specializeFrom: "sld:ArraySchema" + - specializeFrom: CWLArraySchema specializeTo: OutputArraySchema - specializeFrom: "sld:PrimitiveType" specializeTo: 
CWLType @@ -725,10 +340,10 @@ $graph: - name: OutputRecordSchema type: record - extends: ["sld:RecordSchema", "#OutputSchema"] + extends: [CWLRecordSchema, "#OutputSchema"] docParent: "#OutputParameter" specialize: - - specializeFrom: "sld:RecordField" + - specializeFrom: CWLRecordField specializeTo: OutputRecordField @@ -740,14 +355,14 @@ $graph: - name: OutputArraySchema type: record - extends: ["sld:ArraySchema", OutputSchema] + extends: [CWLArraySchema, OutputSchema] docParent: "#OutputParameter" specialize: - - specializeFrom: "sld:RecordSchema" + - specializeFrom: CWLRecordSchema specializeTo: OutputRecordSchema - specializeFrom: "sld:EnumSchema" specializeTo: OutputEnumSchema - - specializeFrom: "sld:ArraySchema" + - specializeFrom: CWLArraySchema specializeTo: OutputArraySchema - specializeFrom: "sld:PrimitiveType" specializeTo: CWLType @@ -759,13 +374,10 @@ $graph: extends: [Parameter, InputFormat, LoadContents] fields: - name: default - type: - - "null" - - File - - Directory - - Any + type: CWLObjectType? jsonldPredicate: - _id: sld:default + _id: "sld:default" + _container: "@list" noLinkCheck: true doc: | The default value to use for this parameter if the parameter is missing diff --git a/Workflow.yml b/Workflow.yml index 88a7df6f..bafe5b46 100644 --- a/Workflow.yml +++ b/Workflow.yml @@ -586,13 +586,14 @@ $graph: fields: - name: default - type: ["null", File, Directory, Any] + type: CWLObjectType? doc: | The default value for this parameter to use if either there is no `source` field, or the value produced by the `source` is `null`. The default must be applied prior to scattering or evaluating `valueFrom`. 
jsonldPredicate: _id: "sld:default" + _container: "@list" noLinkCheck: true - name: valueFrom type: diff --git a/render.bash b/render.bash index 05bf4852..eda4758c 100644 --- a/render.bash +++ b/render.bash @@ -4,12 +4,15 @@ if [[ -z "$WORKSPACE" ]] ; then fi mkdir -p $WORKSPACE repo=https://github.com/common-workflow-language/cwl-website \ -bn=$(basename $repo) +bn=$WORKSPACE/$(basename $repo) if [[ -d $bn ]] ; then (cd $bn && git fetch origin && git reset --hard origin/main) else - git clone $repo && pushd $bn; git checkout main ; git show --no-patch ; popd + git clone $repo $bn && pushd $bn; git checkout main ; git show --no-patch ; popd fi +echo "- basename: Base.yml" >> $WORKSPACE/cwl-website/site/v1.2-deps.yaml +echo " class: File" >> $WORKSPACE/cwl-website/site/v1.2-deps.yaml +echo " location: ../v1.2/Base.yml" >> $WORKSPACE/cwl-website/site/v1.2-deps.yaml CURDIR=$PWD pushd $WORKSPACE/cwl-website ; ln -s $CURDIR v1.2 ; popd cp cwlsite-v1.2-only-job.yaml $WORKSPACE/cwl-website/site/ diff --git a/requirements.txt b/requirements.txt index e1d38dee..db185255 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,2 @@ cwltool - - +schema_salad >= 8.5.20231201181309 diff --git a/salad/schema_salad/metaschema/metaschema.yml b/salad/schema_salad/metaschema/metaschema.yml index de0b0931..f696e0ae 100644 --- a/salad/schema_salad/metaschema/metaschema.yml +++ b/salad/schema_salad/metaschema/metaschema.yml @@ -274,6 +274,7 @@ $graph: doc: | If true, indicates that the type is a valid at the document root. At least one type in a schema must be tagged with `documentRoot: true`. + jsonldPredicate: sld:documentRoot - name: SaladRecordField @@ -356,6 +357,31 @@ $graph: Indicates that this enum inherits symbols from a base enum. +- name: SaladMapSchema + docParent: "#Schema" + type: record + extends: [NamedType, MapSchema, SchemaDefinedType] + documentRoot: true + doc: | + Define a map type. 
+ + +- name: SaladUnionSchema + docParent: "#Schema" + type: record + extends: [NamedType, UnionSchema, DocType] + documentRoot: true + doc: | + Define a union type. + fields: + - name: documentRoot + type: boolean? + doc: | + If true, indicates that the type is a valid at the document root. At + least one type in a schema must be tagged with `documentRoot: true`. + jsonldPredicate: sld:documentRoot + + - name: Documentation type: record docParent: "#Schema" diff --git a/salad/schema_salad/metaschema/metaschema_base.yml b/salad/schema_salad/metaschema/metaschema_base.yml index 5a37a33a..3bdf6390 100644 --- a/salad/schema_salad/metaschema/metaschema_base.yml +++ b/salad/schema_salad/metaschema/metaschema_base.yml @@ -77,6 +77,8 @@ $graph: - RecordSchema - EnumSchema - ArraySchema + - MapSchema + - UnionSchema - string - type: array items: @@ -84,6 +86,8 @@ $graph: - RecordSchema - EnumSchema - ArraySchema + - MapSchema + - UnionSchema - string jsonldPredicate: _id: sld:type @@ -91,7 +95,9 @@ $graph: typeDSL: true refScope: 2 doc: | - The field type + The field type. If it is an array, it indicates + that the field type is a union type of its elements. + Its elements may be duplicated. - name: RecordSchema @@ -168,6 +174,8 @@ $graph: - RecordSchema - EnumSchema - ArraySchema + - MapSchema + - UnionSchema - string - type: array items: @@ -175,9 +183,91 @@ $graph: - RecordSchema - EnumSchema - ArraySchema + - MapSchema + - UnionSchema - string jsonldPredicate: _id: "sld:items" _type: "@vocab" refScope: 2 doc: "Defines the type of the array elements." 
+ + +- name: MapSchema + type: record + fields: + type: + doc: "Must be `map`" + type: + type: enum + name: Map_name + symbols: + - "sld:map" + jsonldPredicate: + _id: "sld:type" + _type: "@vocab" + typeDSL: true + refScope: 2 + values: + type: + - PrimitiveType + - RecordSchema + - EnumSchema + - ArraySchema + - MapSchema + - UnionSchema + - string + - type: array + items: + - PrimitiveType + - RecordSchema + - EnumSchema + - ArraySchema + - MapSchema + - UnionSchema + - string + jsonldPredicate: + _id: "sld:values" + _type: "@vocab" + refScope: 2 + doc: "Defines the type of the map elements." + + +- name: UnionSchema + type: record + fields: + type: + doc: "Must be `union`" + type: + type: enum + name: Union_name + symbols: + - "sld:union" + jsonldPredicate: + _id: "sld:type" + _type: "@vocab" + typeDSL: true + refScope: 2 + names: + type: + - PrimitiveType + - RecordSchema + - EnumSchema + - ArraySchema + - MapSchema + - UnionSchema + - string + - type: array + items: + - PrimitiveType + - RecordSchema + - EnumSchema + - ArraySchema + - MapSchema + - UnionSchema + - string + jsonldPredicate: + _id: "sld:names" + _type: "@vocab" + refScope: 2 + doc: "Defines the type of the union elements." diff --git a/salad/schema_salad/metaschema/salad.md b/salad/schema_salad/metaschema/salad.md index d1aa8ddc..26d4a6b8 100644 --- a/salad/schema_salad/metaschema/salad.md +++ b/salad/schema_salad/metaschema/salad.md @@ -10,6 +10,7 @@ Contributors: * The developers of JSON-LD * Nebojša Tijanić , Seven Bridges Genomics * Michael R. Crusoe, ELIXIR-DE +* Iacopo Colonnelli, University of Torino # Abstract @@ -120,6 +121,16 @@ clarifications. resolved with [identifier resolution](#Identifier_resolution). Otherwise the field is resolved with [link resolution](#Link_resolution). +## Introduction to v1.3 + +This is the fifth version of the Schema Salad specification. 
It was created to +enhance code generation by representing CWL data types as specific Python objects +(instead of relying on the generic `Any` type). The following changes have been made: + +* Support for the Avro `map` schema +* Add named versions of the `map` and `union` Avro types +* Support for nested named `union` type definitions + ## References to Other Specifications **Javascript Object Notation (JSON)**: http://json.org