nf-core · Joon-Klaps · Oct 29, 2024 · Oct 29, 2024 · Oct 29, 2024 · Oct 29, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -35,6 +35,7 @@
 - Update GitHub Actions ([#3237](https://github.com/nf-core/tools/pull/3237))
 - add `--dir/-d` option to schema commands ([#3247](https://github.com/nf-core/tools/pull/3247))
 - Update pre-commit hook astral-sh/ruff-pre-commit to v0.7.1 ([#3250](https://github.com/nf-core/tools/pull/3250))
+- Add `downstream_samplesheet` to skip_features and the `GENERATE_DOWNSTREAM_SAMPLESHEETS` subworkflow to the template ([#3261](https://github.com/nf-core/tools/pull/3261))
 
 ## [v3.0.2 - Titanium Tapir Patch](https://github.com/nf-core/tools/releases/tag/3.0.2) - [2024-10-11]
 

diff --git a/nf_core/pipeline-template/conf/test.config b/nf_core/pipeline-template/conf/test.config
@@ -31,4 +31,10 @@ params {
     // Genome references
     genome = 'R64-1-1'
     {%- endif %}
+
+    {% if downstream_samplesheet -%}
+    // Downstream samplesheets
+    generate_downstream_samplesheets = true
+    generate_pipeline_samplesheets   = 'rnaseq,sarek'
+    {%- endif %}
 }
diff --git a/nf_core/pipeline-template/conf/test_full.config b/nf_core/pipeline-template/conf/test_full.config
@@ -26,4 +26,10 @@ params {
     // Fasta references
     fasta = params.pipelines_testdata_base_path + 'viralrecon/genome/NC_045512.2/GCF_009858895.2_ASM985889v3_genomic.200409.fna.gz'
     {%- endif %}
+
+    {% if downstream_samplesheet -%}
+    // Downstream samplesheets
+    generate_downstream_samplesheets = true
+    generate_pipeline_samplesheets   = 'sarek,rnaseq'
+    {%- endif %}
 }
diff --git a/nf_core/pipeline-template/nextflow.config b/nf_core/pipeline-template/nextflow.config
@@ -29,6 +29,13 @@ params {
     {% if citations %}multiqc_methods_description = null{% endif %}
     {%- endif %}
 
+    {% if downstream_samplesheet -%}
+    // Downstream samplesheet generation
+    generate_downstream_samplesheets      = false
+    generate_pipeline_samplesheets        = null
+    generate_pipeline_samplesheets_format = 'csv'
+    {%- endif %}
+
     // Boilerplate options
     outdir                       = null
     {% if modules %}publish_dir_mode             = 'copy'{% endif %}

diff --git a/nf_core/pipeline-template/nextflow_schema.json b/nf_core/pipeline-template/nextflow_schema.json
@@ -86,6 +86,30 @@
             }
         },
         {%- endif %}
+        {% if downstream_samplesheet %}
+        "generate_samplesheet_options": {
+            "title": "Downstream pipeline samplesheet generation options",
+            "type": "object",
+            "fa_icon": "fas fa-university",
+            "description": "Options for generating input samplesheets for complementary downstream pipelines.",
+            "properties": {
+                "generate_downstream_samplesheets": {
+                    "type": "boolean",
+                    "description": "Turn on generation of samplesheets for downstream pipelines."
+                },
+                "generate_pipeline_samplesheets": {
+                    "type": "string",
+                    "description": "Specify a comma separated string in quotes to specify which pipeline to generate a samplesheet for.",
+                    "pattern": "^(rnaseq|sarek)(?:,(sarek|rnaseq)){0,1}$"
+                },
+                "generate_pipeline_samplesheets_format": {
+                    "type": "string",
+                    "description": "Specify the output format of the samplesheet.",
+                    "default": "csv",
+                    "enum": ["txt", "tsv", "csv"]
+                }
+            }
+        },
+        {%- endif %}
         {%- if nf_core_configs %}
         "institutional_config_options": {
             "title": "Institutional config options",

diff --git a/nf_core/pipeline-template/subworkflows/local/generate_downstream_samplesheets/main.nf b/nf_core/pipeline-template/subworkflows/local/generate_downstream_samplesheets/main.nf
@@ -0,0 +1,105 @@
+
+
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    SUBWORKFLOW SPECIFIC FOR RNASEQ
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+// TODO nf-core: Update the following workflow to a specific pipeline
+// Builds one samplesheet row (sample, fastq_1, fastq_2, strandedness) per element of
+// `ch_reads` and hands the rows to channelToSamplesheet, targeting
+// "<outdir>/downstream_samplesheets/rnaseq.<format>".
+workflow SAMPLESHEET_RNASEQ {
+    take:
+    ch_reads // channel: [ meta, reads ]; meta.id and meta.single_end are read below
+    format   // string: forwarded to channelToSamplesheet as the output format
+
+    main:
+
+    //TODO nf-core: customise to your needs
+    ch_list_for_samplesheet = ch_reads.map { meta, reads ->
+        //TODO nf-core: Update the path to the published output directory of the reads
+        def out_path     = file(params.outdir).toString() + '/relative/custom/path/'
+        // Accept both a bare file and a list of files, so that single-end reads
+        // wrapped in a one-element list do not fail on getName()
+        def read_files   = reads instanceof Collection ? reads : [reads]
+        def sample       = meta.id
+        def fastq_1      = out_path + read_files[0].getName()
+        // Only paired-end samples carry a second read file
+        def fastq_2      = !meta.single_end ? out_path + read_files[1].getName() : ""
+        def strandedness = "auto"
+        [sample: sample, fastq_1: fastq_1, fastq_2: fastq_2, strandedness: strandedness]
+    }
+
+    channelToSamplesheet(ch_list_for_samplesheet, "${params.outdir}/downstream_samplesheets/rnaseq", format)
+}
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    SUBWORKFLOW SPECIFIC FOR SAREK
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+// TODO nf-core: Update the following workflow to a specific pipeline
+// Builds one samplesheet row (patient, sample, lane, fastq_1, fastq_2) per element of
+// `ch_reads` and hands the rows to channelToSamplesheet, targeting
+// "<outdir>/downstream_samplesheets/sarek.<format>".
+workflow SAMPLESHEET_SAREK {
+    take:
+    ch_reads // channel: [ meta, reads ]; meta.id and meta.single_end are read below
+    format   // string: forwarded to channelToSamplesheet as the output format
+
+    main:
+
+    //TODO nf-core: customise to your needs
+    ch_list_for_samplesheet = ch_reads.map { meta, reads ->
+        //TODO nf-core: Update the path to the published output directory of the reads
+        def out_path     = file(params.outdir).toString() + '/relative/custom/path/'
+        // Accept both a bare file and a list of files, so that single-end reads
+        // wrapped in a one-element list do not fail on getName()
+        def read_files   = reads instanceof Collection ? reads : [reads]
+        // meta.id is used for both the patient and the sample column
+        def patient      = meta.id
+        def sample       = meta.id
+        def lane         = ""
+        def fastq_1      = out_path + read_files[0].getName()
+        // Only paired-end samples carry a second read file
+        def fastq_2      = !meta.single_end ? out_path + read_files[1].getName() : ""
+        [ patient: patient, sample: sample, lane: lane, fastq_1: fastq_1, fastq_2: fastq_2 ]
+    }
+
+    channelToSamplesheet(ch_list_for_samplesheet, "${params.outdir}/downstream_samplesheets/sarek", format)
+}
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    SUBWORKFLOW CALLING PIPELINE SPECIFIC SAMPLESHEET GENERATION
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+// Forwards `input` to every pipeline-specific samplesheet workflow whose name appears
+// in the comma-separated `params.generate_pipeline_samplesheets` value. The output
+// format is taken from `params.generate_pipeline_samplesheets_format`.
+workflow GENERATE_DOWNSTREAM_SAMPLESHEETS {
+    take:
+    input // channel: forwarded unchanged to the SAMPLESHEET_* workflows
+
+    main:
+    // Tokenise defensively: the parameter may be unset (null), and users may put
+    // spaces after the commas
+    def downstreampipeline_names = (params.generate_pipeline_samplesheets ?: '')
+        .toString()
+        .tokenize(',')
+        .collect { it.trim() }
+
+    if (!downstreampipeline_names) {
+        log.warn("No downstream pipeline selected via `generate_pipeline_samplesheets`; no samplesheet will be generated")
+    }
+
+    // TODO nf-core: Add more pipelines here
+    if (downstreampipeline_names.contains('rnaseq')) {
+        SAMPLESHEET_RNASEQ(
+            input,
+            params.generate_pipeline_samplesheets_format
+        )
+    }
+
+    if (downstreampipeline_names.contains('sarek')) {
+        SAMPLESHEET_SAREK(
+            input,
+            params.generate_pipeline_samplesheets_format
+        )
+    }
+}
+
+// Input can be any channel with a dictionary
+// Writes a channel of maps to a delimited text file: the key set of the first map
+// becomes the header line, followed by one line of joined values per map.
+//   ch_list_for_samplesheet : channel of maps; the header is taken from the first map
+//                             only, so all maps are expected to share its keys and order
+//   path                    : output path without extension; ".<format>" is appended
+//   format                  : "csv" (comma separated), "tsv" or "txt" (tab separated)
+// Returns the channel produced by collectFile.
+// Throws IllegalArgumentException when `format` is not one of the supported values.
+def channelToSamplesheet(ch_list_for_samplesheet, path, format) {
+    def separators = [csv: ",", tsv: "\t", txt: "\t"]
+    def format_sep = separators[format?.toString()]
+
+    // Fail loudly rather than joining every column with a null separator
+    if (format_sep == null) {
+        throw new IllegalArgumentException("Unsupported samplesheet format '${format}'. Expected one of: ${separators.keySet().join(', ')}")
+    }
+
+    ch_list_for_samplesheet
+        .first()
+        .map { it.keySet().join(format_sep) }
+        .concat(ch_list_for_samplesheet.map { it.values().join(format_sep) })
+        .collectFile(
+            name: "${path}.${format}",
+            newLine: true,
+            sort: false
+        )
+}
diff --git a/...-template/subworkflows/local/generate_downstream_samplesheets/tests/main.function.nf.test b/...-template/subworkflows/local/generate_downstream_samplesheets/tests/main.function.nf.test
@@ -0,0 +1,79 @@
+
+// nf-test suite for the channelToSamplesheet function defined in ../main.nf.
+// Each test feeds two row maps (one paired-end, one single-end) to the function and
+// snapshots function.result.
+// NOTE(review): the recorded snapshots for both tests have empty content, so the text
+// of the generated samplesheet does not appear to be verified - confirm.
+nextflow_function {
+
+    name "Test Functions"
+    script "../main.nf"
+    tag 'subworkflows'
+    tag 'generate_downstream_samplesheets'
+    tag 'subworkflows/generate_downstream_samplesheets'
+
+    // Comma-separated output
+    // NOTE(review): input[1] already ends in ".csv" and channelToSamplesheet appends
+    // ".<format>" to the path, so the resulting name is "test.csv.csv" - confirm intent.
+    // NOTE(review): fastq_2 of the paired-end row points at test_1.fastq.gz, the same
+    // file as fastq_1 - presumably test_2.fastq.gz was meant.
+    test("Test Function channelToSamplesheet - csv") {
+
+        function "channelToSamplesheet"
+
+        when {
+            function {
+                """
+                // define inputs of the function here. Example:
+                input[0] = Channel.of(
+                    [
+                        sample: 'test-pe',
+                        fastq_1: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz',
+                        fastq_2: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz',
+                        strandedness: 'auto'
+                    ],
+                    [
+                        sample: 'test-se',
+                        fastq_1: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz',
+                        fastq_2: '',
+                        strandedness: 'auto'
+                    ])
+                input[1] = "$outputDir/test.csv"
+                input[2] = "csv"
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert function.success },
+                { assert snapshot(function.result).match() }
+            )
+        }
+    }
+
+    // Tab-separated output; same rows as the csv test, only path and format differ
+    test("Test Function channelToSamplesheet - tsv") {
+
+        function "channelToSamplesheet"
+
+        when {
+            function {
+                """
+                // define inputs of the function here. Example:
+                input[0] = Channel.of(
+                    [
+                        sample: 'test-pe',
+                        fastq_1: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz',
+                        fastq_2: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz',
+                        strandedness: 'auto'
+                    ],
+                    [
+                        sample: 'test-se',
+                        fastq_1: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz',
+                        fastq_2: '',
+                        strandedness: 'auto'
+                    ])
+                input[1] = "$outputDir/test.tsv"
+                input[2] = "tsv"
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert function.success },
+                { assert snapshot(function.result).match() }
+            )
+        }
+    }
+}
diff --git a/...late/subworkflows/local/generate_downstream_samplesheets/tests/main.function.nf.test.snap b/...late/subworkflows/local/generate_downstream_samplesheets/tests/main.function.nf.test.snap
@@ -0,0 +1,26 @@
+{
+    "Test Function channelToSamplesheet - tsv": {
+        "content": [
+            {
+
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.1",
+            "nextflow": "24.10.0"
+        },
+        "timestamp": "2024-10-30T10:06:13.403158303"
+    },
+    "Test Function channelToSamplesheet - csv": {
+        "content": [
+            {
+
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.1",
+            "nextflow": "24.10.0"
+        },
+        "timestamp": "2024-10-30T10:06:02.487840724"
+    }
+}
diff --git a/...-template/subworkflows/local/generate_downstream_samplesheets/tests/main.workflow.nf.test b/...-template/subworkflows/local/generate_downstream_samplesheets/tests/main.workflow.nf.test
@@ -0,0 +1,46 @@
+// nf-test suite for the GENERATE_DOWNSTREAM_SAMPLESHEETS workflow defined in ../main.nf.
+nextflow_workflow {
+
+    name "Test Workflow GENERATE_DOWNSTREAM_SAMPLESHEETS"
+    script "../main.nf"
+    workflow "GENERATE_DOWNSTREAM_SAMPLESHEETS"
+    tag 'subworkflows'
+    tag 'generate_downstream_samplesheets'
+    tag 'subworkflows/generate_downstream_samplesheets'
+
+    // The test name, including its spelling, is the lookup key in the .snap file,
+    // so it is deliberately left unchanged here.
+    test("Test worfklow rnaseq,sarek") {
+        when {
+            params {
+                outdir                                = "."
+                generate_pipeline_samplesheets        = 'rnaseq,sarek'
+                generate_pipeline_samplesheets_format = 'csv'
+            }
+            workflow {
+                """
+                input[0] = Channel.of(
+                    [
+                        [ id:'test', single_end:false ],
+                        [file('test_1.fastq.gz', checkIfExists: false), file('test_2.fastq.gz', checkIfExists: false)]
+                    ],
+                    [
+                        [id: 'test-se', single_end: true],
+                        file('test_1.fastq.gz', checkIfExists: false)
+                    ]
+                )
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                // Only the expected path strings are snapshotted here, not the
+                // contents of the generated samplesheets
+                { assert snapshot(
+                    [
+                        "${params.outdir}/downstream_samplesheets/rnaseq.csv",
+                        "${params.outdir}/downstream_samplesheets/sarek.csv"
+                    ]).match()
+                }
+            )
+        }
+    }
+
+}
diff --git a/...late/subworkflows/local/generate_downstream_samplesheets/tests/main.workflow.nf.test.snap b/...late/subworkflows/local/generate_downstream_samplesheets/tests/main.workflow.nf.test.snap
@@ -0,0 +1,15 @@
+{
+    "Test worfklow rnaseq,sarek": {
+        "content": [
+            [
+                "./downstream_samplesheets/rnaseq.csv",
+                "./downstream_samplesheets/sarek.csv"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.1",
+            "nextflow": "24.10.0"
+        },
+        "timestamp": "2024-10-31T15:44:42.743679838"
+    }
+}
diff --git a/nf_core/pipeline-template/workflows/pipeline.nf b/nf_core/pipeline-template/workflows/pipeline.nf
@@ -11,6 +11,7 @@
 {% if multiqc %}include { paramsSummaryMultiqc   } from '../subworkflows/nf-core/utils_nfcore_pipeline'{% endif %}
 include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
 {% if citations or multiqc %}include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_{{ short_name }}_pipeline'{% endif %}
+{% if downstream_samplesheet %}include { GENERATE_DOWNSTREAM_SAMPLESHEETS } from '../subworkflows/local/generate_downstream_samplesheets'{% endif %}
 {%- endif %}
 
 /*
@@ -41,6 +42,18 @@ workflow {{ short_name|upper }} {
     ch_versions = ch_versions.mix(FASTQC.out.versions.first())
     {%- endif %}
 
+
+    {% if downstream_samplesheet %}
+    //
+    // SUBWORKFLOW: Generate downstream samplesheets
+    //
+    if (params.generate_downstream_samplesheets) {
+        GENERATE_DOWNSTREAM_SAMPLESHEETS(
+            ch_samplesheet
+        )
+    }
+    {% endif %}
+
     //
     // Collate and save software versions
     //

diff --git a/nf_core/pipelines/create/template_features.yml b/nf_core/pipelines/create/template_features.yml
@@ -279,6 +279,17 @@ modules:
       - "modules.json"
   nfcore_pipelines: False
   custom_pipelines: True
+downstream_samplesheet:
+  skippable_paths:
+    - "subworkflows/local/generate_downstream_samplesheets"
+  short_description: "Generate downstream samplesheets"
+  description: "The pipeline will include the generate_downstream_samplesheets subworkflow for the generation of a samplesheet for other downstream pipelines."
+  help_text: |
+    The pipeline will include the `GENERATE_DOWNSTREAM_SAMPLESHEETS` subworkflow.
+
+    The subworkflow `GENERATE_DOWNSTREAM_SAMPLESHEETS` provides a base template for generating samplesheets: it takes a specified input channel, *e.g.* reads or fasta, and extracts its metadata to build the samplesheet.
+  nfcore_pipelines: True
+  custom_pipelines: True
 changelog:
   skippable_paths:
     - "CHANGELOG.md"