nf-core · Joon-Klaps · Oct 29, 2024 · Oct 29, 2024 · Oct 29, 2024 · Oct 29, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -33,6 +33,7 @@
 - Update GitHub Actions ([#3237](https://github.com/nf-core/tools/pull/3237))
 - add `--dir/-d` option to schema commands ([#3247](https://github.com/nf-core/tools/pull/3247))
 - Update pre-commit hook astral-sh/ruff-pre-commit to v0.7.1 ([#3250](https://github.com/nf-core/tools/pull/3250))
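+- Add optional `downstream_samplesheet` template feature to generate samplesheets for downstream pipelines ([#3261](https://github.com/nf-core/tools/pull/3261))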
 
 ## [v3.0.2 - Titanium Tapir Patch](https://github.com/nf-core/tools/releases/tag/3.0.2) - [2024-10-11]
 

diff --git a/nf_core/pipeline-template/conf/test.config b/nf_core/pipeline-template/conf/test.config
@@ -31,4 +31,10 @@ params {
     // Genome references
     genome = 'R64-1-1'
     {%- endif %}
+
+    {% if downstream_samplesheet -%}
+    // Downstream samplesheets (guarded by the `downstream_samplesheet` template feature)
+    generate_downstream_samplesheets = true
+    generate_pipeline_samplesheets   = 'rnaseq'
+    {%- endif %}
 }
diff --git a/nf_core/pipeline-template/conf/test_full.config b/nf_core/pipeline-template/conf/test_full.config
@@ -26,4 +26,10 @@ params {
     // Fasta references
     fasta = params.pipelines_testdata_base_path + 'viralrecon/genome/NC_045512.2/GCF_009858895.2_ASM985889v3_genomic.200409.fna.gz'
     {%- endif %}
+
+    {% if downstream_samplesheet -%}
+    // Downstream samplesheets (guarded by the `downstream_samplesheet` template feature)
+    generate_downstream_samplesheets = true
+    generate_pipeline_samplesheets   = 'rnaseq'
+    {%- endif %}
 }
diff --git a/nf_core/pipeline-template/nextflow.config b/nf_core/pipeline-template/nextflow.config
@@ -29,6 +29,13 @@ params {
     {% if citations %}multiqc_methods_description = null{% endif %}
     {%- endif %}
 
+    {% if downstream_samplesheet -%}
+    // Downstream samplesheet generation
+    //   generate_downstream_samplesheets      : switch that enables the GENERATE_DOWNSTREAM_SAMPLESHEETS subworkflow
+    //   generate_pipeline_samplesheets        : comma-separated names of the downstream pipelines to write samplesheets for (e.g. 'rnaseq')
+    //   generate_pipeline_samplesheets_format : samplesheet file format, one of 'csv', 'tsv' or 'txt'
+    generate_downstream_samplesheets      = false
+    generate_pipeline_samplesheets        = null
+    generate_pipeline_samplesheets_format = 'csv'
+    {%- endif %}
+
     // Boilerplate options
     outdir                       = null
     {% if modules %}publish_dir_mode             = 'copy'{% endif %}

diff --git a/nf_core/pipeline-template/subworkflows/local/generate_downstream_samplesheets/main.nf b/nf_core/pipeline-template/subworkflows/local/generate_downstream_samplesheets/main.nf
@@ -0,0 +1,67 @@
+
+
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    SUBWORKFLOW SPECIFIC FOR RNASEQ
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+// TODO: Update the following workflow to a specific pipeline
+//
+// Builds one samplesheet row (sample, fastq_1, fastq_2, strandedness) per item of
+// `ch_reads` and writes the rows to "${params.outdir}/downstream_samplesheets/rnaseq.<format>".
+//
+// take:
+//   ch_reads - channel of [ meta, reads ]; `meta.id` and `meta.single_end` are read,
+//              `reads` may be a single file or a list of files
+//   format   - samplesheet format, handed on to channelToSamplesheet
+workflow SAMPLESHEET_RNASEQ {
+    take:
+    ch_reads
+    format
+
+    main:
+
+    ch_list_for_samplesheet = ch_reads.map { meta, reads ->
+        // Single-end reads can arrive either as a bare file or as a one-element list;
+        // normalise to a list so that both shapes can be indexed the same way.
+        def fastqs       = reads instanceof Collection ? (reads as List) : [reads]
+        // Placeholder directory: replace with the location the pipeline publishes these files to.
+        def out_path     = file(params.outdir).toString() + '/relative/custom/path/'
+        def sample       = meta.id
+        def fastq_1      = out_path + fastqs[0].getName()
+        def fastq_2      = !meta.single_end ? out_path + fastqs[1].getName() : ""
+        def strandedness = "auto"
+        [sample: sample, fastq_1: fastq_1, fastq_2: fastq_2, strandedness: strandedness]
+    }
+
+    channelToSamplesheet(ch_list_for_samplesheet, "${params.outdir}/downstream_samplesheets/rnaseq", format)
+}
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    SUBWORKFLOW CALLING PIPELINE SPECIFIC SAMPLESHEET GENERATION
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+// Dispatches `input` to the samplesheet workflow of every downstream pipeline named in
+// the comma-separated parameter `params.generate_pipeline_samplesheets`.
+//
+// take:
+//   input - channel handed unchanged to the pipeline-specific samplesheet workflows
+workflow GENERATE_DOWNSTREAM_SAMPLESHEETS {
+    take:
+    input
+
+    main:
+    // An unset (null) parameter means "no pipelines requested" rather than a failure on
+    // null.split(); blanks around the commas ('rnaseq, other') are ignored as well.
+    def downstreampipeline_names = (params.generate_pipeline_samplesheets ?: '')
+        .toString()
+        .tokenize(',')
+        .collect { it.trim() }
+
+    // TODO: Add more pipelines here
+    if (downstreampipeline_names.contains('rnaseq')) {
+        SAMPLESHEET_RNASEQ(
+            input,
+            params.generate_pipeline_samplesheets_format
+        )
+    }
+}
+
+// Writes a channel of maps to a single delimited samplesheet file.
+//
+//   ch_list_for_samplesheet - channel whose items are maps; the keys of the first item
+//                             form the header row, the values of each item one data row
+//   path                    - output file path WITHOUT extension
+//   format                  - 'csv', 'tsv' or 'txt'; selects the separator and is
+//                             appended to `path` as the file extension
+//
+// Returns the channel produced by collectFile.
+def channelToSamplesheet(ch_list_for_samplesheet, path, format) {
+    def separators = [csv: ",", tsv: "\t", txt: "\t"]
+    // `as String` lets a GString format match the String keys of the map.
+    def format_sep = separators[format as String]
+    if (format_sep == null) {
+        // Fail early: with no separator the columns could not be told apart.
+        error("Unsupported samplesheet format '${format}'. Supported formats: ${separators.keySet().join(', ')}")
+    }
+
+    ch_list_for_samplesheet
+        .first()
+        .map { it.keySet().join(format_sep) }
+        .concat(ch_list_for_samplesheet.map { it.values().join(format_sep) })
+        .collectFile(
+            name: "${path}.${format}",
+            newLine: true,
+            sort: false
+        )
+}
diff --git a/...-template/subworkflows/local/generate_downstream_samplesheets/tests/main.function.nf.test b/...-template/subworkflows/local/generate_downstream_samplesheets/tests/main.function.nf.test
@@ -0,0 +1,79 @@
+
+// nf-test spec for the channelToSamplesheet function: one case per supported separator.
+nextflow_function {
+
+    name "Test Functions"
+    script "../main.nf"
+    tag 'subworkflows'
+    tag 'generate_downstream_samplesheets'
+    tag 'subworkflows/generate_downstream_samplesheets'
+
+    test("Test Function channelToSamplesheet - csv") {
+
+        function "channelToSamplesheet"
+
+        when {
+            function {
+                """
+                // define inputs of the function here. Example:
+                input[0] = Channel.of(
+                    [
+                        sample: 'test-pe',
+                        fastq_1: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz',
+                        fastq_2: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_2.fastq.gz',
+                        strandedness: 'auto'
+                    ],
+                    [
+                        sample: 'test-se',
+                        fastq_1: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz',
+                        fastq_2: '',
+                        strandedness: 'auto'
+                    ])
+                // The function appends ".<format>" itself, so the path carries no extension.
+                input[1] = "$outputDir/test"
+                input[2] = "csv"
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert function.success },
+                { assert snapshot(function.result).match() }
+            )
+        }
+    }
+
+    test("Test Function channelToSamplesheet - tsv") {
+
+        function "channelToSamplesheet"
+
+        when {
+            function {
+                """
+                // define inputs of the function here. Example:
+                input[0] = Channel.of(
+                    [
+                        sample: 'test-pe',
+                        fastq_1: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz',
+                        fastq_2: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_2.fastq.gz',
+                        strandedness: 'auto'
+                    ],
+                    [
+                        sample: 'test-se',
+                        fastq_1: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz',
+                        fastq_2: '',
+                        strandedness: 'auto'
+                    ])
+                // The function appends ".<format>" itself, so the path carries no extension.
+                input[1] = "$outputDir/test"
+                input[2] = "tsv"
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert function.success },
+                { assert snapshot(function.result).match() }
+            )
+        }
+    }
+}
diff --git a/...late/subworkflows/local/generate_downstream_samplesheets/tests/main.function.nf.test.snap b/...late/subworkflows/local/generate_downstream_samplesheets/tests/main.function.nf.test.snap
@@ -0,0 +1,26 @@
+{
+    "Test Function channelToSamplesheet - tsv": {
+        "content": [
+            {
+
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.1",
+            "nextflow": "24.10.0"
+        },
+        "timestamp": "2024-10-30T10:06:13.403158303"
+    },
+    "Test Function channelToSamplesheet - csv": {
+        "content": [
+            {
+
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.1",
+            "nextflow": "24.10.0"
+        },
+        "timestamp": "2024-10-30T10:06:02.487840724"
+    }
+}
diff --git a/...-template/subworkflows/local/generate_downstream_samplesheets/tests/main.workflow.nf.test b/...-template/subworkflows/local/generate_downstream_samplesheets/tests/main.workflow.nf.test
@@ -0,0 +1,45 @@
+// nf-test spec for GENERATE_DOWNSTREAM_SAMPLESHEETS: feeds one paired-end and one
+// single-end item and requests the 'rnaseq' samplesheet in csv format.
+nextflow_workflow {
+
+    name "Test Workflow GENERATE_DOWNSTREAM_SAMPLESHEETS"
+    script "../main.nf"
+    workflow "GENERATE_DOWNSTREAM_SAMPLESHEETS"
+    tag 'subworkflows'
+    tag 'generate_downstream_samplesheets'
+    tag 'subworkflows/generate_downstream_samplesheets'
+
+    // NOTE: the test name (including its spelling) is the key of the stored snapshot,
+    // so it must only be changed together with the .snap file.
+    test("Test worfklow rnaseq") {
+        when {
+            params {
+                outdir                                = "."
+                generate_pipeline_samplesheets        = 'rnaseq'
+                generate_pipeline_samplesheets_format = 'csv'
+            }
+            workflow {
+                """
+                input[0] = Channel.of(
+                    [
+                        [ id:'test', single_end:false ],
+                        [file('test_1.fastq.gz', checkIfExists: false), file('test_2.fastq.gz', checkIfExists: false)]
+                    ],
+                    [
+                        [id: 'test-se', single_end: true],
+                        file('test_1.fastq.gz', checkIfExists: false)
+                    ]
+                )
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(
+                    [
+                        "${params.outdir}/downstream_samplesheets/rnaseq.csv"
+                    ]).match()
+                }
+            )
+        }
+    }
+
+}
diff --git a/...late/subworkflows/local/generate_downstream_samplesheets/tests/main.workflow.nf.test.snap b/...late/subworkflows/local/generate_downstream_samplesheets/tests/main.workflow.nf.test.snap
@@ -0,0 +1,14 @@
+{
+    "Test worfklow rnaseq": {
+        "content": [
+            [
+                "./downstream_samplesheets/rnaseq.csv"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.1",
+            "nextflow": "24.10.0"
+        },
+        "timestamp": "2024-10-30T09:34:46.912767154"
+    }
+}
diff --git a/nf_core/pipeline-template/workflows/pipeline.nf b/nf_core/pipeline-template/workflows/pipeline.nf
@@ -11,6 +11,7 @@
 {% if multiqc %}include { paramsSummaryMultiqc   } from '../subworkflows/nf-core/utils_nfcore_pipeline'{% endif %}
 include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
 {% if citations or multiqc %}include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_{{ short_name }}_pipeline'{% endif %}
+{% if downstream_samplesheet %}include { GENERATE_DOWNSTREAM_SAMPLESHEETS } from '../subworkflows/local/generate_downstream_samplesheets'{% endif %}
 {%- endif %}
 
 /*
@@ -41,6 +42,18 @@ workflow {{ short_name|upper }} {
     ch_versions = ch_versions.mix(FASTQC.out.versions.first())
     {%- endif %}
 
+
+    {% if downstream_samplesheet %}
+    //
+    // SUBWORKFLOW: Generate downstream samplesheets
+    // Runs only when params.generate_downstream_samplesheets is set; the subworkflow
+    // picks the target pipelines from params.generate_pipeline_samplesheets.
+    //
+    if (params.generate_downstream_samplesheets) {
+        GENERATE_DOWNSTREAM_SAMPLESHEETS(
+            ch_samplesheet
+        )
+    }
+    {% endif %}
+
     //
     // Collate and save software versions
     //

diff --git a/nf_core/pipelines/create/template_features.yml b/nf_core/pipelines/create/template_features.yml
@@ -279,6 +279,17 @@ modules:
       - "modules.json"
   nfcore_pipelines: False
   custom_pipelines: True
+downstream_samplesheet:
+  skippable_paths:
+    - "subworkflows/local/generate_downstream_samplesheets"
+  short_description: "Generate downstream samplesheets"
+  description: "The pipeline will include the generate_downstream_samplesheets subworkflow for the generation of a samplesheet for other downstream pipelines."
+  help_text: |
+    The pipeline will include the generate_downstream_samplesheets subworkflow.
+
+    The subworkflow generate_downstream_samplesheets provides a base template for generating samplesheets: it takes a specified input channel (e.g. reads or fasta files) and extracts its metadata to build samplesheets for downstream pipelines.
+  nfcore_pipelines: True
+  custom_pipelines: True
 changelog:
   skippable_paths:
     - "CHANGELOG.md"