Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

create template GENERATE_DOWNSTREAM_SAMPLESHEETS for tools #3261

Draft
wants to merge 9 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
- Update GitHub Actions ([#3237](https://github.com/nf-core/tools/pull/3237))
- add `--dir/-d` option to schema commands ([#3247](https://github.com/nf-core/tools/pull/3247))
- Update pre-commit hook astral-sh/ruff-pre-commit to v0.7.1 ([#3250](https://github.com/nf-core/tools/pull/3250))
- Add `downstream_samplesheet` to skip_features and the `GENERATE_DOWNSTREAM_SAMPLESHEETS` subworkflow to the template ([#3261](https://github.com/nf-core/tools/pull/3261))

## [v3.0.2 - Titanium Tapir Patch](https://github.com/nf-core/tools/releases/tag/3.0.2) - [2024-10-11]

Expand Down
6 changes: 6 additions & 0 deletions nf_core/pipeline-template/conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,10 @@ params {
// Genome references
genome = 'R64-1-1'
{%- endif %}

{% if downstream_samplesheet -%}
// Downstream samplesheets
generate_downstream_samplesheets = true
generate_pipeline_samplesheets = 'rnaseq,sarek'
{%- endif %}
}
6 changes: 6 additions & 0 deletions nf_core/pipeline-template/conf/test_full.config
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,10 @@ params {
// Fasta references
fasta = params.pipelines_testdata_base_path + 'viralrecon/genome/NC_045512.2/GCF_009858895.2_ASM985889v3_genomic.200409.fna.gz'
{%- endif %}

{% if downstream_samplesheet -%}
// Downstream samplesheets
generate_downstream_samplesheets = true
generate_pipeline_samplesheets = 'sarek,rnaseq'
{%- endif %}
}
7 changes: 7 additions & 0 deletions nf_core/pipeline-template/nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,13 @@ params {
{% if citations %}multiqc_methods_description = null{% endif %}
{%- endif %}

{% if downstream_samplesheet -%}
// Downstream samplesheet generation
generate_downstream_samplesheets = false
generate_pipeline_samplesheets = null
generate_pipeline_samplesheets_format = 'csv'
{%- endif %}

// Boilerplate options
outdir = null
{% if modules %}publish_dir_mode = 'copy'{% endif %}
Expand Down
24 changes: 24 additions & 0 deletions nf_core/pipeline-template/nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,30 @@
}
},
{%- endif %}
{% if downstream_samplesheet %}
"generate_samplesheet_options": {
    "title": "Downstream pipeline samplesheet generation options",
    "type": "object",
    "fa_icon": "fas fa-university",
    "description": "Options for generating input samplesheets for complementary downstream pipelines.",
    "properties": {
        "generate_downstream_samplesheets": {
            "type": "boolean",
            "description": "Turn on generation of samplesheets for downstream pipelines."
        },
        "generate_pipeline_samplesheets": {
            "type": "string",
            "description": "Specify a comma separated string in quotes to specify which pipeline to generate a samplesheet for.",
            "pattern": "^(rnaseq|sarek)(?:,(sarek|rnaseq)){0,1}$"
        },
        "generate_pipeline_samplesheets_format": {
            "type": "string",
            "default": "csv",
            "description": "Specify the output format of the samplesheet.",
            "enum": ["txt", "tsv", "csv"]
        }
    }
},
{%- endif %}
{%- if nf_core_configs %}
"institutional_config_options": {
"title": "Institutional config options",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@



/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
SUBWORKFLOW SPECIFIC FOR RNASEQ
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

// TODO nf-core: Update the following workflow to a specific pipeline
workflow SAMPLESHEET_RNASEQ {
    take:
    ch_reads
    format

    main:

    //TODO nf-core: customise to your needs
    // Turn every [meta, reads] element into one samplesheet row. The row is a
    // map; its key order (sample, fastq_1, fastq_2, strandedness) becomes the
    // column order written by channelToSamplesheet.
    ch_list_for_samplesheet = ch_reads.map { meta, reads ->
        //TODO nf-core: Update the path to the published output directory of the reads
        def published_dir = file(params.outdir).toString() + '/relative/custom/path/'

        // `reads` is used directly when meta.single_end is set and indexed as a
        // pair otherwise; the second mate column stays empty for single-end data.
        def first_mate  = meta.single_end ? reads : reads[0]
        def second_mate = meta.single_end ? "" : published_dir + reads[1].getName()

        [
            sample: meta.id,
            fastq_1: published_dir + first_mate.getName(),
            fastq_2: second_mate,
            strandedness: "auto"
        ]
    }

    // Write the rows to <outdir>/downstream_samplesheets/rnaseq.<format>
    channelToSamplesheet(ch_list_for_samplesheet, "${params.outdir}/downstream_samplesheets/rnaseq", format)
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
SUBWORKFLOW SPECIFIC FOR SAREK
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

// TODO nf-core: Update the following workflow to a specific pipeline
workflow SAMPLESHEET_SAREK {
    take:
    ch_reads
    format

    main:

    //TODO nf-core: customise to your needs
    // Turn every [meta, reads] element into one samplesheet row. The row is a
    // map; its key order (patient, sample, lane, fastq_1, fastq_2) becomes the
    // column order written by channelToSamplesheet.
    ch_list_for_samplesheet = ch_reads.map { meta, reads ->
        //TODO nf-core: Update the path to the published output directory of the reads
        def published_dir = file(params.outdir).toString() + '/relative/custom/path/'

        // `reads` is used directly when meta.single_end is set and indexed as a
        // pair otherwise; the second mate column stays empty for single-end data.
        def first_mate  = meta.single_end ? reads : reads[0]
        def second_mate = meta.single_end ? "" : published_dir + reads[1].getName()

        // meta.id fills both the patient and the sample column; lane is left blank
        [
            patient: meta.id,
            sample: meta.id,
            lane: "",
            fastq_1: published_dir + first_mate.getName(),
            fastq_2: second_mate
        ]
    }

    // Write the rows to <outdir>/downstream_samplesheets/sarek.<format>
    channelToSamplesheet(ch_list_for_samplesheet, "${params.outdir}/downstream_samplesheets/sarek", format)
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
SUBWORKFLOW CALLING PIPELINE SPECIFIC SAMPLESHEET GENERATION
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

workflow GENERATE_DOWNSTREAM_SAMPLESHEETS {
    take:
    input

    main:
    // `params.generate_pipeline_samplesheets` is a comma-separated list of
    // downstream pipeline names, e.g. 'rnaseq,sarek'. It may be null when the
    // user enabled generation without naming a pipeline, so fall back to an
    // empty string instead of failing with a NullPointerException; surrounding
    // whitespace around each name is ignored.
    def downstreampipeline_names = (params.generate_pipeline_samplesheets ?: '').tokenize(',')*.trim()

    if (!downstreampipeline_names) {
        log.warn("No pipeline given via `generate_pipeline_samplesheets`: no downstream samplesheet will be generated.")
    }

    // TODO nf-core: Add more pipelines here
    if (downstreampipeline_names.contains('rnaseq')) {
        SAMPLESHEET_RNASEQ(
            input,
            params.generate_pipeline_samplesheets_format
        )
    }

    // The sarek samplesheet is only produced when 'sarek' itself was requested
    if (downstreampipeline_names.contains('sarek')) {
        SAMPLESHEET_SAREK(
            input,
            params.generate_pipeline_samplesheets_format
        )
    }
}

// Input can be any channel with a dictionary
def channelToSamplesheet(ch_list_for_samplesheet, path, format) {
    // Column delimiter for each supported format ('txt' is tab-separated like 'tsv')
    def delimiters = [csv: ",", tsv: "\t", txt: "\t"]
    def delimiter  = delimiters[format]

    // Header line: the keys of the first row emitted by the channel
    def ch_header_line = ch_list_for_samplesheet
        .first()
        .map { row -> row.keySet().join(delimiter) }

    // Data lines: the values of every row, joined with the same delimiter
    def ch_data_lines = ch_list_for_samplesheet
        .map { row -> row.values().join(delimiter) }

    // Header first, then the rows, one entry per line and without re-sorting,
    // collected into a single file named "<path>.<format>"
    ch_header_line
        .concat(ch_data_lines)
        .collectFile(
            name: "${path}.${format}",
            newLine: true,
            sort: false
        )
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@

// nf-test spec for the `channelToSamplesheet` helper defined in ../main.nf.
// Each test feeds a channel of row maps plus an output path prefix and a
// format, then checks that the call succeeds and matches the stored snapshot.
nextflow_function {

    name "Test Functions"
    script "../main.nf"
    tag 'subworkflows'
    tag 'generate_downstream_samplesheets'
    tag 'subworkflows/generate_downstream_samplesheets'

    test("Test Function channelToSamplesheet - csv") {

        function "channelToSamplesheet"

        when {
            function {
                """
                // input[0]: channel of row maps, input[1]: output path WITHOUT extension
                // (the function appends ".<format>" itself), input[2]: format
                input[0] = Channel.of(
                    [
                        sample: 'test-pe',
                        fastq_1: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz',
                        fastq_2: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_2.fastq.gz',
                        strandedness: 'auto'
                    ],
                    [
                        sample: 'test-se',
                        fastq_1: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz',
                        fastq_2: '',
                        strandedness: 'auto'
                    ])
                input[1] = "$outputDir/test"
                input[2] = "csv"
                """
            }
        }

        then {
            assertAll(
                { assert function.success },
                { assert snapshot(function.result).match() }
            )
        }
    }

    test("Test Function channelToSamplesheet - tsv") {

        function "channelToSamplesheet"

        when {
            function {
                """
                // input[0]: channel of row maps, input[1]: output path WITHOUT extension
                // (the function appends ".<format>" itself), input[2]: format
                input[0] = Channel.of(
                    [
                        sample: 'test-pe',
                        fastq_1: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz',
                        fastq_2: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_2.fastq.gz',
                        strandedness: 'auto'
                    ],
                    [
                        sample: 'test-se',
                        fastq_1: 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/illumina/fastq/test_1.fastq.gz',
                        fastq_2: '',
                        strandedness: 'auto'
                    ])
                input[1] = "$outputDir/test"
                input[2] = "tsv"
                """
            }
        }

        then {
            assertAll(
                { assert function.success },
                { assert snapshot(function.result).match() }
            )
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"Test Function channelToSamplesheet - tsv": {
"content": [
{

}
],
"meta": {
"nf-test": "0.9.1",
"nextflow": "24.10.0"
},
"timestamp": "2024-10-30T10:06:13.403158303"
},
"Test Function channelToSamplesheet - csv": {
"content": [
{

}
],
"meta": {
"nf-test": "0.9.1",
"nextflow": "24.10.0"
},
"timestamp": "2024-10-30T10:06:02.487840724"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// nf-test spec for the GENERATE_DOWNSTREAM_SAMPLESHEETS workflow in ../main.nf.
// Runs the workflow with both supported downstream pipelines requested and one
// paired-end plus one single-end input element.
nextflow_workflow {

    name "Test Workflow GENERATE_DOWNSTREAM_SAMPLESHEETS"
    script "../main.nf"
    workflow "GENERATE_DOWNSTREAM_SAMPLESHEETS"
    tag 'subworkflows'
    tag 'generate_downstream_samplesheets'
    tag 'subworkflows/generate_downstream_samplesheets'

    // NOTE(review): the test name below (including its spelling) is the key of
    // the stored snapshot entry; rename it only together with the .snap file.
    test("Test worfklow rnaseq,sarek") {
        when {
            params {
                outdir = "."
                generate_pipeline_samplesheets = 'rnaseq,sarek'
                generate_pipeline_samplesheets_format = 'csv'
            }
            workflow {
                """
                input[0] = Channel.of(
                    [
                        [ id:'test', single_end:false ],
                        [file('test_1.fastq.gz', checkIfExists: false), file('test_2.fastq.gz', checkIfExists: false)]
                    ],
                    [
                        [id: 'test-se', single_end: true],
                        file('test_1.fastq.gz', checkIfExists: false)
                    ]
                )
                """
            }
        }

        then {
            assertAll(
                { assert workflow.success },
                // The snapshot pins the expected samplesheet path strings only,
                // not the contents of the generated files.
                { assert snapshot(
                    [
                        "${params.outdir}/downstream_samplesheets/rnaseq.csv",
                        "${params.outdir}/downstream_samplesheets/sarek.csv"
                    ]).match()
                }
            )
        }
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"Test worfklow rnaseq,sarek": {
"content": [
[
"./downstream_samplesheets/rnaseq.csv",
"./downstream_samplesheets/sarek.csv"
]
],
"meta": {
"nf-test": "0.9.1",
"nextflow": "24.10.0"
},
"timestamp": "2024-10-31T15:44:42.743679838"
}
}
13 changes: 13 additions & 0 deletions nf_core/pipeline-template/workflows/pipeline.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
{% if multiqc %}include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline'{% endif %}
include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
{% if citations or multiqc %}include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_{{ short_name }}_pipeline'{% endif %}
{% if downstream_samplesheet %}include { GENERATE_DOWNSTREAM_SAMPLESHEETS } from '../subworkflows/local/generate_downstream_samplesheets'{% endif %}
{%- endif %}

/*
Expand Down Expand Up @@ -41,6 +42,18 @@ workflow {{ short_name|upper }} {
ch_versions = ch_versions.mix(FASTQC.out.versions.first())
{%- endif %}


{% if downstream_samplesheet %}
//
// SUBWORKFLOW: Generate downstream samplesheets
//
if (params.generate_downstream_samplesheets) {
GENERATE_DOWNSTREAM_SAMPLESHEETS(
ch_samplesheet
)
}
{% endif %}

//
// Collate and save software versions
//
Expand Down
11 changes: 11 additions & 0 deletions nf_core/pipelines/create/template_features.yml
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,17 @@ modules:
- "modules.json"
nfcore_pipelines: False
custom_pipelines: True
downstream_samplesheet:
skippable_paths:
- "subworkflows/local/generate_downstream_samplesheets"
short_description: "Generate downstream samplesheets"
description: "The pipeline will include the generate_downstream_samplesheets subworkflow for the generation of a samplesheet for other downstream pipelines."
help_text: |
The pipeline will include the `GENERATE_DOWNSTREAM_SAMPLESHEETS` subworkflow.

The subworkflow `GENERATE_DOWNSTREAM_SAMPLESHEETS` provides a base template for generating samplesheets: it takes a specified input channel, *e.g.* reads or fasta, and extracts its metadata to build the samplesheet.
nfcore_pipelines: True
custom_pipelines: True
changelog:
skippable_paths:
- "CHANGELOG.md"
Expand Down
Loading