diff --git a/.nf-core.yml b/.nf-core.yml index 3805dc81..c025b49e 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1 +1,13 @@ repository_type: pipeline +lint: + files_exist: + - lib/NfcoreTemplate.groovy + - lib/Utils.groovy + - lib/WorkflowMain.groovy + - lib/WorkflowRnavar.groovy + files_unchanged: + - .github/CONTRIBUTING.md + - .github/workflows/branch.yml + - .github/workflows/linting.yml + - .github/workflows/linting_comment.yml + - docs/images/nf-core-rnavar_logo_light.png diff --git a/README.md b/README.md index 327a633b..64a0458b 100644 --- a/README.md +++ b/README.md @@ -58,9 +58,6 @@ > [!NOTE] > If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. - - Now, you can run the pipeline using: ```console @@ -96,12 +91,23 @@ For more details about the output files and reports, please refer to the nf-core/rnavar was originally written in Nextflow DSL2 for use at the [Barntumörbanken, Karolinska Institutet](https://ki.se/forskning/barntumorbanken), by Praveen Raj ([@praveenraj2018](https://github.com/praveenraj2018)) and Maxime U Garcia ([@maxulysse](https://github.com/maxulysse)). -The pipeline is primarily maintained by Praveen Raj ([@praveenraj2018](https://github.com/praveenraj2018)) from [Barntumörbanken, Karolinska Institutet](https://ki.se/forskning/barntumorbanken) and Maxime U Garcia ([@maxulysse](https://github.com/maxulysse)) from [Seqera Labs](https://seqera/io) +nf-core/rnavar was originally written by Praveen Raj at [The Swedish Childhood Tumor Biobank (Barntumörbanken)](https://ki.se/forskning/barntumorbanken). +Maxime U Garcia at [The Swedish Childhood Tumor Biobank (Barntumörbanken)](https://ki.se/forskning/barntumorbanken) helped with development. 
+ +Maintenance is now led by Maxime U Garcia (now at [Seqera Labs](https://seqera.io)) + +Main developers: + +- [Maxime U Garcia](https://github.com/maxulysse) + +We thank the following people for their extensive assistance in the development of this pipeline: -Many thanks to other who have helped out along the way too, including (but not limited to): -[@ewels](https://github.com/ewels), -[@drpatelh](https://github.com/drpatelh), -[@nschcolnicov](https://github.com/nschcolnicov). +- [Harshil Patel](https://github.com/drpatelh) +- [Nicolás Schcolnicov](https://github.com/nschcolnicov) +- [Ömer An](https://github.com/bounlu) +- [Phil Ewels](https://github.com/ewels) +- [Praveen Raj](https://github.com/praveenraj2018) +- [Sarah Maman](https://github.com/SarahMaman) ## Contributions and Support diff --git a/conf/modules.config b/conf/modules.config index 34c689ae..98e7d992 100755 --- a/conf/modules.config +++ b/conf/modules.config @@ -291,25 +291,12 @@ process { withName: GATK4_HAPLOTYPECALLER { ext.args = [ '--dont-use-soft-clipped-bases', + '--create-output-variant-index true', params.gatk_hc_call_conf ? "--standard-min-confidence-threshold-for-calling $params.gatk_hc_call_conf" : '', - params.bam_csi_index ? "--create-output-variant-index false" : "" + params.generate_gvcf ? "-ERC GVCF" : "" ].join(' ').trim() publishDir = [ enabled: false ] } - withName: GATK4_HAPLOTYPECALLERGVCF { - ext.args = [ - '--dont-use-soft-clipped-bases', - params.gatk_hc_call_conf ? "--standard-min-confidence-threshold-for-calling $params.gatk_hc_call_conf" : '', - params.bam_csi_index ? 
"--create-output-variant-index false" : "", "-ERC GVCF" - ].join(' ').trim() - ext.prefix = {"${meta.id}.haplotypecaller"} - publishDir = [ enabled: false ] - } - withName: GATK4_INDEXFEATUREFILE { - publishDir = [ - enabled: false - ] - } withName: GATK4_MERGEVCFS { ext.prefix = {"${meta.id}.haplotypecaller"} diff --git a/modules.json b/modules.json index 2e9f7ab2..097fe61b 100755 --- a/modules.json +++ b/modules.json @@ -8,289 +8,202 @@ "bcftools/concat": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": [ - "vcf_annotate_ensemblvep_snpeff" - ] + "installed_by": ["vcf_annotate_ensemblvep_snpeff"] }, "bcftools/pluginscatter": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": [ - "vcf_annotate_ensemblvep_snpeff" - ] + "installed_by": ["vcf_annotate_ensemblvep_snpeff"] }, "bcftools/sort": { "branch": "master", "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", - "installed_by": [ - "vcf_annotate_ensemblvep_snpeff" - ] + "installed_by": ["vcf_annotate_ensemblvep_snpeff"] }, "bedtools/merge": { "branch": "master", "git_sha": "3b248b84694d1939ac4bb33df84bf6233a34d668", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bedtools/sort": { "branch": "master", "git_sha": "3b248b84694d1939ac4bb33df84bf6233a34d668", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "cat/fastq": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "ensemblvep/download": { "branch": "master", "git_sha": "214d575774c172062924ad3564b4f66655600730", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "ensemblvep/vep": { "branch": "master", "git_sha": 
"214d575774c172062924ad3564b4f66655600730", - "installed_by": [ - "modules", - "vcf_annotate_ensemblvep", - "vcf_annotate_ensemblvep_snpeff" - ] + "installed_by": ["modules", "vcf_annotate_ensemblvep", "vcf_annotate_ensemblvep_snpeff"] }, "fastqc": { "branch": "master", "git_sha": "c9488585ce7bd35ccd2a30faa2371454c8112fb9", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/applybqsr": { "branch": "master", "git_sha": "f1b92874e0de01dd82fe1bf7c6a7409673578065", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/baserecalibrator": { "branch": "master", "git_sha": "85561f483acc3bc6a885667ea431b3bb3fccdecb", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/bedtointervallist": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/combinegvcfs": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/createsequencedictionary": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/haplotypecaller": { "branch": "master", "git_sha": "eab2bd29e589bd05da2b47c9bf95ef10b9508699", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/indexfeaturefile": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/intervallisttools": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/markduplicates": { "branch": "master", "git_sha": "bc93a4b8904472ce64c6bb50b1fb0a1659b9cf78", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/mergevcfs": { "branch": "master", "git_sha": 
"3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/splitncigarreads": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gatk4/variantfiltration": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gffread": { "branch": "master", "git_sha": "a2d6c3082c5c44b4155a3246daff36701ee49af8", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "gunzip": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "mosdepth": { "branch": "master", "git_sha": "69e3eb17fb31b772b18f134d6e8f8b93ee980e65", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "multiqc": { "branch": "master", "git_sha": "8ec825f465b9c17f9d83000022995b4f7de6fe93", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/faidx": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/flagstat": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/idxstats": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/index": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/merge": { "branch": "master", - "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": [ - "modules" - ] + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", + "installed_by": ["modules"] }, "samtools/sort": { "branch": 
"master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/stats": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "snpeff/download": { "branch": "master", "git_sha": "214d575774c172062924ad3564b4f66655600730", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "snpeff/snpeff": { "branch": "master", "git_sha": "214d575774c172062924ad3564b4f66655600730", - "installed_by": [ - "modules", - "vcf_annotate_ensemblvep_snpeff", - "vcf_annotate_snpeff" - ] + "installed_by": ["modules", "vcf_annotate_ensemblvep_snpeff", "vcf_annotate_snpeff"] }, "star/align": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "star/genomegenerate": { "branch": "master", "git_sha": "d87a6e2156c2099c09280fa70776eaf0a824817a", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "tabix/bgziptabix": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules", - "vcf_annotate_snpeff" - ] + "installed_by": ["modules", "vcf_annotate_snpeff"] }, "tabix/tabix": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules", - "vcf_annotate_ensemblvep", - "vcf_annotate_ensemblvep_snpeff" - ] + "installed_by": ["modules", "vcf_annotate_ensemblvep", "vcf_annotate_ensemblvep_snpeff"] }, "untar": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } }, @@ -299,26 +212,20 @@ "vcf_annotate_ensemblvep": { "branch": "master", "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "vcf_annotate_ensemblvep_snpeff": { "branch": "master", 
"git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "vcf_annotate_snpeff": { "branch": "master", "git_sha": "cfd937a668919d948f6fcbf4218e79de50c2f36f", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +} diff --git a/modules/nf-core/samtools/merge/environment.yml b/modules/nf-core/samtools/merge/environment.yml index 1f7fe49f..fc669b1b 100644 --- a/modules/nf-core/samtools/merge/environment.yml +++ b/modules/nf-core/samtools/merge/environment.yml @@ -4,4 +4,5 @@ channels: - bioconda - defaults dependencies: - - bioconda::samtools=1.18 + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/merge/main.nf b/modules/nf-core/samtools/merge/main.nf index e104b90a..a3048c28 100644 --- a/modules/nf-core/samtools/merge/main.nf +++ b/modules/nf-core/samtools/merge/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_MERGE { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' : - 'biocontainers/samtools:1.18--h50ea8bc_1' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" input: tuple val(meta), path(input_files, stageAs: "?/*") diff --git a/modules/nf-core/samtools/merge/tests/main.nf.test b/modules/nf-core/samtools/merge/tests/main.nf.test index 024f9f70..40b36e82 100644 --- a/modules/nf-core/samtools/merge/tests/main.nf.test +++ b/modules/nf-core/samtools/merge/tests/main.nf.test @@ -9,21 +9,19 @@ nextflow_process { tag "samtools" tag "samtools/merge" - test("sarscov2 - [bam1, bam2, bam3], [], []") { + test("bams") { config "./index.config" when { process { """ - input[0] = [ - [ id: 'test' ], // meta map - [ - file(params.test_data['sarscov2']['illumina']['test_paired_end_methylated_sorted_bam'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true) - ] - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true) ] + ]) input[1] = [[],[]] input[2] = [[],[]] """ @@ -33,40 +31,35 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot( - file(process.out.bam[0][1]).name, - process.out.cram, - file(process.out.csi[0][1]).name, - process.out.crai, - process.out.versions - ).match() } + { assert snapshot(file(process.out.bam[0][1]).name).match("bams_bam") }, + { assert 
snapshot(process.out.cram).match("bams_cram") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("bams_csi") }, + { assert snapshot(process.out.crai).match("bams_crai") }, + { assert snapshot(process.out.versions).match("bams_versions") } ) } - } - test("homo_sapiens - [cram1, cram2], fasta, fai") { + test("crams") { config "./index.config" when { process { """ - input[0] = [ - [ id: 'test' ], // meta map - [ - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true), - file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_cram'], checkIfExists: true), - ] - ] - input[1] = [ - [id:'genome'], - file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) - ] - input[2] = [ - [id:'genome'], - file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true) - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.recalibrated.sorted.cram', checkIfExists: true) ] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ]) """ } } @@ -74,27 +67,24 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot( - file(process.out.cram[0][1]).name, - process.out.bam, - file(process.out.crai[0][1]).name, - process.out.csi, - process.out.versions - ).match() } + { assert snapshot(file(process.out.cram[0][1]).name).match("crams_cram") }, + { assert 
snapshot(process.out.bam).match("crams_bam") }, + { assert snapshot(file(process.out.crai[0][1]).name).match("crams_crai") }, + { assert snapshot(process.out.csi).match("crams_csi") }, + { assert snapshot(process.out.versions).match("crams_versions") } ) } - } - test("sarscov2 - bam, [], []") { + test("bam") { when { process { """ - input[0] = [ - [ id: 'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_methylated_sorted_bam'], checkIfExists: true), - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true) ] + ]) input[1] = [[],[]] input[2] = [[],[]] """ @@ -104,19 +94,16 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot( - file(process.out.bam[0][1]).name, - process.out.cram, - process.out.crai, - process.out.csi, - process.out.versions - ).match() } + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_bam") }, + { assert snapshot(process.out.cram).match("bam_cram") }, + { assert snapshot(process.out.crai).match("bam_crai") }, + { assert snapshot(process.out.csi).match("bam_csi") }, + { assert snapshot(process.out.versions).match("bam_versions") } ) } - } - test("sarscov2 - [bam1, bam2, bam3], [], [] - stub") { + test("bams_stub") { config "./index.config" options "-stub" @@ -124,14 +111,12 @@ nextflow_process { when { process { """ - input[0] = [ - [ id: 'test' ], // meta map - [ - file(params.test_data['sarscov2']['illumina']['test_paired_end_methylated_sorted_bam'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_single_end_sorted_bam'], checkIfExists: true) - ] - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/bam/test.paired_end.methylated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true) ] + ]) input[1] = [[],[]] input[2] = [[],[]] """ @@ -141,16 +126,12 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot( - file(process.out.bam[0][1]).name, - process.out.cram, - file(process.out.csi[0][1]).name, - process.out.crai, - process.out.versions - ).match() } + { assert snapshot(file(process.out.bam[0][1]).name).match("bams_stub_bam") }, + { assert snapshot(process.out.cram).match("bams_stub_cram") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("bams_stub_csi") }, + { assert snapshot(process.out.crai).match("bams_stub_crai") }, + { assert snapshot(process.out.versions).match("bams_stub_versions") } ) } - } - } diff --git a/modules/nf-core/samtools/merge/tests/main.nf.test.snap b/modules/nf-core/samtools/merge/tests/main.nf.test.snap index 3ab57d81..f7da7699 100644 --- a/modules/nf-core/samtools/merge/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/merge/tests/main.nf.test.snap @@ -1,68 +1,228 @@ { - "sarscov2 - bam, [], []": { + "crams_cram": { + "content": [ + "test.cram" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:00.647389" + }, + "bams_stub_cram": { "content": [ - "test.bam", [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:19.937013" + }, + "bams_crai": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:49:24.928616" + }, + "bams_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": 
"2024-02-12T18:49:24.923289" + }, + "bams_cram": { + "content": [ [ - ], - [ - "versions.yml:md5,71986103374bdddb2e3093d20e7d06cb" ] ], - "timestamp": "2023-12-04T17:13:30.244841621" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:49:24.925716" }, - "sarscov2 - [bam1, bam2, bam3], [], [] - stub": { + "crams_csi": { "content": [ - "test.bam", [ - ], - "test.csi", + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:00.655959" + }, + "bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:10.319539" + }, + "bam_versions": { + "content": [ + [ + "versions.yml:md5,52c62d4712f7af00eb962d090ca32fe4" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:16:33.782637377" + }, + "bams_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:49:24.92719" + }, + "bams_stub_csi": { + "content": [ + "test.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:19.940498" + }, + "bam_crai": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:10.328852" + }, + "bams_stub_versions": { + "content": [ [ - "versions.yml:md5,71986103374bdddb2e3093d20e7d06cb" + "versions.yml:md5,52c62d4712f7af00eb962d090ca32fe4" ] ], - "timestamp": "2023-12-04T17:10:14.861445721" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:16:42.594476052" }, - "homo_sapiens - [cram1, cram2], fasta, fai": { + "bam_cram": { "content": [ - "test.cram", [ - ], - "test.cram.crai", + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:10.324219" + }, + "bams_stub_bam": { + "content": [ + 
"test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:19.933153" + }, + "bams_versions": { + "content": [ + [ + "versions.yml:md5,52c62d4712f7af00eb962d090ca32fe4" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:16:04.805335656" + }, + "crams_bam": { + "content": [ [ - ], + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:00.650652" + }, + "crams_versions": { + "content": [ [ - "versions.yml:md5,71986103374bdddb2e3093d20e7d06cb" + "versions.yml:md5,52c62d4712f7af00eb962d090ca32fe4" ] ], - "timestamp": "2023-12-04T17:09:29.716002618" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:16:25.889394689" }, - "sarscov2 - [bam1, bam2, bam3], [], []": { + "bam_csi": { "content": [ - "test.bam", [ - ], - "test.bam.csi", + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:10.33292" + }, + "crams_crai": { + "content": [ + "test.cram.crai" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:00.653512" + }, + "bams_stub_crai": { + "content": [ [ - ], - [ - "versions.yml:md5,71986103374bdddb2e3093d20e7d06cb" ] ], - "timestamp": "2023-12-04T17:08:42.329973045" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:50:19.943839" } } \ No newline at end of file diff --git a/subworkflows/local/prepare_genome/main.nf b/subworkflows/local/prepare_genome/main.nf index 0ad0ef53..82d669ac 100755 --- a/subworkflows/local/prepare_genome/main.nf +++ b/subworkflows/local/prepare_genome/main.nf @@ -44,7 +44,7 @@ workflow PREPARE_GENOME { GATK4_CREATESEQUENCEDICTIONARY(ch_fasta) GFFREAD(ch_gff) - SAMTOOLS_FAIDX(ch_fasta, [['id':null], []]) + SAMTOOLS_FAIDX(ch_fasta, [['id':'genome'], []]) ch_gtf = ch_gtf.mix(GFFREAD.out.gtf) diff --git 
a/workflows/rnavar.nf b/workflows/rnavar.nf index 08a297a0..2b39b7f5 100755 --- a/workflows/rnavar.nf +++ b/workflows/rnavar.nf @@ -70,10 +70,9 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { GTF2BED } from '../modules/local/gtf2bed/main' - include { ALIGN_STAR } from '../subworkflows/local/align_star/main' // Align reads to genome and sort and index the alignment file include { BAM_MARKDUPLICATES } from '../subworkflows/local/bam_markduplicates/main' // Mark duplicates in the BAM file +include { GTF2BED } from '../modules/local/gtf2bed/main' include { PREPARE_CACHE } from '../subworkflows/local/prepare_cache/main' // Download annotation cache if needed include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome/main' // Build the genome index and other reference files include { RECALIBRATE } from '../subworkflows/local/recalibrate/main' // Estimate and correct systematic bias @@ -86,22 +85,21 @@ include { VCF_ANNOTATE_ALL } from '../subworkflows/local/vcf_annotate_all/main ======================================================================================== */ -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' -include { GATK4_BASERECALIBRATOR } from '../modules/nf-core/gatk4/baserecalibrator/main' -include { GATK4_BEDTOINTERVALLIST } from '../modules/nf-core/gatk4/bedtointervallist/main' -include { GATK4_INTERVALLISTTOOLS } from '../modules/nf-core/gatk4/intervallisttools/main' -include { GATK4_HAPLOTYPECALLER } from '../modules/nf-core/gatk4/haplotypecaller/main' -include { GATK4_HAPLOTYPECALLER as GATK4_HAPLOTYPECALLERGVCF } from '../modules/nf-core/gatk4/haplotypecaller/main' -include { GATK4_MERGEVCFS } from '../modules/nf-core/gatk4/mergevcfs/main' -include { GATK4_COMBINEGVCFS } 
from '../modules/nf-core/gatk4/combinegvcfs/main' -include { GATK4_INDEXFEATUREFILE } from '../modules/nf-core/gatk4/indexfeaturefile/main' -include { GATK4_VARIANTFILTRATION } from '../modules/nf-core/gatk4/variantfiltration/main' -include { SAMTOOLS_INDEX } from '../modules/nf-core/samtools/index/main' -include { TABIX_TABIX as TABIX } from '../modules/nf-core/tabix/tabix/main' -include { TABIX_TABIX as TABIXGVCF } from '../modules/nf-core/tabix/tabix/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +include { FASTQC } from '../modules/nf-core/fastqc/main' +include { GATK4_BASERECALIBRATOR } from '../modules/nf-core/gatk4/baserecalibrator/main' +include { GATK4_BEDTOINTERVALLIST } from '../modules/nf-core/gatk4/bedtointervallist/main' +include { GATK4_COMBINEGVCFS } from '../modules/nf-core/gatk4/combinegvcfs/main' +include { GATK4_HAPLOTYPECALLER } from '../modules/nf-core/gatk4/haplotypecaller/main' +include { GATK4_INDEXFEATUREFILE } from '../modules/nf-core/gatk4/indexfeaturefile/main' +include { GATK4_INTERVALLISTTOOLS } from '../modules/nf-core/gatk4/intervallisttools/main' +include { GATK4_MERGEVCFS } from '../modules/nf-core/gatk4/mergevcfs/main' +include { GATK4_VARIANTFILTRATION } from '../modules/nf-core/gatk4/variantfiltration/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { SAMTOOLS_INDEX } from '../modules/nf-core/samtools/index/main' +include { TABIX_TABIX as TABIX } from '../modules/nf-core/tabix/tabix/main' +include { TABIX_TABIX as TABIXGVCF } from '../modules/nf-core/tabix/tabix/main' /* ======================================================================================== @@ -354,6 +352,7 @@ workflow RNAVAR { // Generates a recalibration table based on various co-variates // ch_bam_variant_calling 
= Channel.empty() + if (!params.skip_baserecalibration) { ch_bqsr_table = Channel.empty() // known_sites is made by grouping both the dbsnp and the known indels ressources @@ -436,7 +435,6 @@ workflow RNAVAR { ch_dbsnp_tbi.map{ it -> [[id:it.baseName], it] } ) - ch_haplotypecaller_raw = GATK4_HAPLOTYPECALLER.out.vcf .map{ meta, vcf -> meta.id = meta.sample @@ -445,58 +443,96 @@ workflow RNAVAR { ch_versions = ch_versions.mix(GATK4_HAPLOTYPECALLER.out.versions.first().ifEmpty(null)) - // - // MODULE: MergeVCFS from GATK4 - // Merge multiple VCF files into one VCF - // - GATK4_MERGEVCFS(ch_haplotypecaller_raw, ch_dict) - ch_haplotypecaller_vcf = GATK4_MERGEVCFS.out.vcf - ch_versions = ch_versions.mix(GATK4_MERGEVCFS.out.versions.first().ifEmpty(null)) + if (!params.generate_gvcf){ + // + // MODULE: MergeVCFS from GATK4 + // Merge multiple VCF files into one VCF + // + GATK4_MERGEVCFS( + ch_haplotypecaller_raw, + PREPARE_GENOME.out.dict + ) + ch_haplotypecaller_vcf = GATK4_MERGEVCFS.out.vcf + ch_versions = ch_versions.mix(GATK4_MERGEVCFS.out.versions.first().ifEmpty(null)) - if (params.generate_gvcf){ - GATK4_HAPLOTYPECALLERGVCF( - ch_haplotypecaller_interval_bam, - ch_fasta.map{ meta, fasta -> [fasta] }, - ch_fasta_fai, - ch_dict.map{ meta, dict -> [dict] }, - ch_dbsnp, - ch_dbsnp_tbi + // + // MODULE: Index the VCF using TABIX + // + TABIX( + ch_haplotypecaller_vcf ) - ch_haplotypecallergvcf_raw = GATK4_HAPLOTYPECALLERGVCF.out.vcf - .map{ meta, vcf -> - def new_meta = meta.clone() - new_meta.id = meta.sample - [new_meta, vcf] - }.groupTuple() + ch_haplotypecaller_vcf_tbi = ch_haplotypecaller_vcf + .join(TABIX.out.tbi, by: [0], remainder: true) + .join(TABIX.out.csi, by: [0], remainder: true) + .map{meta, vcf, tbi, csi -> + if (tbi) [meta, vcf, tbi] + else [meta, vcf, csi] + } + + ch_versions = ch_versions.mix(TABIX.out.versions.first().ifEmpty(null)) + ch_final_vcf = ch_haplotypecaller_vcf - ch_versions = 
ch_versions.mix(GATK4_HAPLOTYPECALLERGVCF.out.versions.first().ifEmpty(null)) // - // MODULE: IndexFeatureFile from GATK4 - // Index the gVCF files + // MODULE: VariantFiltration from GATK4 + // Filter variant calls based on certain criteria // - GATK4_INDEXFEATUREFILE(GATK4_HAPLOTYPECALLERGVCF.out.vcf) + if (!params.skip_variantfiltration && !params.bam_csi_index ) { + + GATK4_VARIANTFILTRATION( + ch_haplotypecaller_vcf_tbi, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.fasta_fai.map{ fasta_fai -> [[id:'genome'], fasta_fai]}, + PREPARE_GENOME.out.dict + ) + + ch_filtered_vcf = GATK4_VARIANTFILTRATION.out.vcf + ch_final_vcf = ch_filtered_vcf + ch_versions = ch_versions.mix(GATK4_VARIANTFILTRATION.out.versions.first().ifEmpty(null)) + } - ch_haplotypecallergvcf_raw_index = GATK4_INDEXFEATUREFILE.out.index - .map{ meta, idx -> - def new_meta = meta.clone() - new_meta.id = meta.sample - [new_meta, idx] - }.groupTuple() + // + // SUBWORKFLOW: Annotate variants using snpEff and Ensembl VEP if enabled. + // + if((!params.skip_variantannotation) && (params.annotate_tools) && (params.annotate_tools.contains('merge') || params.annotate_tools.contains('snpeff') || params.annotate_tools.contains('vep'))) { + + vep_fasta = (params.vep_include_fasta) ? fasta.map{ fasta -> [ [ id:fasta.baseName ], fasta ] } : [[id: 'null'], []] + + VCF_ANNOTATE_ALL( + ch_final_vcf, + vep_fasta, + params.annotate_tools, + params.snpeff_genome ? 
"${params.snpeff_genome}.${params.snpeff_db}" : "${params.genome}.${params.snpeff_db}", + snpeff_cache, + vep_genome, + vep_species, + vep_cache_version, + vep_cache, + vep_extra_files) + + // Gather QC reports + ch_reports = ch_reports.mix(VCF_ANNOTATE_ALL.out.reports) + ch_versions = ch_versions.mix(VCF_ANNOTATE_ALL.out.versions.first().ifEmpty(null)) + } - ch_versions = ch_versions.mix(GATK4_INDEXFEATUREFILE.out.versions.first().ifEmpty(null)) + } + else{ + ch_haplotypecaller_raw_index = GATK4_HAPLOTYPECALLER.out.tbi + .map{ meta, idx -> + meta.id = meta.sample + [meta, idx]} + .groupTuple() + // // MODULE: CombineGVCFS from GATK4 // Merge multiple GVCF files into one GVCF - - ch_haplotypecallergvcf_raw_tbi = ch_haplotypecallergvcf_raw - .join(ch_haplotypecallergvcf_raw_index, remainder: true) - + // GATK4_COMBINEGVCFS( - ch_haplotypecallergvcf_raw_tbi, - ch_fasta.map{ meta, fasta -> [fasta] }, - ch_fasta_fai, - ch_dict.map{ meta, dict -> [dict] } + ch_haplotypecaller_raw, + ch_haplotypecaller_raw_index, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.fai, + PREPARE_GENOME.out.dict ) ch_haplotypecaller_gvcf = GATK4_COMBINEGVCFS.out.combined_gvcf ch_versions = ch_versions.mix(GATK4_COMBINEGVCFS.out.versions.first().ifEmpty(null)) @@ -507,8 +543,8 @@ workflow RNAVAR { TABIXGVCF(ch_haplotypecaller_gvcf) ch_haplotypecaller_gvcf_tbi = ch_haplotypecaller_gvcf - .join(TABIXGVCF.out.tbi, remainder: true) - .join(TABIXGVCF.out.csi, remainder: true) + .join(TABIXGVCF.out.tbi, by: [0], remainder: true) + .join(TABIXGVCF.out.csi, by: [0], remainder: true) .map{meta, vcf, tbi, csi -> if (tbi) [meta, vcf, tbi] else [meta, vcf, csi] @@ -517,65 +553,6 @@ workflow RNAVAR { ch_versions = ch_versions.mix(TABIXGVCF.out.versions.first().ifEmpty(null)) } - - // - // MODULE: Index the VCF using TABIX - // - TABIX(ch_haplotypecaller_vcf) - - ch_haplotypecaller_vcf_tbi = ch_haplotypecaller_vcf - .join(TABIX.out.tbi, remainder: true) - .join(TABIX.out.csi, remainder: true) - .map{meta, 
vcf, tbi, csi -> - if (tbi) [meta, vcf, tbi] - else [meta, vcf, csi] - } - - ch_versions = ch_versions.mix(TABIX.out.versions.first().ifEmpty(null)) - ch_final_vcf = ch_haplotypecaller_vcf - - // - // MODULE: VariantFiltration from GATK4 - // Filter variant calls based on certain criteria - // - if (!params.skip_variantfiltration && !params.bam_csi_index ) { - - GATK4_VARIANTFILTRATION( - ch_haplotypecaller_vcf_tbi, - ch_fasta, - ch_fasta_fai.map{ it -> [ [id:'fai'], it ] }, - ch_dict - ) - - ch_filtered_vcf = GATK4_VARIANTFILTRATION.out.vcf - ch_final_vcf = ch_filtered_vcf - ch_versions = ch_versions.mix(GATK4_VARIANTFILTRATION.out.versions.first().ifEmpty(null)) - } - - // - // SUBWORKFLOW: Annotate variants using snpEff and Ensembl VEP if enabled. - // - if ((!params.skip_variantannotation) &&(params.annotate_tools) && (params.annotate_tools.split(',').contains('merge') || params.annotate_tools.split(',').contains('snpeff') || params.annotate_tools.split(',').contains('vep'))) { - - vep_fasta = (params.vep_include_fasta) ? fasta.map{ fasta -> [ [ id:fasta.baseName ], fasta ] } : [[id: 'null'], []] - - VCF_ANNOTATE_ALL( - ch_final_vcf.map{meta, vcf -> [ meta + [ file_name: vcf.baseName ], vcf ] }, - vep_fasta, - params.annotate_tools, - params.snpeff_genome ? "${params.snpeff_genome}.${params.snpeff_db}" : "${params.genome}.${params.snpeff_db}", - snpeff_cache, - vep_genome, - vep_species, - vep_cache_version, - vep_cache, - vep_extra_files) - - // Gather QC reports - ch_reports = ch_reports.mix(VCF_ANNOTATE_ALL.out.reports) - ch_versions = ch_versions.mix(VCF_ANNOTATE_ALL.out.versions.first().ifEmpty(null)) - } - } ch_version_yaml = Channel.empty()