diff --git a/CHANGELOG.md b/CHANGELOG.md index d90cb63..5446abe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,8 @@ Initial release of nf-core/references, created with the [nf-core](https://nf-co. - [41](https://github.com/nf-core/references/pull/41) - Better sarek tests - [41](https://github.com/nf-core/references/pull/41) - Better publishing for sarek related files - [43](https://github.com/nf-core/references/pull/43) - Fasta is no longer a required asset +- [48](https://github.com/nf-core/references/pull/48) - Simplify VCF tabix index generation and related assets +- [48](https://github.com/nf-core/references/pull/48) - Code refactoring (new subworkflows for each type of operation) - [49](https://github.com/nf-core/references/pull/49) - Better publishing for all files ### Fixed diff --git a/assets/genomes/test/default_extended.yml b/assets/genomes/test/default_extended.yml index fc69d05..a51628b 100644 --- a/assets/genomes/test/default_extended.yml +++ b/assets/genomes/test/default_extended.yml @@ -1,22 +1,28 @@ - genome: "GRCh38_chr21" - dbsnp_vcf: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz" fasta: "https://raw.githubusercontent.com/nf-core/test-datasets/references/references/GRCh38_chr21/GRCh38_chr21.fa" fasta_dict: "https://raw.githubusercontent.com/nf-core/test-datasets/references/references/GRCh38_chr21/GRCh38_chr21.dict" fasta_fai: "https://raw.githubusercontent.com/nf-core/test-datasets/references/references/GRCh38_chr21/GRCh38_chr21.fa.fai" fasta_sizes: "https://raw.githubusercontent.com/nf-core/test-datasets/references/references/GRCh38_chr21/GRCh38_chr21.fa.sizes" - germline_resource_vcf: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz" gff: "https://raw.githubusercontent.com/nf-core/test-datasets/references/references/GRCh38_chr21/genes_chr21.gff" gtf: 
"https://raw.githubusercontent.com/nf-core/test-datasets/references/references/GRCh38_chr21/GRCh38_chr21.gtf" - known_indels_vcf: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz" - known_snps_vcf: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz" mito_name: "MT" readme: "https://raw.githubusercontent.com/nf-core/test-datasets/references/references/GRCh38_chr21/README.md" source: "nf-core/references" - source_dbsnp: "GATK_BUNDLE" - source_germline_resource: "GATK_BUNDLE" - source_known_indels: "GATK_BUNDLE" - source_known_snps: "GATK_BUNDLE" + source_vcf: "GATK_BUNDLE" species: "Homo_sapiens" splice_sites: "https://raw.githubusercontent.com/nf-core/test-datasets/references/references/GRCh38_chr21/genes_chr21.splice_sites.txt" transcript_fasta: "https://raw.githubusercontent.com/nf-core/test-datasets/references/references/GRCh38_chr21/genome.transcripts.fa" + vcf: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz" # macs_gsize: "1.2e7" +- genome: "GRCh38_chr21" + readme: "https://raw.githubusercontent.com/nf-core/test-datasets/references/references/GRCh38_chr21/README.md" + source: "nf-core/references" + source_vcf: "GATK_BUNDLE" + species: "Homo_sapiens" + vcf: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz" +- genome: "GRCh38_chr21" + readme: "https://raw.githubusercontent.com/nf-core/test-datasets/references/references/GRCh38_chr21/README.md" + source: "nf-core/references" + source_vcf: "GATK_BUNDLE" + species: "Homo_sapiens" + vcf: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz" diff --git a/assets/genomes/test/default_full.yml b/assets/genomes/test/default_full.yml index 
27accf4..cc79561 100644 --- a/assets/genomes/test/default_full.yml +++ b/assets/genomes/test/default_full.yml @@ -1,27 +1,32 @@ - genome: "GRCh38_chr21" - dbsnp_vcf: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz" - dbsnp_vcf_tbi: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi" fasta: "https://raw.githubusercontent.com/nf-core/test-datasets/references/references/GRCh38_chr21/GRCh38_chr21.fa" fasta_dict: "https://raw.githubusercontent.com/nf-core/test-datasets/references/references/GRCh38_chr21/GRCh38_chr21.dict" fasta_fai: "https://raw.githubusercontent.com/nf-core/test-datasets/references/references/GRCh38_chr21/GRCh38_chr21.fa.fai" fasta_sizes: "https://raw.githubusercontent.com/nf-core/test-datasets/references/references/GRCh38_chr21/GRCh38_chr21.fa.sizes" - germline_resource_vcf: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz" - germline_resource_vcf_tbi: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz.tbi" gff: "https://raw.githubusercontent.com/nf-core/test-datasets/references/references/GRCh38_chr21/genes_chr21.gff" gtf: "https://raw.githubusercontent.com/nf-core/test-datasets/references/references/GRCh38_chr21/GRCh38_chr21.gtf" intervals_bed: "https://raw.githubusercontent.com/nf-core/test-datasets/references/references/GRCh38_chr21/GRCh38_chr21.bed" - known_indels_vcf: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz" - known_indels_vcf_tbi: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz.tbi" - known_snps_vcf: 
"https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz" - known_snps_vcf_tbi: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi" mito_name: "MT" readme: "https://raw.githubusercontent.com/nf-core/test-datasets/references/references/GRCh38_chr21/README.md" source: "nf-core/references" - source_dbsnp: "GATK_BUNDLE" - source_germline_resource: "GATK_BUNDLE" - source_known_indels: "GATK_BUNDLE" - source_known_snps: "GATK_BUNDLE" + source_vcf: "GATK_BUNDLE" species: "Homo_sapiens" splice_sites: "https://raw.githubusercontent.com/nf-core/test-datasets/references/references/GRCh38_chr21/genes_chr21.splice_sites.txt" transcript_fasta: "https://raw.githubusercontent.com/nf-core/test-datasets/references/references/GRCh38_chr21/genome.transcripts.fa" + vcf: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz" + vcf_tbi: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz.tbi" # macs_gsize: "1.2e7" +- genome: "GRCh38_chr21" + readme: "https://raw.githubusercontent.com/nf-core/test-datasets/references/references/GRCh38_chr21/README.md" + source: "nf-core/references" + source_vcf: "GATK_BUNDLE" + species: "Homo_sapiens" + vcf: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz" + vcf_tbi: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz.tbi" +- genome: "GRCh38_chr21" + readme: "https://raw.githubusercontent.com/nf-core/test-datasets/references/references/GRCh38_chr21/README.md" + source: "nf-core/references" + source_vcf: "GATK_BUNDLE" + species: "Homo_sapiens" + vcf: 
"https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz" + vcf_tbi: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz.tbi" diff --git a/assets/genomes/test/pipelines/sarek.yml b/assets/genomes/test/pipelines/sarek.yml index f039b0f..c813acf 100644 --- a/assets/genomes/test/pipelines/sarek.yml +++ b/assets/genomes/test/pipelines/sarek.yml @@ -1,12 +1,16 @@ - genome: "testdata.GRCh38_chr22" - dbsnp_vcf: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz" fasta: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/genome.fasta" - germline_resource_vcf: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz" - known_indels_vcf: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz" - known_snps_vcf: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz" + vcf: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/dbsnp_146.hg38.vcf.gz" source: "nf-core/references" - source_dbsnp: "GATK_BUNDLE" - source_germline_resource: "GATK_BUNDLE" - source_known_indels: "GATK_BUNDLE" - source_known_snps: "GATK_BUNDLE" + source_vcf: "GATK_BUNDLE" species: "Homo_sapiens" +- genome: "testdata.GRCh38_chr22" + source_vcf: "GATK_BUNDLE" + species: "Homo_sapiens" + source: "nf-core/references" + vcf: "https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/gnomAD.r2.1.1.vcf.gz" +- genome: "testdata.GRCh38_chr22" + source_vcf: "GATK_BUNDLE" + species: "Homo_sapiens" + source: "nf-core/references" + vcf: 
"https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/genome/vcf/mills_and_1000G.indels.vcf.gz" diff --git a/assets/genomes/test/pipelines/sarek_s3_muliple_glob.yml b/assets/genomes/test/pipelines/sarek_s3_muliple_glob.yml index 5eb4d04..23b1bd9 100644 --- a/assets/genomes/test/pipelines/sarek_s3_muliple_glob.yml +++ b/assets/genomes/test/pipelines/sarek_s3_muliple_glob.yml @@ -1,11 +1,11 @@ # from sarek igenomes.config - genome: GRCh37 - known_indels_vcf: "s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.gz" - source_known_indels: "GATK_BUNDLE" + vcf: "s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh37/Annotation/GATKBundle/{1000G_phase1,Mills_and_1000G_gold_standard}.indels.b37.vcf.gz" + source_vcf: "GATK_BUNDLE" species: "Homo_sapiens" source: "GATK" - genome: GRCh38 - known_indels_vcf: "s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz" - source_known_indels: "GATK_BUNDLE" + vcf: "s3://ngi-igenomes/igenomes/Homo_sapiens/GATK/GRCh38/Annotation/GATKBundle/{Mills_and_1000G_gold_standard.indels.hg38,beta/Homo_sapiens_assembly38.known_indels}.vcf.gz" + source_vcf: "GATK_BUNDLE" species: "Homo_sapiens" source: "GATK" diff --git a/assets/schema_input.json b/assets/schema_input.json index 36293cc..784a882 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -18,25 +18,10 @@ "errorMessage": "Where the references came from", "meta": ["source"] }, - "source_dbsnp": { + "source_vcf": { "type": "string", "errorMessage": "Where the references came from", - "meta": ["source_dbsnp"] - }, - "source_germline_resource": { - "type": "string", - "errorMessage": "Where the references came from", - "meta": ["source_germline_resource"] - }, - "source_known_indels": { - "type": "string", - "errorMessage": "Where the references 
came from", - "meta": ["source_known_indels"] - }, - "source_known_snps": { - "type": "string", - "errorMessage": "Where the references came from", - "meta": ["source_known_snps"] + "meta": ["source_vcf"] }, "species": { "type": "string", @@ -88,22 +73,7 @@ "pattern": "^\\S+\\.f(ast|n)?a(\\.gz)?$", "errorMessage": "TODO" }, - "dbsnp_vcf": { - "type": "string", - "pattern": "^\\S+\\.vcf\\.gz$", - "errorMessage": "TODO" - }, - "known_snps_vcf": { - "type": "string", - "pattern": "^\\S+\\.vcf\\.gz$", - "errorMessage": "TODO" - }, - "known_indels_vcf": { - "type": "string", - "pattern": "^\\S+\\.vcf\\.gz$", - "errorMessage": "TODO" - }, - "germline_resource_vcf": { + "vcf": { "type": "string", "pattern": "^\\S+\\.vcf\\.gz$", "errorMessage": "TODO" diff --git a/main.nf b/main.nf index cad54fe..fd71eb9 100644 --- a/main.nf +++ b/main.nf @@ -102,19 +102,19 @@ workflow { } output { - 'bowtie1' { + 'bowtie1_index' { path { meta, index -> { file -> "${meta.species}/${meta.source}/${meta.id}/Sequence/BowtieIndex/" } } } - 'bowtie2' { + 'bowtie2_index' { path { meta, index -> { file -> "${meta.species}/${meta.source}/${meta.id}/Sequence/Bowtie2Index/" } } } - 'bwamem1' { + 'bwamem1_index' { path { meta, index -> { file -> "${meta.species}/${meta.source}/${meta.id}/Sequence/BWAIndex/" } } } - 'bwamem2' { + 'bwamem2_index' { path { meta, index -> { file -> "${meta.species}/${meta.source}/${meta.id}/Sequence/BWAmem2Index/" } } } - 'dragmap' { + 'dragmap_hashmap' { path { meta, index -> { file -> "${meta.species}/${meta.source}/${meta.id}/Sequence/dragmap/" } } } 'fasta' { @@ -129,56 +129,47 @@ output { 'fasta_sizes' { path { meta, sizes -> { file -> "${meta.species}/${meta.source}/${meta.id}/Sequence/WholeGenomeFasta/${file}" } } } - 'gffread' { + 'gtf' { path { meta, intervals -> { file -> "${meta.species}/${meta.source}/${meta.id}/Annotation/Genes/${file}" } } } - 'hisat2' { + 'hisat2_index' { path { meta, index -> { file -> 
"${meta.species}/${meta.source}/${meta.id}/Sequence/Hisat2Index/" } } } - 'intervals' { + 'intervals_bed' { path { meta, intervals -> { file -> "${meta.species}/${meta.source}/${meta.id}/Annotation/intervals/${file}" } } } - 'kallisto' { + 'kallisto_index' { path { meta, index -> { file -> "${meta.species}/${meta.source}/${meta.id}/Sequence/KallistoIndex/" } } } - 'msisensorpro' { + 'msisensorpro_list' { path { meta, index -> { file -> "${meta.species}/${meta.source}/${meta.id}/Annotation/msisensorpro/${file}" } } } 'multiqc_data' { - path 'multiqc' + path { folder -> { file -> "multiqc/multiqc_data" } } } 'multiqc_plots' { - path 'multiqc' + path { folder -> { file -> "multiqc/multiqc_plots" } } } 'multiqc_report' { - path 'multiqc' + path { folder -> { file -> "multiqc/multiqc_report" } } } - 'rsem' { + 'rsem_index' { path { meta, index -> { file -> "${meta.species}/${meta.source}/${meta.id}/Sequence/RSEMIndex/" } } } - 'salmon' { + 'salmon_index' { path { meta, index -> { file -> "${meta.species}/${meta.source}/${meta.id}/Sequence/SalmonIndex/" } } } 'splice_sites' { path { meta, txt -> { file -> "${meta.species}/${meta.source}/${meta.id}/Sequence/SpliceSites/${file}" } } } - 'star' { + 'star_index' { path { meta, index -> { file -> "${meta.species}/${meta.source}/${meta.id}/Sequence/STARIndex/" } } } 'transcript_fasta' { path { meta, fasta -> { file -> "${meta.species}/${meta.source}/${meta.id}/Sequence/TranscriptFasta/${file}" } } } - 'tabix_dbsnp' { - path { meta, vcf -> { file -> "${meta.species}/${meta.source}/${meta.id}/Annotation/${meta.source_dbsnp}/${file}" } } - } - 'tabix_germline_resource' { - path { meta, vcf -> { file -> "${meta.species}/${meta.source}/${meta.id}/Annotation/${meta.source_germline_resource}/${file}" } } - } - 'tabix_known_indels' { - path { meta, vcf -> { file -> "${meta.species}/${meta.source}/${meta.id}/Annotation/${meta.source_known_indels}/${file}" } } - } - 'tabix_known_snps' { - path { meta, vcf -> { file -> 
"${meta.species}/${meta.source}/${meta.id}/Annotation/${meta.source_known_snps}/${file}" } } + 'vcf_tbi' { + path { meta, tbi -> { file -> "${meta.species}/${meta.source}/${meta.id}/Annotation/${meta.source_vcf}/${file}" } } } } /* @@ -200,28 +191,25 @@ workflow NFCORE_REFERENCES { REFERENCES(input, tools) emit: - bowtie1 = REFERENCES.out.bowtie1 - bowtie2 = REFERENCES.out.bowtie2 - bwamem1 = REFERENCES.out.bwamem1 - bwamem2 = REFERENCES.out.bwamem2 - dbsnp_vcf_tbi = REFERENCES.out.dbsnp_vcf_tbi - dragmap = REFERENCES.out.dragmap - fasta = REFERENCES.out.fasta - fasta_dict = REFERENCES.out.fasta_dict - fasta_fai = REFERENCES.out.fasta_fai - germline_resource_vcf_tbi = REFERENCES.out.germline_resource_vcf_tbi - gffread = REFERENCES.out.gff_gtf - hisat2 = REFERENCES.out.hisat2 - hisat2_splice_sites = REFERENCES.out.hisat2_splice_sites - intervals = REFERENCES.out.intervals_bed - kallisto = REFERENCES.out.kallisto - known_indels_vcf_tbi = REFERENCES.out.known_indels_vcf_tbi - known_snps_vcf_tbi = REFERENCES.out.known_snps_vcf_tbi - msisensorpro = REFERENCES.out.msisensorpro - rsem = REFERENCES.out.rsem - rsem_transcript_fasta = REFERENCES.out.rsem_transcript_fasta - salmon = REFERENCES.out.salmon - sizes = REFERENCES.out.sizes - star = REFERENCES.out.star - versions = REFERENCES.out.versions + bowtie1_index = REFERENCES.out.bowtie1_index + bowtie2_index = REFERENCES.out.bowtie2_index + bwamem1_index = REFERENCES.out.bwamem1_index + bwamem2_index = REFERENCES.out.bwamem2_index + dragmap_hashmap = REFERENCES.out.dragmap_hashmap + fasta = REFERENCES.out.fasta + fasta_dict = REFERENCES.out.fasta_dict + fasta_fai = REFERENCES.out.fasta_fai + fasta_sizes = REFERENCES.out.fasta_sizes + gtf = REFERENCES.out.gtf + hisat2_index = REFERENCES.out.hisat2_index + splice_sites = REFERENCES.out.splice_sites + intervals_bed = REFERENCES.out.intervals_bed + kallisto_index = REFERENCES.out.kallisto_index + msisensorpro_list = REFERENCES.out.msisensorpro_list + rsem_index = 
REFERENCES.out.rsem_index + transcript_fasta = REFERENCES.out.transcript_fasta + salmon_index = REFERENCES.out.salmon_index + star_index = REFERENCES.out.star_index + vcf_tbi = REFERENCES.out.vcf_tbi + versions = REFERENCES.out.versions } diff --git a/subworkflows/local/create_align_index/main.nf b/subworkflows/local/create_align_index/main.nf new file mode 100644 index 0000000..d087b9f --- /dev/null +++ b/subworkflows/local/create_align_index/main.nf @@ -0,0 +1,67 @@ +include { BOWTIE_BUILD as BOWTIE1_BUILD } from '../../../modules/nf-core/bowtie/build' +include { BOWTIE2_BUILD } from '../../../modules/nf-core/bowtie2/build' +include { BWAMEM2_INDEX } from '../../../modules/nf-core/bwamem2/index' +include { BWA_INDEX as BWAMEM1_INDEX } from '../../../modules/nf-core/bwa/index' +include { DRAGMAP_HASHTABLE } from '../../../modules/nf-core/dragmap/hashtable' + +workflow CREATE_ALIGN_INDEX { + take: + fasta // channel: [meta, fasta] + run_bowtie1 // boolean: true/false + run_bowtie2 // boolean: true/false + run_bwamem1 // boolean: true/false + run_bwamem2 // boolean: true/false + run_dragmap // boolean: true/false + + main: + bowtie1_index = Channel.empty() + bowtie2_index = Channel.empty() + bwamem1_index = Channel.empty() + bwamem2_index = Channel.empty() + dragmap_hashmap = Channel.empty() + + versions = Channel.empty() + + if (run_bowtie1) { + BOWTIE1_BUILD(fasta) + + bowtie1_index = BOWTIE1_BUILD.out.index + versions = versions.mix(BOWTIE1_BUILD.out.versions) + } + + if (run_bowtie2) { + BOWTIE2_BUILD(fasta) + + bowtie2_index = BOWTIE2_BUILD.out.index + versions = versions.mix(BOWTIE2_BUILD.out.versions) + } + + if (run_bwamem1) { + BWAMEM1_INDEX(fasta) + + bwamem1_index = BWAMEM1_INDEX.out.index + versions = versions.mix(BWAMEM1_INDEX.out.versions) + } + + if (run_bwamem2) { + BWAMEM2_INDEX(fasta) + + bwamem2_index = BWAMEM2_INDEX.out.index + versions = versions.mix(BWAMEM2_INDEX.out.versions) + } + + if (run_dragmap) { + DRAGMAP_HASHTABLE(fasta) + + 
dragmap_hashmap = DRAGMAP_HASHTABLE.out.hashmap + versions = versions.mix(DRAGMAP_HASHTABLE.out.versions) + } + + emit: + bowtie1_index // channel: [meta, BowtieIndex/] + bowtie2_index // channel: [meta, Bowtie2Index/] + bwamem1_index // channel: [meta, BWAmemIndex/] + bwamem2_index // channel: [meta, BWAmem2memIndex/] + dragmap_hashmap // channel: [meta, DragmapHashtable/] + versions // channel: [versions.yml] +} diff --git a/subworkflows/local/create_align_index_with_gff/main.nf b/subworkflows/local/create_align_index_with_gff/main.nf new file mode 100644 index 0000000..0ca28be --- /dev/null +++ b/subworkflows/local/create_align_index_with_gff/main.nf @@ -0,0 +1,132 @@ +include { GFFREAD } from '../../../modules/nf-core/gffread' +include { HISAT2_BUILD } from '../../../modules/nf-core/hisat2/build' +include { HISAT2_EXTRACTSPLICESITES } from '../../../modules/nf-core/hisat2/extractsplicesites' +include { KALLISTO_INDEX } from '../../../modules/nf-core/kallisto/index' +include { RSEM_PREPAREREFERENCE as MAKE_TRANSCRIPTS_FASTA } from '../../../modules/nf-core/rsem/preparereference' +include { RSEM_PREPAREREFERENCE as RSEM_PREPAREREFERENCE_GENOME } from '../../../modules/nf-core/rsem/preparereference' +include { SALMON_INDEX } from '../../../modules/nf-core/salmon/index' +include { STAR_GENOMEGENERATE } from '../../../modules/nf-core/star/genomegenerate' + +workflow CREATE_ALIGN_INDEX_WITH_GFF { + take: + fasta // channel: [meta, fasta] + input_gff // channel: [meta, gff] + input_gtf // channel: [meta, gtf] + input_splice_sites // channel: [meta, splice_sites] + input_transcript_fasta // channel: [meta, transcript_fasta] + run_hisat2 // boolean: true/false + run_hisat2_extractsplicesites // boolean: true/false + run_kallisto // boolean: true/false + run_rsem // boolean: true/false + run_rsem_make_transcript_fasta // boolean: true/false + run_salmon // boolean: true/false + run_star // boolean: true/false + + main: + gtf = Channel.empty() + hisat2_index = 
Channel.empty() + kallisto_index = Channel.empty() + rsem_index = Channel.empty() + salmon_index = Channel.empty() + splice_sites = Channel.empty() + star_index = Channel.empty() + transcript_fasta = Channel.empty() + + versions = Channel.empty() + + if (run_hisat2 || run_kallisto || run_rsem || run_rsem_make_transcript_fasta || run_salmon || run_star) { + + GFFREAD( + input_gff, + [] + ) + + versions = versions.mix(GFFREAD.out.versions) + + gtf = input_gtf + .mix(GFFREAD.out.gtf) + .groupTuple() + .map { meta, file -> + return file[1] ? [meta, file[1]] : [meta, file] + } + + if (run_hisat2 || run_hisat2_extractsplicesites) { + gtf_hisat2 = gtf.map { meta, map_gtf -> + return meta.run_hisat2 ? [meta, map_gtf] : null + } + + HISAT2_EXTRACTSPLICESITES(gtf_hisat2) + + splice_sites = input_splice_sites.mix(HISAT2_EXTRACTSPLICESITES.out.txt) + + if (run_hisat2) { + HISAT2_BUILD( + fasta, + gtf, + splice_sites + ) + + hisat2_index = HISAT2_BUILD.out.index + + versions = versions.mix(HISAT2_EXTRACTSPLICESITES.out.versions) + versions = versions.mix(HISAT2_BUILD.out.versions) + } + } + + if (run_kallisto || run_rsem_make_transcript_fasta || run_salmon) { + fasta_make_transcripts_fasta = fasta.map { meta, map_fasta -> + return meta.run_rsem_make_transcript_fasta ? 
[meta, map_fasta] : null + } + + MAKE_TRANSCRIPTS_FASTA( + fasta_make_transcripts_fasta, + gtf + ) + versions = versions.mix(MAKE_TRANSCRIPTS_FASTA.out.versions) + + transcript_fasta = input_transcript_fasta.mix(MAKE_TRANSCRIPTS_FASTA.out.transcript_fasta) + + if (run_kallisto) { + KALLISTO_INDEX(transcript_fasta) + + kallisto_index = KALLISTO_INDEX.out.index + versions = versions.mix(KALLISTO_INDEX.out.versions) + } + + if (run_salmon) { + SALMON_INDEX( + fasta, + transcript_fasta + ) + + salmon_index = SALMON_INDEX.out.index + versions = versions.mix(SALMON_INDEX.out.versions) + } + } + + if (run_rsem) { + RSEM_PREPAREREFERENCE_GENOME(fasta, gtf) + + rsem_index = RSEM_PREPAREREFERENCE_GENOME.out.index + versions = versions.mix(RSEM_PREPAREREFERENCE_GENOME.out.versions) + } + + if (run_star) { + STAR_GENOMEGENERATE(fasta, gtf) + + star_index = STAR_GENOMEGENERATE.out.index + versions = versions.mix(STAR_GENOMEGENERATE.out.versions) + } + } + + emit: + gtf // channel: [meta, gtf] + hisat2_index // channel: [meta, Hisat2Index/] + kallisto_index // channel: [meta, KallistoIndex] + rsem_index // channel: [meta, RSEMIndex/] + salmon_index // channel: [meta, SalmonIndex/] + splice_sites // channel: [meta, *.splice_sites.txt] + star_index // channel: [meta, STARIndex/] + transcript_fasta // channel: [meta, *.transcripts.fasta] + versions // channel: [versions.yml] +} diff --git a/subworkflows/local/index_fasta/main.nf b/subworkflows/local/index_fasta/main.nf new file mode 100644 index 0000000..ab8e9b5 --- /dev/null +++ b/subworkflows/local/index_fasta/main.nf @@ -0,0 +1,72 @@ +include { GATK4_CREATESEQUENCEDICTIONARY } from '../../../modules/nf-core/gatk4/createsequencedictionary' +include { GAWK as BUILD_INTERVALS } from '../../../modules/nf-core/gawk' +include { MSISENSORPRO_SCAN } from '../../../modules/nf-core/msisensorpro/scan' +include { SAMTOOLS_FAIDX } from '../../../modules/nf-core/samtools/faidx' + +workflow INDEX_FASTA { + take: + fasta // channel: [meta, 
fasta] + input_fasta_fai // channel: [meta, fasta_fai] + run_createsequencedictionary // boolean: true/false + run_faidx // boolean: true/false + run_intervals // boolean: true/false + run_msisensorpro // boolean: true/false + run_sizes // boolean: true/false + + main: + intervals_bed = Channel.empty() + fasta_fai = Channel.empty() + fasta_dict = Channel.empty() + fasta_sizes = Channel.empty() + msisensorpro_list = Channel.empty() + + versions = Channel.empty() + + if (run_createsequencedictionary) { + GATK4_CREATESEQUENCEDICTIONARY(fasta) + + fasta_dict = GATK4_CREATESEQUENCEDICTIONARY.out.dict + versions = versions.mix(GATK4_CREATESEQUENCEDICTIONARY.out.versions) + } + + if (run_faidx || run_intervals || run_sizes) { + fasta_samtools = fasta.map { meta, map_fasta -> + return meta.run_faidx ? [meta, map_fasta] : null + } + + SAMTOOLS_FAIDX( + fasta_samtools, + [[id: 'no_fai'], []], + run_sizes + ) + + fasta_fai = input_fasta_fai.mix(SAMTOOLS_FAIDX.out.fai) + fasta_sizes = SAMTOOLS_FAIDX.out.sizes + versions = versions.mix(SAMTOOLS_FAIDX.out.versions) + + if (run_intervals) { + fasta_fai_intervals = fasta_fai.map { meta, map_fasta_fai -> + return meta.run_intervals ? 
[meta, map_fasta_fai] : null + } + + BUILD_INTERVALS(fasta_fai_intervals, []) + intervals_bed = BUILD_INTERVALS.out.output + versions = versions.mix(BUILD_INTERVALS.out.versions) + } + } + + if (run_msisensorpro) { + MSISENSORPRO_SCAN(fasta) + + msisensorpro_list = MSISENSORPRO_SCAN.out.list + versions = versions.mix(MSISENSORPRO_SCAN.out.versions) + } + + emit: + fasta_dict // channel: [meta, *.fa(sta).dict] + fasta_fai // channel: [meta, *.fa(sta).fai] + fasta_sizes // channel: [meta, *.fa(sta).sizes] + intervals_bed // channel: [meta, *.bed] + msisensorpro_list // channel: [meta, *.list] + versions // channel: [versions.yml] +} diff --git a/subworkflows/local/index_vcf/main.nf b/subworkflows/local/index_vcf/main.nf new file mode 100644 index 0000000..48bab94 --- /dev/null +++ b/subworkflows/local/index_vcf/main.nf @@ -0,0 +1,23 @@ +include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix' + +workflow INDEX_VCF { + take: + vcf // channel: [meta, vcf] + run_tabix // boolean: true/false + + main: + vcf_tbi = Channel.empty() + versions = Channel.empty() + + + if (run_tabix) { + TABIX_TABIX(vcf) + + vcf_tbi = TABIX_TABIX.out.tbi + versions = TABIX_TABIX.out.versions + } + + emit: + vcf_tbi // channel: [meta, *.vcf.tbi] + versions // channel: [versions.yml] +} diff --git a/subworkflows/local/samplesheet_to_channel/main.nf b/subworkflows/local/samplesheet_to_channel/main.nf new file mode 100644 index 0000000..611771c --- /dev/null +++ b/subworkflows/local/samplesheet_to_channel/main.nf @@ -0,0 +1,60 @@ +workflow SAMPLESHEET_TO_CHANNEL { + take: + reference // channel: [meta, intervals_bed, fasta, fasta_dict, fasta_fai, fasta_sizes, gff, gtf, splice_sites, transcript_fasta, vcf, readme, bed12, mito_name, macs_gsize] + + main: + + intervals_bed = reference.map { meta, input_intervals_bed, input_fasta, input_fasta_dict, input_fasta_fai, input_fasta_sizes, input_gff, input_gtf, input_splice_sites, input_transcript_fasta, input_vcf, input_readme, input_bed12, 
input_mito_name, input_macs_gsize -> + return input_intervals_bed ? [meta, input_intervals_bed] : null + } + + fasta = reference.map { meta, input_intervals_bed, input_fasta, input_fasta_dict, input_fasta_fai, input_fasta_sizes, input_gff, input_gtf, input_splice_sites, input_transcript_fasta, input_vcf, input_readme, input_bed12, input_mito_name, input_macs_gsize -> + return input_fasta ? [meta + [run_faidx: input_fasta_fai && input_fasta_sizes ? false : true] + [run_intervals: input_intervals_bed ? false : true] + [run_rsem_make_transcript_fasta: input_transcript_fasta ? false : true], input_fasta] : null + } + + fasta_dict = reference.map { meta, input_intervals_bed, input_fasta, input_fasta_dict, input_fasta_fai, input_fasta_sizes, input_gff, input_gtf, input_splice_sites, input_transcript_fasta, input_vcf, input_readme, input_bed12, input_mito_name, input_macs_gsize -> + return input_fasta_dict ? [meta, input_fasta_dict] : null + } + + fasta_fai = reference.map { meta, input_intervals_bed, input_fasta, input_fasta_dict, input_fasta_fai, input_fasta_sizes, input_gff, input_gtf, input_splice_sites, input_transcript_fasta, input_vcf, input_readme, input_bed12, input_mito_name, input_macs_gsize -> + return input_fasta_fai ? [meta + [run_intervals: input_intervals_bed ? false : true], input_fasta_fai] : null + } + + fasta_sizes = reference.map { meta, input_intervals_bed, input_fasta, input_fasta_dict, input_fasta_fai, input_fasta_sizes, input_gff, input_gtf, input_splice_sites, input_transcript_fasta, input_vcf, input_readme, input_bed12, input_mito_name, input_macs_gsize -> + return input_fasta_sizes ? [meta, input_fasta_sizes] : null + } + + gff = reference.map { meta, input_intervals_bed, input_fasta, input_fasta_dict, input_fasta_fai, input_fasta_sizes, input_gff, input_gtf, input_splice_sites, input_transcript_fasta, input_vcf, input_readme, input_bed12, input_mito_name, input_macs_gsize -> + return input_gff && !input_gtf ? 
[meta + [run_hisat2: input_splice_sites ? false : true], input_gff] : null + } + + gtf = reference.map { meta, input_intervals_bed, input_fasta, input_fasta_dict, input_fasta_fai, input_fasta_sizes, input_gff, input_gtf, input_splice_sites, input_transcript_fasta, input_vcf, input_readme, input_bed12, input_mito_name, input_macs_gsize -> + return input_gtf ? [meta + [run_hisat2: input_splice_sites ? false : true], input_gtf] : null + } + + splice_sites = reference.map { meta, input_intervals_bed, input_fasta, input_fasta_dict, input_fasta_fai, input_fasta_sizes, input_gff, input_gtf, input_splice_sites, input_transcript_fasta, input_vcf, input_readme, input_bed12, input_mito_name, input_macs_gsize -> + return input_splice_sites ? [meta, input_splice_sites] : null + } + + transcript_fasta = reference.map { meta, input_intervals_bed, input_fasta, input_fasta_dict, input_fasta_fai, input_fasta_sizes, input_gff, input_gtf, input_splice_sites, input_transcript_fasta, input_vcf, input_readme, input_bed12, input_mito_name, input_macs_gsize -> + return input_transcript_fasta ? [meta, input_transcript_fasta] : null + } + + vcf = reference + .map { meta, input_intervals_bed, input_fasta, input_fasta_dict, input_fasta_fai, input_fasta_sizes, input_gff, input_gtf, input_splice_sites, input_transcript_fasta, input_vcf, input_readme, input_bed12, input_mito_name, input_macs_gsize -> + return input_vcf ? 
[meta, file(input_vcf)] : null + } + .transpose() + + emit: + intervals_bed + fasta + fasta_dict + fasta_fai + fasta_sizes + gff + gtf + splice_sites + transcript_fasta + vcf +} diff --git a/subworkflows/local/utils_nfcore_references_pipeline/main.nf b/subworkflows/local/utils_nfcore_references_pipeline/main.nf index c6b006c..7885e2d 100644 --- a/subworkflows/local/utils_nfcore_references_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_references_pipeline/main.nf @@ -8,14 +8,14 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { samplesheetToList } from 'plugin/nf-schema' -include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' -include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' -include { imNotification } from '../../nf-core/utils_nfcore_pipeline' -include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' -include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { samplesheetToList } from 'plugin/nf-schema' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -24,7 +24,6 @@ include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipelin */ workflow PIPELINE_INITIALISATION { - take: version // boolean: Display version and exit 
validate_params // boolean: Boolean whether to validate parameters against the schema at runtime @@ -40,7 +39,7 @@ workflow PIPELINE_INITIALISATION { // // Print version and exit if required and dump pipeline parameters to JSON file // - UTILS_NEXTFLOW_PIPELINE ( + UTILS_NEXTFLOW_PIPELINE( version, true, outdir, @@ -50,7 +49,7 @@ workflow PIPELINE_INITIALISATION { // // Validate parameters and generate parameter summary to stdout // - UTILS_NFSCHEMA_PLUGIN ( + UTILS_NFSCHEMA_PLUGIN( workflow, validate_params, null @@ -59,14 +58,14 @@ workflow PIPELINE_INITIALISATION { // // Check config provided to the pipeline // - UTILS_NFCORE_PIPELINE ( + UTILS_NFCORE_PIPELINE( nextflow_cli_args ) // // Create channel from input file provided through params.input // - ch_samplesheet = Channel.fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) + ch_samplesheet = Channel.fromList(samplesheetToList(input, "${projectDir}/assets/schema_input.json")) emit: samplesheet = ch_samplesheet @@ -80,7 +79,6 @@ workflow PIPELINE_INITIALISATION { */ workflow PIPELINE_COMPLETION { - take: email // string: email address email_on_fail // string: email address sent on pipeline failure @@ -116,7 +114,7 @@ workflow PIPELINE_COMPLETION { } workflow.onError { - log.error "Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + log.error("Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting") } } @@ -133,11 +131,11 @@ def toolCitationText() { // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", // Uncomment function in methodsDescriptionText to render in MultiQC report def citation_text = [ - "Tools used in the workflow included:", - "FastQC (Andrews 2010),", - "MultiQC (Ewels et al. 2016)", - "." 
- ].join(' ').trim() + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() return citation_text } @@ -147,9 +145,9 @@ def toolBibliographyText() { // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "