From 80a40470f039fa35385596bd909ad9934e3f3789 Mon Sep 17 00:00:00 2001 From: scarlhoff Date: Fri, 7 Jul 2023 16:01:37 +0200 Subject: [PATCH 1/8] add ref sheet columns, update bwaaln --- conf/test_multiref.config | 2 +- modules.json | 2 +- nextflow.config | 2 +- nextflow_schema.json | 6 -- subworkflows/local/reference_indexing.nf | 43 +++++++++- .../local/reference_indexing_multi.nf | 67 ++++++++++------ .../local/reference_indexing_single.nf | 30 ++++++- .../nf-core/fastq_align_bwaaln/main.nf | 79 +++++++++++++++++-- .../nf-core/fastq_align_bwaaln/meta.yml | 10 +++ .../fastq_align_bwaaln/nextflow.config | 26 ++++++ workflows/eager.nf | 52 ++++++------ 11 files changed, 253 insertions(+), 66 deletions(-) create mode 100644 subworkflows/nf-core/fastq_align_bwaaln/nextflow.config diff --git a/conf/test_multiref.config b/conf/test_multiref.config index 23fcdb942..1e71b9922 100644 --- a/conf/test_multiref.config +++ b/conf/test_multiref.config @@ -23,7 +23,7 @@ params { input = 'https://github.com/nf-core/test-datasets/raw/eager/testdata/Mammoth/samplesheet_multilane_multilib.tsv' // Genome references - fasta = 'https://github.com/jfy133/nf-core-test-datasets/raw/eager/reference/reference_sheet_multiref.csv' + fasta = 'https://github.com/nf-core/test-datasets/raw/eager/reference/reference_sheet_multiref.csv' // BAM filtering run_bamfiltering = true diff --git a/modules.json b/modules.json index 566c4dbd2..e107375e4 100644 --- a/modules.json +++ b/modules.json @@ -206,7 +206,7 @@ }, "fastq_align_bwaaln": { "branch": "master", - "git_sha": "a9784afdd5dcda23b84e64db75dc591065d64653", + "git_sha": "e2c81fea3daeacfa190f78d2b82f82361b734507", "installed_by": ["subworkflows"] } } diff --git a/nextflow.config b/nextflow.config index 3c7b88d89..82491f55d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -17,7 +17,6 @@ params { fasta_dict = null fasta_mapperindexdir = null fasta_circular_target = null - fasta_mitochondrion_header = null fasta_largeref = false // References @@ -211,6 +210,7 @@ try { // Additional configs for subworkflows includeConfig 'subworkflows/nf-core/bam_split_by_region/nextflow.config' includeConfig 'subworkflows/nf-core/bam_docounts_contamination_angsd/nextflow.config' +includeConfig 'subworkflows/nf-core/fastq_align_bwaaln/nextflow.config' profiles { debug { diff --git a/nextflow_schema.json b/nextflow_schema.json index 044d4b635..234c85415 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -111,12 +111,6 @@ "description": "Specify the FASTA header of the target chromosome to extend. Only applies when using `circularmapper`.", "help_text": "The entry (chromosome, contig, etc.) in your FASTA reference that you'd like to be treated as circular.\n\nApplies only when providing a single FASTA file via `--fasta` (NOT multi-reference input - see reference TSV/CSV input).\n\n> Modifies tool parameter(s):\n> - circulargenerator `-s`\n", "fa_icon": "fas fa-bullseye" - }, - "fasta_mitochondrion_header": { - "type": "string", - "fa_icon": "fas fa-tag", - "description": "Specify the name of the reference FASTA entry corresponding to the mitochondrial genome, up to the first space. Only applies when using `--run_mtnucratio`.", - "help_text": "Specify the FASTA entry in the reference file specified as `--fasta` that acts as the mitochondrial 'chromosome' to base a mitochondrial-to-nuclear ratio calculation on. \n\nThe tool only accepts the first section of the header before the first space. 
For example, mitochondrion chromosome name is `MT` for the hs37d5/GrCH37 human reference genome.\n" } } }, diff --git a/subworkflows/local/reference_indexing.nf b/subworkflows/local/reference_indexing.nf index f91bf18f0..6cd6a051c 100644 --- a/subworkflows/local/reference_indexing.nf +++ b/subworkflows/local/reference_indexing.nf @@ -17,20 +17,59 @@ workflow REFERENCE_INDEXING { // Warn user if they've given a reference sheet that already includes fai/dict/mapper index etc. if ( ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) && (fasta_fai || fasta_dict || fasta_mapperindexdir)) log.warn("A TSV or CSV has been supplied to `--fasta` as well as e.g. `--fasta_fai`. --fasta CSV/TSV takes priority and --fasta_* parameters will be ignored.") + if ( ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) && (params.mitochondrion_header || params.contamination_estimation_angsd_hapmap || params.damage_manipulation_pmdtools_reference_mask )) log.warn("A TSV or CSV has been supplied to `--fasta` as well as individual reference-specific input files, e.g. `--contamination_estimation_angsd_hapmap`. Input files specified in the --fasta CSV/TSV take priority and other input parameters will be ignored.") if ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) { // If input (multi-)reference sheet supplied REFERENCE_INDEXING_MULTI ( fasta ) ch_reference_for_mapping = REFERENCE_INDEXING_MULTI.out.reference + ch_mitochondrion_header = REFERENCE_INDEXING_MULTI.out.mitochondrion_header + ch_hapmap = REFERENCE_INDEXING_MULTI.out.hapmap + ch_pmd_mask = REFERENCE_INDEXING_MULTI.out.pmd_mask + ch_snp_capture_bed = REFERENCE_INDEXING_MULTI.out.snp_capture_bed + ch_snp_eigenstrat = REFERENCE_INDEXING_MULTI.out.snp_eigenstrat + ch_sexdeterrmine_bed = REFERENCE_INDEXING_MULTI.out.sexdeterrmine_bed ch_versions = ch_versions.mix( REFERENCE_INDEXING_MULTI.out.versions ) } else { // If input FASTA and/or indicies supplied REFERENCE_INDEXING_SINGLE ( fasta, fasta_fai, fasta_dict, fasta_mapperindexdir ) + ch_mitochondrion_header = REFERENCE_INDEXING_SINGLE.out.mitochondrion_header + ch_hapmap = REFERENCE_INDEXING_SINGLE.out.hapmap + ch_pmd_mask = REFERENCE_INDEXING_SINGLE.out.pmd_mask + ch_snp_capture_bed = REFERENCE_INDEXING_SINGLE.out.snp_capture_bed + ch_snp_eigenstrat = REFERENCE_INDEXING_SINGLE.out.snp_eigenstrat + ch_sexdeterrmine_bed = REFERENCE_INDEXING_SINGLE.out.sexdeterrmine_bed ch_reference_for_mapping = REFERENCE_INDEXING_SINGLE.out.reference ch_versions = ch_versions.mix( REFERENCE_INDEXING_SINGLE.out.versions ) } + + // Filter out input options that are not provided + ch_mitochondrion_header = ch_mitochondrion_header + .filter{ it[1] != "" } + + ch_hapmap = ch_hapmap + .filter{ it[1] != "" } + + ch_pmd_mask = ch_pmd_mask + .filter{ it[1] != "" && it[2] != "" } + + ch_snp_capture_bed = ch_snp_capture_bed + .filter{ it[1] != "" } + + ch_snp_eigenstrat = ch_snp_eigenstrat + .filter{ it[1] != "" } + + ch_sexdeterrmine_bed = ch_sexdeterrmine_bed + .filter{ it[1] != "" } + emit: - reference = ch_reference_for_mapping // [ meta, fasta, fai, dict, mapindex ] - versions = ch_versions + reference = ch_reference_for_mapping // [ meta, fasta, fai, dict, mapindex, circular_target ] + mitochondrion_header = ch_mitochondrion_header // [ meta, mitochondrion_header ] + hapmap = ch_hapmap // [ meta, hapmap ] + pmd_mask = ch_pmd_mask // [ meta, masked_fasta, capture_bed ] + snp_capture_bed = ch_snp_capture_bed // [ meta, capture_bed ] + snp_eigenstrat = ch_snp_eigenstrat // [ meta, snp_eigenstrat ] + 
sexdeterrmine_bed = ch_sexdeterrmine_bed // [ meta, sexdet_bed ] + versions = ch_versions } diff --git a/subworkflows/local/reference_indexing_multi.nf b/subworkflows/local/reference_indexing_multi.nf index 754189d03..53f258868 100644 --- a/subworkflows/local/reference_indexing_multi.nf +++ b/subworkflows/local/reference_indexing_multi.nf @@ -37,7 +37,12 @@ workflow REFERENCE_INDEXING_MULTI { def mapper_index = row["mapper_index"] != "" ? file(row["mapper_index"], checkIfExists: true) : "" def circular_target = row["circular_target"] def mitochondrion = row["mitochondrion_header"] - [ meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion ] + def capture_bed = row["snpcapture_bed"] != "" ? file(row["snpcapture_bed"], checkIfExists: true) : "" + def snp_file = row["pileupcaller_snpfile"] != "" ? file(row["pileupcaller_snpfile"], checkIfExists: true) : "" + def hapmap = row["hapmap_file"] != "" ? file(row["hapmap_file"], checkIfExists: true) : "" + def pmd_mask = row["pmdtools_masked_fasta"] != "" ? file(row["pmdtools_masked_fasta"], checkIfExists: true) : "" + def sexdet_bed = row["sexdeterrmine_snp_bed"] != "" ? file(row["sexdeterrmine_snp_bed"], checkIfExists: true) : "" + [ meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion, capture_bed, snp_file, hapmap, pmd_mask, sexdet_bed ] } @@ -52,10 +57,22 @@ workflow REFERENCE_INDEXING_MULTI { // DECOMPRESSION // +ch_input_from_referencesheet = ch_splitreferencesheet_for_branch + .multiMap { + meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion, capture_bed, snp_file, hapmap, pmd_mask, sexdet_bed -> + generated: [ meta, fasta, fai, dict, mapper_index, circular_target ] + mitochondrion_header: [ meta, mitochondrion ] + angsd_hapmap: [ meta, hapmap ] + pmd_mask: [ meta, pmd_mask, capture_bed ] + snp_bed: [ meta, capture_bed ] + snp_eigenstrat: [ meta, snp_file ] + sexdeterrmine_bed: [ meta, sexdet_bed ] + } + // Detect if fasta is gzipped or not - ch_fasta_for_gunzip = ch_splitreferencesheet_for_branch + ch_fasta_for_gunzip = ch_input_from_referencesheet.generated .branch { - meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion -> + meta, fasta, fai, dict, mapper_index, circular_target -> forgunzip: fasta.extension == "gz" skip: true } @@ -63,9 +80,9 @@ workflow REFERENCE_INDEXING_MULTI { // Pull out name/file to match cardinality for GUNZIP module ch_gunzip_input = ch_fasta_for_gunzip.forgunzip .multiMap { - meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion -> + meta, fasta, fai, dict, mapper_index, circular_target -> gunzip: [ meta, fasta ] - remainder: [ meta, fai, dict, mapper_index, circular_target, mitochondrion ] + remainder: [ meta, fai, dict, mapper_index, circular_target ] } @@ -83,7 +100,7 @@ workflow REFERENCE_INDEXING_MULTI { // Separate out non-faidxed references ch_fasta_for_faidx = ch_fasta_for_faiindexing .branch { - meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion -> + meta, fasta, fai, dict, mapper_index, circular_target -> forfaidx: fai == "" skip: true } @@ -92,9 +109,9 @@ workflow REFERENCE_INDEXING_MULTI { ch_faidx_input = ch_fasta_for_faidx .forfaidx .multiMap { - meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion -> + meta, fasta, fai, dict, mapper_index, circular_target -> faidx: [ meta, fasta ] - remainder: [ meta, fasta, dict, mapper_index, circular_target, mitochondrion ] // we drop fai here as we are going to make it + remainder: [ meta, fasta, dict, mapper_index, circular_target ] // we drop fai 
here as we are going to make it } SAMTOOLS_FAIDX ( ch_faidx_input.faidx ) @@ -104,9 +121,9 @@ workflow REFERENCE_INDEXING_MULTI { ch_faidxed_formix = SAMTOOLS_FAIDX.out.fai .join( ch_faidx_input.remainder, failOnMismatch: true ) .map { - meta, fai, fasta, dict, mapper_index, circular_target, mitochondrion -> + meta, fai, fasta, dict, mapper_index, circular_target -> - [ meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion ] + [ meta, fasta, fai, dict, mapper_index, circular_target ] } // Mix back newly faidx'd references with the pre-indexed ones @@ -118,7 +135,7 @@ workflow REFERENCE_INDEXING_MULTI { ch_fasta_for_dict = ch_fasta_for_dictindexing .branch { - meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion -> + meta, fasta, fai, dict, mapper_index, circular_target -> fordict: dict == "" skip: true } @@ -126,9 +143,9 @@ workflow REFERENCE_INDEXING_MULTI { ch_dict_input = ch_fasta_for_dict .fordict .multiMap { - meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion -> + meta, fasta, fai, dict, mapper_index, circular_target -> dict: [ meta, fasta ] - remainder: [ meta, fasta, fai, mapper_index, circular_target, mitochondrion ] + remainder: [ meta, fasta, fai, mapper_index, circular_target ] } PICARD_CREATESEQUENCEDICTIONARY ( ch_dict_input.dict ) @@ -137,9 +154,9 @@ workflow REFERENCE_INDEXING_MULTI { ch_dicted_formix = PICARD_CREATESEQUENCEDICTIONARY.out.reference_dict .join( ch_dict_input.remainder, failOnMismatch: true ) .map { - meta, dict, fasta, fai, mapper_index, circular_target, mitochondrion -> + meta, dict, fasta, fai, mapper_index, circular_target -> - [ meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion ] + [ meta, fasta, fai, dict, mapper_index, circular_target ] } ch_dict_formapperindexing = ch_fasta_for_dict.skip.mix(ch_dicted_formix) @@ -152,7 +169,7 @@ workflow REFERENCE_INDEXING_MULTI { ch_fasta_for_mapperindex = ch_dict_formapperindexing .branch { - meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion -> + meta, fasta, fai, dict, mapper_index, circular_target -> forindex: mapper_index == "" skip: true } @@ -160,9 +177,9 @@ workflow REFERENCE_INDEXING_MULTI { ch_mapindex_input = ch_fasta_for_mapperindex .forindex .multiMap { - meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion -> + meta, fasta, fai, dict, mapper_index, circular_target -> index: [ meta, fasta ] - remainder: [ meta, fasta, fai, dict, circular_target, mitochondrion ] + remainder: [ meta, fasta, fai, dict, circular_target ] } if ( params.mapping_tool == "bwaaln" || params.mapping_tool == "bwamem" ) { @@ -180,14 +197,20 @@ workflow REFERENCE_INDEXING_MULTI { ch_indexed_formix = ch_indexed_forremap .join( ch_mapindex_input.remainder, failOnMismatch: true ) .map { - meta, mapper_index, fasta, fai, dict, circular_target, mitochondrion -> + meta, mapper_index, fasta, fai, dict, circular_target -> - [ meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion ] + [ meta, fasta, fai, dict, mapper_index, circular_target ] } ch_indexmapper_for_reference = ch_fasta_for_mapperindex.skip.mix(ch_indexed_formix) emit: - reference = ch_indexmapper_for_reference // [ meta, fasta, fai, dict, mapindex, circular_target, mitochondrion ] - versions = ch_versions + reference = ch_indexmapper_for_reference // [ meta, fasta, fai, dict, mapindex, circular_target ] + mitochondrion_header = ch_input_from_referencesheet.mitochondrion_header // [ meta, mitochondrion ] + hapmap = ch_input_from_referencesheet.angsd_hapmap // [ 
meta, hapmap ] + pmd_mask = ch_input_from_referencesheet.pmd_mask // [ meta, pmd_mask, capture_bed ] + snp_capture_bed = ch_input_from_referencesheet.snp_bed // [ meta, capture_bed ] + snp_eigenstrat = ch_input_from_referencesheet.snp_eigenstrat // [ meta, snp_eigenstrat ] + sexdeterrmine_bed = ch_input_from_referencesheet.sexdeterrmine_bed // [ meta, sexdet_bed ] + versions = ch_versions } diff --git a/subworkflows/local/reference_indexing_single.nf b/subworkflows/local/reference_indexing_single.nf index b32cfd4c8..25c852bc5 100644 --- a/subworkflows/local/reference_indexing_single.nf +++ b/subworkflows/local/reference_indexing_single.nf @@ -79,11 +79,35 @@ workflow REFERENCE_INDEXING_SINGLE { .join(ch_fasta_mapperindexdir, failOnMismatch: true) .map{ meta, fasta, fai, dict, mapper_index -> - [ meta, fasta, fai, dict, mapper_index, params.fasta_circular_target, params.fasta_mitochondrion_header ] + //TODO: add params for snpcapturebed, snpeigenstrat and sexdetbed once implemented + def contamination_estimation_angsd_hapmap = params.contamination_estimation_angsd_hapmap != null ? file( params.contamination_estimation_angsd_hapmap, checkIfExists: true ) : "" + def pmd_mask = params.damage_manipulation_pmdtools_reference_mask != null ? file(params.damage_manipulation_pmdtools_reference_mask, checkIfExists: true ) : "" + def capture_bed = "" + def snp_eigenstrat = "" + def sexdet_bed = "" + [ meta, fasta, fai, dict, mapper_index, params.fasta_circular_target, params.mitochondrion_header, contamination_estimation_angsd_hapmap, pmd_mask, capture_bed, snp_eigenstrat, sexdet_bed ] + } + + ch_ref_index_single = ch_reference_for_mapping + .multiMap{ + meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion_header, contamination_estimation_angsd_hapmap, pmd_mask, capture_bed, snp_eigenstrat, sexdet_bed -> + reference: [ meta, fasta, fai, dict, mapper_index, circular_target ] + mito_header: [ meta, mitochondrion_header ] + hapmap: [ meta, contamination_estimation_angsd_hapmap ] + pmd_mask: [ meta, pmd_mask, capture_bed ] + snp_bed: [ meta, capture_bed ] + snp_eigenstrat: [ meta, snp_eigenstrat ] + sexdeterrmine_bed: [ meta, sexdet_bed ] } emit: - reference = ch_reference_for_mapping // [ meta, fasta, fai, dict, mapindex ] - versions = ch_versions + reference = ch_ref_index_single.reference // [ meta, fasta, fai, dict, mapindex, circular_target ] + mitochondrion_header = ch_ref_index_single.mito_header // [ meta, mito_header ] + hapmap = ch_ref_index_single.hapmap // [ meta, hapmap ] + pmd_mask = ch_ref_index_single.pmd_mask // [ meta, pmd_mask, capture_bed ] + snp_capture_bed = ch_ref_index_single.snp_bed // [ meta, capture_bed ] + snp_eigenstrat = ch_ref_index_single.snp_eigenstrat // [ meta, snp_eigenstrat ] + sexdeterrmine_bed = ch_ref_index_single.sexdeterrmine_bed // [ meta, sexdet_bed ] + versions = ch_versions } diff --git a/subworkflows/nf-core/fastq_align_bwaaln/main.nf b/subworkflows/nf-core/fastq_align_bwaaln/main.nf index 3d2d6f607..647e83a31 100644 --- a/subworkflows/nf-core/fastq_align_bwaaln/main.nf +++ b/subworkflows/nf-core/fastq_align_bwaaln/main.nf @@ -17,10 +17,47 @@ workflow FASTQ_ALIGN_BWAALN { ch_versions = Channel.empty() - BWA_ALN ( ch_reads, ch_index ) + // WARNING: You must specify in your prefix `meta.id_index` in your `modules.conf` + // to ensure that you do not overwrite multiple BAM files from one sample mapped + // against multiple references. This meta map is added by the subworkflow but can be removed + // after if necessary. 
+ + // Ensure when multiple references are provide, that reference/read combinations + // are correctly associated throughout the subworkflow by copying the sample + // specific metadata to the index on each combination + + ch_prepped_input = ch_reads + .combine(ch_index) + .map{ + meta, reads, meta_index, index -> + + // Create a combined id that includes the ids of the reads and the index used. + // Also keep the id of the index with a new name to avoid name collisions. + def key_read_ref = meta.id + "_" + meta_index.id + def id_index = meta_index.id + + [ meta + [key_read_ref: key_read_ref] + [id_index: id_index], reads, meta_index + [key_read_ref: key_read_ref] + [id_index: id_index], index ] + } + + // Drop the index_meta, as the id of the index is now kept within the read meta. + ch_preppedinput_for_bwaaln = ch_prepped_input + .multiMap { + meta, reads, meta_index, index -> + reads: [ meta, reads ] + index: [ meta, index ] + } + + + // Set as independent channel to allow repeated joining but _with_ sample specific metadata + // to ensure right reference goes with right sample + ch_reads_newid = ch_prepped_input.map{ meta, reads, meta_index, index -> [ meta, reads ] } + ch_index_newid = ch_prepped_input.map{ meta, reads, meta_index, index -> [ meta, index ] } + + // Alignment and conversion to bam + BWA_ALN ( ch_preppedinput_for_bwaaln.reads, ch_preppedinput_for_bwaaln.index ) ch_versions = ch_versions.mix( BWA_ALN.out.versions.first() ) - ch_sai_for_bam = ch_reads + ch_sai_for_bam = ch_reads_newid .join ( BWA_ALN.out.sai ) .branch { meta, reads, sai -> @@ -28,22 +65,48 @@ workflow FASTQ_ALIGN_BWAALN { se: meta.single_end } - BWA_SAMPE ( ch_sai_for_bam.pe, ch_index ) + // Split as PE/SE have different SAI -> BAM commands + ch_sai_for_bam_pe = ch_sai_for_bam.pe + .join ( ch_index_newid ) + .multiMap { + meta, reads, sai, index -> + reads: [ meta, reads, sai ] + index: [ meta, index ] + } + + ch_sai_for_bam_se = ch_sai_for_bam.se + .join ( ch_index_newid ) + .multiMap { + meta, reads, sai, index -> + reads: [ meta, reads, sai ] + index: [ meta, index ] + } + + + BWA_SAMPE ( ch_sai_for_bam_pe.reads, ch_sai_for_bam_pe.index ) ch_versions = ch_versions.mix( BWA_SAMPE.out.versions.first() ) - BWA_SAMSE ( ch_sai_for_bam.se, ch_index ) + BWA_SAMSE ( ch_sai_for_bam_se.reads, ch_sai_for_bam_se.index ) ch_versions = ch_versions.mix( BWA_SAMSE.out.versions.first() ) ch_bam_for_index = BWA_SAMPE.out.bam.mix( BWA_SAMSE.out.bam ) + // Index all SAMTOOLS_INDEX ( ch_bam_for_index ) ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + // Remove superfluous internal maps to minimise clutter as much as possible + ch_bam_for_emit = ch_bam_for_index.map{ meta, bam -> [meta - meta.subMap('key_read_ref'), bam] } + ch_bai_for_emit = SAMTOOLS_INDEX.out.bai.map{ meta, bai -> [meta - meta.subMap('key_read_ref'), bai] } + ch_csi_for_emit = SAMTOOLS_INDEX.out.csi.map{ meta, csi -> [meta - meta.subMap('key_read_ref'), csi] } + emit: - bam = ch_bam_for_index // channel: [ val(meta), path(bam) ] - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), path(bai) ] - csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), path(csi) ] + // Note: output channels will contain meta with additional 'id_index' meta + // value to allow association of BAM file with the meta.id of input indicies + bam = ch_bam_for_emit // channel: [ val(meta), path(bam) ] + bai = ch_bai_for_emit // channel: [ val(meta), path(bai) ] + csi = ch_csi_for_emit // channel: [ val(meta), path(csi) ] - versions = ch_versions // channel: [ 
path(versions.yml) ] + versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/subworkflows/nf-core/fastq_align_bwaaln/meta.yml b/subworkflows/nf-core/fastq_align_bwaaln/meta.yml index 755550286..c38effd13 100644 --- a/subworkflows/nf-core/fastq_align_bwaaln/meta.yml +++ b/subworkflows/nf-core/fastq_align_bwaaln/meta.yml @@ -18,6 +18,9 @@ input: description: | Structure: [ val(meta), path(fastq) ] One or two FASTQ files for single or paired end data respectively. + Note: the subworkflow adds a new meta ID `meta.id_index` that _must_ + be used in `prefix` to ensure unique file names. See the associated + nextflow.config file. - ch_index: description: | Structure: [ val(meta), path(index) ] @@ -29,6 +32,9 @@ output: description: | Structure: [ val(meta), path(bam) ] Sorted BAM/CRAM/SAM file + Note: the subworkflow adds a new meta ID `meta.id_index` that _must_ + be used in `prefix` to ensure unique file names. See the associated + nextflow.config file. - bai: description: | Structure: [ val(meta), path(bai) ] @@ -37,5 +43,9 @@ output: description: | Structure: [ val(meta), path(csi) ] BAM/CRAM/SAM samtools index file for larger references + - versions: + description: | + Files containing software versions + Structure: [ path(versions.yml) ] authors: - "@jfy133" diff --git a/subworkflows/nf-core/fastq_align_bwaaln/nextflow.config b/subworkflows/nf-core/fastq_align_bwaaln/nextflow.config new file mode 100644 index 000000000..2ff165516 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_bwaaln/nextflow.config @@ -0,0 +1,26 @@ +// IMPORTANT: Add this configuration to your modules.config +// These settings are to ensure you include the reference information within the output file names, to prevent overwritting! +// `meta.id_index` is created and used within the workflow. You must reuse this meta field if you wish to customise the file naming scheme. + +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: '.*FASTQ_ALIGN_BWAALN:BWA_ALN' { + ext.prefix = { "${meta.id}_${meta.id_index}" } + } + + withName: '.*FASTQ_ALIGN_BWAALN:BWA_SAMSE' { + ext.prefix = { "${meta.id}_${meta.id_index}" } + } + + withName: '.*FASTQ_ALIGN_BWAALN:BWA_SAMPE' { + ext.prefix = { "${meta.id}_${meta.id_index}" } + } + + withName: '.*FASTQ_ALIGN_BWAALN:SAMTOOLS_INDEX' { + ext.prefix = { "${meta.id}_${meta.id_index}" } + } + + +} diff --git a/workflows/eager.nf b/workflows/eager.nf index 0911d4ca0..183717e5f 100644 --- a/workflows/eager.nf +++ b/workflows/eager.nf @@ -126,9 +126,6 @@ workflow EAGER { if ( params.preprocessing_tool == 'fastp' && !adapterlist.extension.matches(".*(fa|fasta|fna|fas)") ) error "[nf-core/eager] ERROR: fastp adapter list requires a `.fasta` format and extension (or fa, fas, fna). Check input: --preprocessing_adapterlist ${params.preprocessing_adapterlist}" } - // Contamination estimation - hapmap_file = file(params.contamination_estimation_angsd_hapmap, checkIfExists:true) - // // SUBWORKFLOW: Read in samplesheet, validate and stage input files // @@ -137,11 +134,11 @@ workflow EAGER { file(params.input) ) ch_versions = ch_versions.mix( INPUT_CHECK.out.versions ) - + // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input") // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ // ! 
There is currently no tooling to help you write a sample sheet schema - + // // SUBWORKFLOW: Indexing of reference files // @@ -181,7 +178,7 @@ workflow EAGER { // ch_reference_for_mapping = REFERENCE_INDEXING.out.reference .map{ - meta, fasta, fai, dict, index, circular_target, mitochondrion -> + meta, fasta, fai, dict, index, circular_target -> [ meta, index ] } @@ -233,7 +230,7 @@ workflow EAGER { ch_fasta_for_deduplication = REFERENCE_INDEXING.out.reference .multiMap{ - meta, fasta, fai, dict, index, circular_target, mitochondrion -> + meta, fasta, fai, dict, index, circular_target -> fasta: [ meta, fasta ] fasta_fai: [ meta, fai ] } @@ -310,13 +307,31 @@ workflow EAGER { // if ( params.run_mtnucratio ) { + ch_mito_header = REFERENCE_INDEXING.out.mitochondrion_header + .map{ + meta, mito_header -> + meta2 = [:] + meta2.reference = meta.id + [ meta2, meta, mito_header ] + } mtnucratio_input = ch_dedupped_bams - .map { - meta, bam, bai -> - [ meta, bam ] - } + .map { + meta, bam, bai -> + meta2 = [:] + meta2.reference = meta.reference + [ meta2, meta, bam ] + } + .combine( + by: 0, + ch_mito_header + ) + .multiMap{ + ignore_meta, meta, bam, meta2, mito_header -> + bam: [ meta, bam ] + mito_header: [ meta2, mito_header ] + } - MTNUCRATIO( mtnucratio_input, params.mitochondrion_header ) + MTNUCRATIO( mtnucratio_input.bam, mtnucratio_input.mito_header.map{ it[1] } ) ch_multiqc_files = ch_multiqc_files.mix(MTNUCRATIO.out.mtnucratio.collect{it[1]}.ifEmpty([])) ch_versions = ch_versions.mix( MTNUCRATIO.out.versions ) } @@ -341,7 +356,7 @@ workflow EAGER { ch_fasta_for_damagecalculation = REFERENCE_INDEXING.out.reference .multiMap{ - meta, fasta, fai, dict, index, circular_target, mitochondrion -> + meta, fasta, fai, dict, index, circular_target -> fasta: [ meta, fasta ] fasta_fai: [ meta, fai ] } @@ -359,22 +374,15 @@ workflow EAGER { if ( params.run_contamination_estimation_angsd ) { contamination_input = ch_dedupped_bams - ch_hapmap = Channel.of( [ hapmap_file ] ) - hapmap_input = REFERENCE_INDEXING.out.reference - .combine( ch_hapmap ) - .map { - meta, fasta, fai, dict, index, circular_target, mitochondrion, hapmap -> - [ meta, hapmap ] - } - ESTIMATE_CONTAMINATION( contamination_input, hapmap_input ) + ESTIMATE_CONTAMINATION( contamination_input, REFERENCE_INDEXING.out.hapmap ) ch_versions = ch_versions.mix( ESTIMATE_CONTAMINATION.out.versions ) ch_multiqc_files = ch_multiqc_files.mix( ESTIMATE_CONTAMINATION.out.mqc.collect{it[1]}.ifEmpty([]) ) } // // SUBWORKFLOW: aDNA Damage Manipulation - // + // TODO: Add pmd_mask and snp_capture input if ( params.run_mapdamage_rescaling || params.run_pmd_filtering || params.run_trim_bam ) { MANIPULATE_DAMAGE( ch_dedupped_bams, ch_fasta_for_deduplication.fasta ) From 02af1fc199eb6da4afccfa8c8df4869d300e60ab Mon Sep 17 00:00:00 2001 From: scarlhoff Date: Fri, 4 Aug 2023 13:42:18 +0200 Subject: [PATCH 2/8] fix creation of multiple bwaaln jobs --- subworkflows/local/map.nf | 45 +++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/subworkflows/local/map.nf b/subworkflows/local/map.nf index 494acfb7b..3a5480d1e 100644 --- a/subworkflows/local/map.nf +++ b/subworkflows/local/map.nf @@ -19,8 +19,31 @@ workflow MAP { ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() - ch_input_for_mapping = reads - .combine(index) + if ( params.mapping_tool == 'bwaaln' ) { + ch_index_for_mapping = index + .map { + // Add meta.reference for tag and file name + meta, index -> + new_meta = meta.clone() + 
new_meta.reference = meta.id + [ new_meta, index ] + } + ch_reads_for_mapping = reads + .combine( ch_index_for_mapping ) + .map { + // Add meta.reference for tag and file name + meta, reads, meta2, index -> + meta.reference = meta2.reference + [ meta, reads] + } + FASTQ_ALIGN_BWAALN ( ch_reads_for_mapping, ch_index_for_mapping ) + ch_versions = ch_versions.mix ( FASTQ_ALIGN_BWAALN.out.versions.first() ) + ch_mapped_lane_bam = FASTQ_ALIGN_BWAALN.out.bam + ch_mapped_lane_bai = params.fasta_largeref ? FASTQ_ALIGN_BWAALN.out.csi : FASTQ_ALIGN_BWAALN.out.bai + + } else if ( params.mapping_tool == 'bwamem' ) { + ch_input_for_mapping = reads + .combine( index ) .multiMap { meta, reads, meta2, index -> new_meta = meta.clone() @@ -28,15 +51,6 @@ workflow MAP { reads: [ new_meta, reads ] index: [ meta2, index] } - - if ( params.mapping_tool == 'bwaaln' ) { - FASTQ_ALIGN_BWAALN ( ch_input_for_mapping.reads, ch_input_for_mapping.index ) - - ch_versions = ch_versions.mix ( FASTQ_ALIGN_BWAALN.out.versions.first() ) - ch_mapped_lane_bam = FASTQ_ALIGN_BWAALN.out.bam - ch_mapped_lane_bai = params.fasta_largeref ? FASTQ_ALIGN_BWAALN.out.csi : FASTQ_ALIGN_BWAALN.out.bai - - } else if ( params.mapping_tool == 'bwamem' ) { BWA_MEM ( ch_input_for_mapping.reads, ch_input_for_mapping.index, true ) ch_versions = ch_versions.mix ( BWA_MEM.out.versions.first() ) ch_mapped_lane_bam = BWA_MEM.out.bam @@ -46,6 +60,15 @@ workflow MAP { ch_mapped_lane_bai = params.fasta_largeref ? SAMTOOLS_INDEX_MEM.out.csi : SAMTOOLS_INDEX_MEM.out.bai } else if ( params.mapping_tool == 'bowtie2' ) { + ch_input_for_mapping = reads + .combine( index ) + .multiMap { + meta, reads, meta2, index -> + new_meta = meta.clone() + new_meta.reference = meta2.id + reads: [ new_meta, reads ] + index: [ meta2, index] + } BOWTIE2_ALIGN ( ch_input_for_mapping.reads, ch_input_for_mapping.index, false, true ) ch_versions = ch_versions.mix ( BOWTIE2_ALIGN.out.versions.first() ) ch_mapped_lane_bam = BOWTIE2_ALIGN.out.bam From 35ac27cfd277dbee07df97dce62c5925e1e36fee Mon Sep 17 00:00:00 2001 From: scarlhoff Date: Fri, 25 Aug 2023 17:48:59 +0200 Subject: [PATCH 3/8] actually fix multiple bwa aln mapping (?) 
--- conf/modules.config | 6 +++--- subworkflows/local/map.nf | 37 +++++++++++++++++-------------------- 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 354628576..5c67d64fe 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -352,7 +352,7 @@ process { // READ MAPPING // withName: BWA_ALN { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } ext.args = { "-n ${params.mapping_bwaaln_n} -k ${params.mapping_bwaaln_k} -l ${params.mapping_bwaaln_l} -o ${params.mapping_bwaaln_o}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" } publishDir = [ @@ -361,7 +361,7 @@ process { } withName: 'BWA_SAMSE|BWA_SAMPE' { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } ext.args = { "-r '@RG\\tID:ILLUMINA-${meta.library_id}\\tSM:${meta.sample_id}\\tPL:illumina\\tPU:ILLUMINA-${meta.library_id}-${meta.strandedness}'" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" } publishDir = [ @@ -370,7 +370,7 @@ process { } withName: ".*MAP:FASTQ_ALIGN_BWAALN:SAMTOOLS_INDEX" { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } ext.args = { params.fasta_largeref ? "-c" : "" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" } publishDir = [ diff --git a/subworkflows/local/map.nf b/subworkflows/local/map.nf index 3a5480d1e..2f43d4f06 100644 --- a/subworkflows/local/map.nf +++ b/subworkflows/local/map.nf @@ -2,13 +2,13 @@ // Prepare reference indexing for downstream // -include { FASTQ_ALIGN_BWAALN } from '../../subworkflows/nf-core/fastq_align_bwaaln/main' -include { BWA_MEM } from '../../modules/nf-core/bwa/mem/main' -include { BOWTIE2_ALIGN } from '../../modules/nf-core/bowtie2/align/main' -include { SAMTOOLS_MERGE as SAMTOOLS_MERGE_LANES } from '../../modules/nf-core/samtools/merge/main' -include { SAMTOOLS_SORT as SAMTOOLS_SORT_MERGED_LANES } from '../../modules/nf-core/samtools/sort/main' +include { FASTQ_ALIGN_BWAALN } from '../../subworkflows/nf-core/fastq_align_bwaaln/main' +include { BWA_MEM } from '../../modules/nf-core/bwa/mem/main' +include { BOWTIE2_ALIGN } from '../../modules/nf-core/bowtie2/align/main' +include { SAMTOOLS_MERGE as SAMTOOLS_MERGE_LANES } from '../../modules/nf-core/samtools/merge/main' +include { SAMTOOLS_SORT as SAMTOOLS_SORT_MERGED_LANES } from '../../modules/nf-core/samtools/sort/main' include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MEM; SAMTOOLS_INDEX as SAMTOOLS_INDEX_BT2; SAMTOOLS_INDEX as SAMTOOLS_INDEX_MERGED_LANES } from '../../modules/nf-core/samtools/index/main' -include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTAT_MAPPED } from '../../modules/nf-core/samtools/flagstat/main' +include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTAT_MAPPED } from '../../modules/nf-core/samtools/flagstat/main' workflow MAP { take: @@ -21,24 +21,19 @@ workflow MAP { if ( params.mapping_tool == 'bwaaln' ) { ch_index_for_mapping = index - .map { - // Add meta.reference for tag and file name - meta, index -> - new_meta = meta.clone() - new_meta.reference = meta.id - [ new_meta, index ] - } ch_reads_for_mapping = reads - .combine( ch_index_for_mapping ) - .map { - // Add meta.reference for tag and 
file name - meta, reads, meta2, index -> - meta.reference = meta2.reference - [ meta, reads] - } + FASTQ_ALIGN_BWAALN ( ch_reads_for_mapping, ch_index_for_mapping ) ch_versions = ch_versions.mix ( FASTQ_ALIGN_BWAALN.out.versions.first() ) ch_mapped_lane_bam = FASTQ_ALIGN_BWAALN.out.bam + .map{ + // create meta consistent with rest of workflow + meta, bam -> + new_meta = meta.clone() + new_meta.reference = meta.id_index + [ new_meta, bam ] + } + ch_mapped_lane_bai = params.fasta_largeref ? FASTQ_ALIGN_BWAALN.out.csi : FASTQ_ALIGN_BWAALN.out.bai } else if ( params.mapping_tool == 'bwamem' ) { @@ -51,6 +46,7 @@ workflow MAP { reads: [ new_meta, reads ] index: [ meta2, index] } + BWA_MEM ( ch_input_for_mapping.reads, ch_input_for_mapping.index, true ) ch_versions = ch_versions.mix ( BWA_MEM.out.versions.first() ) ch_mapped_lane_bam = BWA_MEM.out.bam @@ -69,6 +65,7 @@ workflow MAP { reads: [ new_meta, reads ] index: [ meta2, index] } + BOWTIE2_ALIGN ( ch_input_for_mapping.reads, ch_input_for_mapping.index, false, true ) ch_versions = ch_versions.mix ( BOWTIE2_ALIGN.out.versions.first() ) ch_mapped_lane_bam = BOWTIE2_ALIGN.out.bam From 17949b91cf509e35799b382ccaf9b9458a018700 Mon Sep 17 00:00:00 2001 From: scarlhoff Date: Fri, 22 Sep 2023 15:06:27 +0200 Subject: [PATCH 4/8] starting to integrate bedtools and qualimap --- conf/test_humanbam.config | 3 + subworkflows/local/reference_indexing.nf | 33 +++++-- .../local/reference_indexing_multi.nf | 14 +-- .../local/reference_indexing_single.nf | 16 ++-- workflows/eager.nf | 85 +++++++++---------- 5 files changed, 89 insertions(+), 62 deletions(-) diff --git a/conf/test_humanbam.config b/conf/test_humanbam.config index 8439f86f2..47ba00e93 100644 --- a/conf/test_humanbam.config +++ b/conf/test_humanbam.config @@ -31,6 +31,9 @@ params { contamination_estimation_angsd_mapq = 0 contamination_estimation_angsd_minq = 0 + // Qualimap + snpcapture_bed = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Human/1240K.pos.list_hs37d5.0based.bed.gz' + // TODO Reactivate sexDet and genotyping params when those steps get implemented. // //Sex Determination // sexdeterrmine_bedfile = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Human/1240K.pos.list_hs37d5.0based.bed.gz' diff --git a/subworkflows/local/reference_indexing.nf b/subworkflows/local/reference_indexing.nf index 6cd6a051c..9d5e523f3 100644 --- a/subworkflows/local/reference_indexing.nf +++ b/subworkflows/local/reference_indexing.nf @@ -4,6 +4,7 @@ include { REFERENCE_INDEXING_SINGLE } from '../../subworkflows/local/reference_indexing_single.nf' include { REFERENCE_INDEXING_MULTI } from '../../subworkflows/local/reference_indexing_multi.nf' +include { GUNZIP as GUNZIP_SNPBED } from '../../modules/nf-core/gunzip/main.nf' workflow REFERENCE_INDEXING { take: @@ -17,7 +18,7 @@ workflow REFERENCE_INDEXING { // Warn user if they've given a reference sheet that already includes fai/dict/mapper index etc. if ( ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) && (fasta_fai || fasta_dict || fasta_mapperindexdir)) log.warn("A TSV or CSV has been supplied to `--fasta` as well as e.g. `--fasta_fai`. 
--fasta CSV/TSV takes priority and --fasta_* parameters will be ignored.") - if ( ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) && (params.mitochondrion_header || params.contamination_estimation_angsd_hapmap || params.damage_manipulation_pmdtools_reference_mask )) log.warn("A TSV or CSV has been supplied to `--fasta` as well as individual reference-specific input files, e.g. `--contamination_estimation_angsd_hapmap`. Input files specified in the --fasta CSV/TSV take priority and other input parameters will be ignored.") + if ( ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) && (params.mitochondrion_header || params.contamination_estimation_angsd_hapmap || params.damage_manipulation_pmdtools_reference_mask || params.snpcapture_bed || params.mapstats_bedtools_featurefile )) log.warn("A TSV or CSV has been supplied to `--fasta` as well as individual reference-specific input files, e.g. `--contamination_estimation_angsd_hapmap`. Input files specified in the --fasta CSV/TSV take priority and other input parameters will be ignored.") if ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) { // If input (multi-)reference sheet supplied @@ -27,8 +28,9 @@ workflow REFERENCE_INDEXING { ch_hapmap = REFERENCE_INDEXING_MULTI.out.hapmap ch_pmd_mask = REFERENCE_INDEXING_MULTI.out.pmd_mask ch_snp_capture_bed = REFERENCE_INDEXING_MULTI.out.snp_capture_bed - ch_snp_eigenstrat = REFERENCE_INDEXING_MULTI.out.snp_eigenstrat + ch_pileupcaller_snp = REFERENCE_INDEXING_MULTI.out.pileupcaller_snp ch_sexdeterrmine_bed = REFERENCE_INDEXING_MULTI.out.sexdeterrmine_bed + ch_bedtools_feature = REFERENCE_INDEXING_MULTI.out.bedtools_feature ch_versions = ch_versions.mix( REFERENCE_INDEXING_MULTI.out.versions ) } else { // If input FASTA and/or indicies supplied @@ -37,8 +39,9 @@ workflow REFERENCE_INDEXING { ch_hapmap = REFERENCE_INDEXING_SINGLE.out.hapmap ch_pmd_mask = REFERENCE_INDEXING_SINGLE.out.pmd_mask ch_snp_capture_bed = REFERENCE_INDEXING_SINGLE.out.snp_capture_bed - ch_snp_eigenstrat = REFERENCE_INDEXING_SINGLE.out.snp_eigenstrat + ch_pileupcaller_snp = REFERENCE_INDEXING_SINGLE.out.pileupcaller_snp ch_sexdeterrmine_bed = REFERENCE_INDEXING_SINGLE.out.sexdeterrmine_bed + ch_bedtools_feature = REFERENCE_INDEXING_SINGLE.out.bedtools_feature ch_reference_for_mapping = REFERENCE_INDEXING_SINGLE.out.reference ch_versions = ch_versions.mix( REFERENCE_INDEXING_SINGLE.out.versions ) } @@ -53,23 +56,37 @@ workflow REFERENCE_INDEXING { ch_pmd_mask = ch_pmd_mask .filter{ it[1] != "" && it[2] != "" } - ch_snp_capture_bed = ch_snp_capture_bed + ch_capture_bed = ch_snp_capture_bed .filter{ it[1] != "" } + if ( ch_capture_bed != "") { + ch_capture_bed_gunzip = ch_capture_bed + .branch { + meta, capture_bed -> + forgunzip: capture_bed.extension == "gz" + skip: true + } + GUNZIP_SNPBED( ch_capture_bed_gunzip.forgunzip ) + ch_capture_bed = GUNZIP_SNPBED.out.gunzip.mix( ch_capture_bed_gunzip.skip ) + } - ch_snp_eigenstrat = ch_snp_eigenstrat - .filter{ it[1] != "" } + ch_pileupcaller_snp = ch_pileupcaller_snp + .filter{ it[1] != "" && it[2] != "" } ch_sexdeterrmine_bed = ch_sexdeterrmine_bed .filter{ it[1] != "" } + ch_bedtools_feature = ch_bedtools_feature + .filter{ it[1] != "" } + emit: reference = ch_reference_for_mapping // [ meta, fasta, fai, dict, mapindex, circular_target ] mitochondrion_header = ch_mitochondrion_header // [ meta, mitochondrion_header ] hapmap = ch_hapmap // [ meta, hapmap ] pmd_mask = ch_pmd_mask // [ meta, masked_fasta, capture_bed ] - snp_capture_bed = ch_snp_capture_bed // [ 
meta, capture_bed ] - snp_eigenstrat = ch_snp_eigenstrat // [ meta, snp_eigenstrat ] + snp_capture_bed = ch_capture_bed // [ meta, capture_bed ] + pileupcaller_snp = ch_pileupcaller_snp // [ meta, pileupcaller_bed, pileupcaller_snp ] sexdeterrmine_bed = ch_sexdeterrmine_bed // [ meta, sexdet_bed ] + bedtools_feature = ch_bedtools_feature // [ meta, bedtools_feature ] versions = ch_versions } diff --git a/subworkflows/local/reference_indexing_multi.nf b/subworkflows/local/reference_indexing_multi.nf index 53f258868..1ca630498 100644 --- a/subworkflows/local/reference_indexing_multi.nf +++ b/subworkflows/local/reference_indexing_multi.nf @@ -38,11 +38,13 @@ workflow REFERENCE_INDEXING_MULTI { def circular_target = row["circular_target"] def mitochondrion = row["mitochondrion_header"] def capture_bed = row["snpcapture_bed"] != "" ? file(row["snpcapture_bed"], checkIfExists: true) : "" - def snp_file = row["pileupcaller_snpfile"] != "" ? file(row["pileupcaller_snpfile"], checkIfExists: true) : "" + def pileupcaller_bed = row["pileupcaller_bedfile"] != "" ? file(row["pileupcaller_bedfile"], checkIfExists: true) : "" + def pileupcaller_snp = row["pileupcaller_snpfile"] != "" ? file(row["pileupcaller_snpfile"], checkIfExists: true) : "" def hapmap = row["hapmap_file"] != "" ? file(row["hapmap_file"], checkIfExists: true) : "" def pmd_mask = row["pmdtools_masked_fasta"] != "" ? file(row["pmdtools_masked_fasta"], checkIfExists: true) : "" def sexdet_bed = row["sexdeterrmine_snp_bed"] != "" ? file(row["sexdeterrmine_snp_bed"], checkIfExists: true) : "" - [ meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion, capture_bed, snp_file, hapmap, pmd_mask, sexdet_bed ] + def bedtools_feature = row["bedtools_feature_file"] != "" ? file(row["bedtools_feature_file"], checkIfExists: true) : "" + [ meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion, capture_bed, pileupcaller_bed, pileupcaller_snp, hapmap, pmd_mask, sexdet_bed, bedtools_feature ] } @@ -59,14 +61,15 @@ workflow REFERENCE_INDEXING_MULTI { ch_input_from_referencesheet = ch_splitreferencesheet_for_branch .multiMap { - meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion, capture_bed, snp_file, hapmap, pmd_mask, sexdet_bed -> + meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion, capture_bed, pileupcaller_bed, pileupcaller_snp, hapmap, pmd_mask, sexdet_bed, bedtools_feature -> generated: [ meta, fasta, fai, dict, mapper_index, circular_target ] mitochondrion_header: [ meta, mitochondrion ] angsd_hapmap: [ meta, hapmap ] pmd_mask: [ meta, pmd_mask, capture_bed ] snp_bed: [ meta, capture_bed ] - snp_eigenstrat: [ meta, snp_file ] + pileupcaller_snp: [ meta, pileupcaller_bed, pileupcaller_snp ] sexdeterrmine_bed: [ meta, sexdet_bed ] + bedtools_feature: [ meta, bedtools_feature ] } // Detect if fasta is gzipped or not @@ -210,7 +213,8 @@ ch_input_from_referencesheet = ch_splitreferencesheet_for_branch hapmap = ch_input_from_referencesheet.angsd_hapmap // [ meta, hapmap ] pmd_mask = ch_input_from_referencesheet.pmd_mask // [ meta, pmd_mask, capture_bed ] snp_capture_bed = ch_input_from_referencesheet.snp_bed // [ meta, capture_bed ] - snp_eigenstrat = ch_input_from_referencesheet.snp_eigenstrat // [ meta, snp_eigenstrat ] + pileupcaller_snp = ch_input_from_referencesheet.pileupcaller_snp // [ meta, pileupcaller_snp, pileupcaller_bed ] sexdeterrmine_bed = ch_input_from_referencesheet.sexdeterrmine_bed // [ meta, sexdet_bed ] + bedtools_feature = 
ch_input_from_referencesheet.bedtools_feature // [ meta, bedtools_feature ] versions = ch_versions } diff --git a/subworkflows/local/reference_indexing_single.nf b/subworkflows/local/reference_indexing_single.nf index 25c852bc5..43ebbf262 100644 --- a/subworkflows/local/reference_indexing_single.nf +++ b/subworkflows/local/reference_indexing_single.nf @@ -82,22 +82,25 @@ workflow REFERENCE_INDEXING_SINGLE { //TODO: add params for snpcapturebed, snpeigenstrat and sexdetbed once implemented def contamination_estimation_angsd_hapmap = params.contamination_estimation_angsd_hapmap != null ? file( params.contamination_estimation_angsd_hapmap, checkIfExists: true ) : "" def pmd_mask = params.damage_manipulation_pmdtools_reference_mask != null ? file(params.damage_manipulation_pmdtools_reference_mask, checkIfExists: true ) : "" - def capture_bed = "" - def snp_eigenstrat = "" + def capture_bed = params.snpcapture_bed != null ? file(params.snpcapture_bed, checkIfExists: true ) : "" + def pileupcaller_bed = "" + def pileupcaller_snp = "" def sexdet_bed = "" - [ meta, fasta, fai, dict, mapper_index, params.fasta_circular_target, params.mitochondrion_header, contamination_estimation_angsd_hapmap, pmd_mask, capture_bed, snp_eigenstrat, sexdet_bed ] + def bedtools_feature = params.mapstats_bedtools_featurefile != null ? file(params.mapstats_bedtools_featurefile, checkIfExists: true ) : "" + [ meta, fasta, fai, dict, mapper_index, params.fasta_circular_target, params.mitochondrion_header, contamination_estimation_angsd_hapmap, pmd_mask, capture_bed, pileupcaller_bed, pileupcaller_snp, sexdet_bed, bedtools_feature ] } ch_ref_index_single = ch_reference_for_mapping .multiMap{ - meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion_header, contamination_estimation_angsd_hapmap, pmd_mask, capture_bed, snp_eigenstrat, sexdet_bed -> + meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion_header, contamination_estimation_angsd_hapmap, pmd_mask, capture_bed, pileupcaller_bed, pileupcaller_snp, sexdet_bed, bedtools_feature -> reference: [ meta, fasta, fai, dict, mapper_index, circular_target ] mito_header: [ meta, mitochondrion_header ] hapmap: [ meta, contamination_estimation_angsd_hapmap ] pmd_mask: [ meta, pmd_mask, capture_bed ] snp_bed: [ meta, capture_bed ] - snp_eigenstrat: [ meta, snp_eigenstrat ] + pileupcaller_snp: [ meta, pileupcaller_bed, pileupcaller_snp ] sexdeterrmine_bed: [ meta, sexdet_bed ] + bedtools_feature: [ meta, bedtools_feature ] } emit: @@ -106,8 +109,9 @@ workflow REFERENCE_INDEXING_SINGLE { hapmap = ch_ref_index_single.hapmap // [ meta, hapmap ] pmd_mask = ch_ref_index_single.pmd_mask // [ meta, pmd_mask, capture_bed ] snp_capture_bed = ch_ref_index_single.snp_bed // [ meta, capture_bed ] - snp_eigenstrat = ch_ref_index_single.snp_eigenstrat // [ meta, snp_eigenstrat ] + pileupcaller_snp = ch_ref_index_single.pileupcaller_snp // [ meta, pileupcaller_bed, pileupcaller_snp ] sexdeterrmine_bed = ch_ref_index_single.sexdeterrmine_bed // [ meta, sexdet_bed ] + bedtools_feature = ch_ref_index_single.bedtools_feature // [ meta, bedtools_feature ] versions = ch_versions } diff --git a/workflows/eager.nf b/workflows/eager.nf index 04148ebe8..b34f1634e 100644 --- a/workflows/eager.nf +++ b/workflows/eager.nf @@ -101,7 +101,6 @@ include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTATS_BAM_INPUT } from '../modules/n include { BEDTOOLS_COVERAGE as BEDTOOLS_COVERAGE_DEPTH ; BEDTOOLS_COVERAGE as BEDTOOLS_COVERAGE_BREADTH } from '../modules/nf-core/bedtools/coverage/main' 
include { SAMTOOLS_VIEW_GENOME } from '../modules/local/samtools_view_genome.nf' include { QUALIMAP_BAMQC } from '../modules/nf-core/qualimap/bamqc/main' -include { GUNZIP as GUNZIP_SNPBED } from '../modules/nf-core/gunzip/main.nf' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -138,28 +137,6 @@ workflow EAGER { if ( params.preprocessing_tool == 'fastp' && !adapterlist.extension.matches(".*(fa|fasta|fna|fas)") ) error "[nf-core/eager] ERROR: fastp adapter list requires a `.fasta` format and extension (or fa, fas, fna). Check input: --preprocessing_adapterlist ${params.preprocessing_adapterlist}" } - // QualiMap - if ( params.snpcapture_bed ) { - ch_snpcapture_bed_gunzip = Channel.fromPath( params.snpcapture_bed, checkIfExists: true ) - .collect() - .map { - file -> - meta = file.simpleName - [meta,file] - } - .branch { - meta, bed -> - forgunzip: bed[0].extension == "gz" - skip: true - } - ch_snpcapture_bed = GUNZIP_SNPBED(ch_snpcapture_bed_gunzip.forgunzip).gunzip.mix(ch_snpcapture_bed_gunzip.skip).map{it[1]} - - } else { - ch_snpcapture_bed = [] - } - - // Contamination estimation - hapmap_file = file(params.contamination_estimation_angsd_hapmap, checkIfExists:true) // // SUBWORKFLOW: Read in samplesheet, validate and stage input files @@ -296,13 +273,30 @@ workflow EAGER { // if ( !params.skip_qualimap ) { - ch_qualimap_input = ch_dedupped_bams + ch_snp_capture_bed = REFERENCE_INDEXING.out.snp_capture_bed + .map{ + WorkflowEager.addNewMetaFromAttributes( it, "id" , "reference" , false ) + } + ch_qualimap_input = ch_dedupped_bams .map { meta, bam, bai -> [ meta, bam ] } - QUALIMAP_BAMQC(ch_qualimap_input, ch_snpcapture_bed) - ch_versions = ch_versions.mix( QUALIMAP_BAMQC.out.versions ) + .map { + WorkflowEager.addNewMetaFromAttributes( it, "reference" , "reference" , false ) + } + .combine( + by: 0, + ch_snp_capture_bed + ) + .multiMap{ + ignore_meta, meta, bam, meta2, snp_capture_bed -> + bam: [ meta, bam ] + snp_capture_bed: [ snp_capture_bed ] + } + QUALIMAP_BAMQC( ch_qualimap_input.bam, ch_qualimap_input.snp_capture_bed ) + ch_versions = ch_versions.mix( QUALIMAP_BAMQC.out.versions ) + // Qualimap multiqc files? } // @@ -367,17 +361,11 @@ workflow EAGER { if ( params.run_mtnucratio ) { ch_mito_header = REFERENCE_INDEXING.out.mitochondrion_header .map{ - meta, mito_header -> - meta2 = [:] - meta2.reference = meta.id - [ meta2, meta, mito_header ] + WorkflowEager.addNewMetaFromAttributes( it, "id" , "reference" , false ) } mtnucratio_input = ch_dedupped_bams .map { - meta, bam, bai -> - meta2 = [:] - meta2.reference = meta.reference - [ meta2, meta, bam ] + WorkflowEager.addNewMetaFromAttributes( it, "reference" , "reference" , false ) } .combine( by: 0, @@ -386,7 +374,7 @@ workflow EAGER { .multiMap{ ignore_meta, meta, bam, meta2, mito_header -> bam: [ meta, bam ] - mito_header: [ meta2, mito_header ] + mito_header: [ meta2, mito_header ] //should this be meta, mito_header? 
} MTNUCRATIO( mtnucratio_input.bam, mtnucratio_input.mito_header.map{ it[1] } ) @@ -453,12 +441,22 @@ workflow EAGER { if ( params.run_bedtools_coverage ) { - ch_anno_for_bedtools = Channel.fromPath(params.mapstats_bedtools_featurefile, checkIfExists: true).collect() - - ch_dedupped_for_bedtools = ch_dedupped_bams.combine(ch_anno_for_bedtools) - .map{ - meta, bam, bai, anno -> - [meta, anno, bam] + ch_bedtools_feature = REFERENCE_INDEXING.out.bedtools_feature + .map{ + WorkflowEager.addNewMetaFromAttributes( it, "id" , "reference" , false ) + } + + ch_bedtools_input = ch_dedupped_bams + .map { + WorkflowEager.addNewMetaFromAttributes( it, "reference" , "reference" , false ) + } + .combine( + by: 0, + ch_bedtools_feature + ) + .map{ + ignore_meta, meta, bam, bai, meta2, bedtools_feature -> + [ meta, bedtools_feature, bam ] } // Running samtools view to get header @@ -466,12 +464,13 @@ workflow EAGER { ch_genome_for_bedtools = SAMTOOLS_VIEW_GENOME.out.genome - BEDTOOLS_COVERAGE_BREADTH(ch_dedupped_for_bedtools, ch_genome_for_bedtools) - BEDTOOLS_COVERAGE_DEPTH(ch_dedupped_for_bedtools, ch_genome_for_bedtools) + BEDTOOLS_COVERAGE_BREADTH(ch_bedtools_input, ch_genome_for_bedtools) + BEDTOOLS_COVERAGE_DEPTH(ch_bedtools_input, ch_genome_for_bedtools) ch_versions = ch_versions.mix( SAMTOOLS_VIEW_GENOME.out.versions ) ch_versions = ch_versions.mix( BEDTOOLS_COVERAGE_BREADTH.out.versions ) ch_versions = ch_versions.mix( BEDTOOLS_COVERAGE_DEPTH.out.versions ) + //Bedtools multiqc files? } // From 17453404e05ceafef698c2da38a6e2be921650d8 Mon Sep 17 00:00:00 2001 From: scarlhoff Date: Fri, 29 Sep 2023 15:13:48 +0200 Subject: [PATCH 5/8] started to make qualimap bed input optional --- conf/modules.config | 2 +- subworkflows/local/reference_indexing.nf | 16 +++++++++------- workflows/eager.nf | 22 +++++++++++++++++----- 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 4f17a6cc7..619283ada 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -819,7 +819,7 @@ process { ] } - withName: "QUALIMAP_BAMQC" { + withName: 'QUALIMAP_BAMQC_WITHBED|QUALIMAP_BAMQC_NOBED' { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } publishDir = [ path: { "${params.outdir}/mapstats/qualimap/${meta.reference}/${meta.sample_id}/}" }, diff --git a/subworkflows/local/reference_indexing.nf b/subworkflows/local/reference_indexing.nf index 9d5e523f3..13357d4ea 100644 --- a/subworkflows/local/reference_indexing.nf +++ b/subworkflows/local/reference_indexing.nf @@ -56,18 +56,20 @@ workflow REFERENCE_INDEXING { ch_pmd_mask = ch_pmd_mask .filter{ it[1] != "" && it[2] != "" } - ch_capture_bed = ch_snp_capture_bed - .filter{ it[1] != "" } - if ( ch_capture_bed != "") { - ch_capture_bed_gunzip = ch_capture_bed + ch_capture_bed = ch_snp_capture_bed //optional + .branch { + meta, capture_bed -> + input: capture_bed != "" + skip: true + } + ch_capture_bed_gunzip = ch_capture_bed.input //unzip .branch { meta, capture_bed -> forgunzip: capture_bed.extension == "gz" skip: true } - GUNZIP_SNPBED( ch_capture_bed_gunzip.forgunzip ) - ch_capture_bed = GUNZIP_SNPBED.out.gunzip.mix( ch_capture_bed_gunzip.skip ) - } + GUNZIP_SNPBED( ch_capture_bed_gunzip.forgunzip ) + ch_capture_bed = GUNZIP_SNPBED.out.gunzip.mix( ch_capture_bed_gunzip.skip ).mix( ch_capture_bed.skip ) ch_pileupcaller_snp = ch_pileupcaller_snp .filter{ it[1] != "" && it[2] != "" } diff --git a/workflows/eager.nf b/workflows/eager.nf index b34f1634e..bd9a75b1f 100644 --- a/workflows/eager.nf +++ 
b/workflows/eager.nf @@ -100,7 +100,7 @@ include { ENDORSPY } from '../modules/n include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTATS_BAM_INPUT } from '../modules/nf-core/samtools/flagstat/main' include { BEDTOOLS_COVERAGE as BEDTOOLS_COVERAGE_DEPTH ; BEDTOOLS_COVERAGE as BEDTOOLS_COVERAGE_BREADTH } from '../modules/nf-core/bedtools/coverage/main' include { SAMTOOLS_VIEW_GENOME } from '../modules/local/samtools_view_genome.nf' -include { QUALIMAP_BAMQC } from '../modules/nf-core/qualimap/bamqc/main' +include { QUALIMAP_BAMQC as QUALIMAP_BAMQC_NOBED ; QUALIMAP_BAMQC as QUALIMAP_BAMQC_WITHBED } from '../modules/nf-core/qualimap/bamqc/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -289,14 +289,26 @@ workflow EAGER { by: 0, ch_snp_capture_bed ) + .branch { + ignore_meta, meta, bam, meta2, snp_capture_bed -> + withbed: snp_capture_bed != "" + nobed: true + } + ch_qualimap_input_with = ch_qualimap_input.withbed .multiMap{ ignore_meta, meta, bam, meta2, snp_capture_bed -> bam: [ meta, bam ] snp_capture_bed: [ snp_capture_bed ] } - QUALIMAP_BAMQC( ch_qualimap_input.bam, ch_qualimap_input.snp_capture_bed ) - ch_versions = ch_versions.mix( QUALIMAP_BAMQC.out.versions ) - // Qualimap multiqc files? + QUALIMAP_BAMQC_WITHBED( ch_qualimap_input_with.bam, ch_qualimap_input_with.snp_capture_bed ) + ch_qualimap_input_without = ch_qualimap_input.nobed + .map{ + ignore_meta, meta, bam, meta2, snp_capture_bed -> + [ meta, bam ] + } + QUALIMAP_BAMQC_NOBED( ch_qualimap_input_without, [] ) + ch_qualimap_output = QUALIMAP_BAMQC_WITHBED.out.results.mix( QUALIMAP_BAMQC_NOBED.out.results ) + ch_versions = ch_versions.mix( QUALIMAP_BAMQC_NOBED.out.versions ).mix( QUALIMAP_BAMQC_WITHBED.out.versions ) } // @@ -536,7 +548,7 @@ workflow EAGER { //ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) // Replaced with custom mixing if ( !params.skip_qualimap ) { - ch_multiqc_files = ch_multiqc_files.mix( QUALIMAP_BAMQC.out.results.collect{it[1]}.ifEmpty([]) ) + ch_multiqc_files = ch_multiqc_files.mix( ch_qualimap_output.collect{it[1]}.ifEmpty([]) ) } MULTIQC ( From c43cca43b71c13958990618b76e6583b0cc6cc4a Mon Sep 17 00:00:00 2001 From: scarlhoff Date: Fri, 6 Oct 2023 11:10:36 +0200 Subject: [PATCH 6/8] fix mtnucratio --- workflows/eager.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/eager.nf b/workflows/eager.nf index bd9a75b1f..b9a36b0de 100644 --- a/workflows/eager.nf +++ b/workflows/eager.nf @@ -384,9 +384,9 @@ workflow EAGER { ch_mito_header ) .multiMap{ - ignore_meta, meta, bam, meta2, mito_header -> + ignore_meta, meta, bam, bai, meta2, mito_header -> bam: [ meta, bam ] - mito_header: [ meta2, mito_header ] //should this be meta, mito_header? 
+ mito_header: [ meta2, mito_header ] } MTNUCRATIO( mtnucratio_input.bam, mtnucratio_input.mito_header.map{ it[1] } ) From 7dca621547d118da3990b589637b5004cb0e9927 Mon Sep 17 00:00:00 2001 From: scarlhoff Date: Fri, 6 Oct 2023 15:19:15 +0200 Subject: [PATCH 7/8] bedtools multiref integration --- docs/development/manual_tests.md | 7 ++++++- workflows/eager.nf | 30 ++++++++++++++++++------------ 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/docs/development/manual_tests.md b/docs/development/manual_tests.md index a02f22588..d339d4ba7 100644 --- a/docs/development/manual_tests.md +++ b/docs/development/manual_tests.md @@ -94,10 +94,11 @@ Tool Specific combinations - with stricter threshold - BAM trimming + - with default parameters - different length by udg treatment -- All together + - All together ### Multi-reference tests @@ -145,6 +146,10 @@ nextflow run ../main.nf -profile singularity,test --outdir ./results --input sam ## Test: (11) Broken path correctly fails pipeline ✅ ## Expect: Expect fail nextflow run ../main.nf -profile singularity,test --outdir ./results --input samplesheet.tsv --fasta reference_sheet_multiref_test11.csv -ansi-log false -dump-channels --save_reference + +# Test: File input via reference sheet +# Expect: Qualimap with bed, mtnucratio and angsd successful and bedtools not run for hs37d5, qualimap without bed file, mtnucratio and bedtools successful and angsd not run for Mammoth_MT +nextflow run main.nf -profile test_multiref,docker --outdir ./results --run_bedtools_coverage --run_contamination_estimation_angsd --run_mtnucratio ``` ### AdapterRemoval diff --git a/workflows/eager.nf b/workflows/eager.nf index b9a36b0de..3b38ec2fc 100644 --- a/workflows/eager.nf +++ b/workflows/eager.nf @@ -32,11 +32,6 @@ if ( params.metagenomics_complexity_tool == 'prinseq' && params.metagenomics_pri exit 1, ("[nf-core/eager] ERROR: Metagenomics: You picked PRINSEQ++ with 'entropy' mode but provided a dust score. Please specify an entropy filter threshold using the --metagenomics_complexity_entropy flag") } } -if( params.run_bedtools_coverage ){ - if( !params.mapstats_bedtools_featurefile ) { - exit 1, "[nf-core/eager] ERROR: you have turned on bedtools coverage, but not specified a BED or GFF file with --mapstats_bedtools_featurefile. Please validate your parameters." - } -} // TODO What to do when params.preprocessing_excludeunmerged is provided but the data is SE? if ( params.deduplication_tool == 'dedup' && ! params.preprocessing_excludeunmerged ) { exit 1, "[nf-core/eager] ERROR: Dedup can only be used on collapsed (i.e. merged) PE reads. 
For all other cases, please set --deduplication_tool to 'markduplicates'."} @@ -458,7 +453,7 @@ workflow EAGER { WorkflowEager.addNewMetaFromAttributes( it, "id" , "reference" , false ) } - ch_bedtools_input = ch_dedupped_bams + ch_bedtools_prep = ch_dedupped_bams .map { WorkflowEager.addNewMetaFromAttributes( it, "reference" , "reference" , false ) } @@ -468,21 +463,32 @@ workflow EAGER { ) .map{ ignore_meta, meta, bam, bai, meta2, bedtools_feature -> - [ meta, bedtools_feature, bam ] - } + [ meta, bedtools_feature, bam, bai ] + } + .branch{ + meta, bedtools_feature, bam, bai -> + withfeature: bedtools_feature != "" + nobed: true + } // Running samtools view to get header - SAMTOOLS_VIEW_GENOME(ch_dedupped_bams) + ch_bedtools_input = ch_bedtools_prep.withfeature + .multiMap{ + meta, bedtools_feature, bam, bai -> + bam: [ meta, bam, bai ] + withfeature: [ meta, bedtools_feature, bam ] + } + + SAMTOOLS_VIEW_GENOME( ch_bedtools_input.bam ) ch_genome_for_bedtools = SAMTOOLS_VIEW_GENOME.out.genome - BEDTOOLS_COVERAGE_BREADTH(ch_bedtools_input, ch_genome_for_bedtools) - BEDTOOLS_COVERAGE_DEPTH(ch_bedtools_input, ch_genome_for_bedtools) + BEDTOOLS_COVERAGE_BREADTH(ch_bedtools_input.withfeature, ch_genome_for_bedtools) + BEDTOOLS_COVERAGE_DEPTH(ch_bedtools_input.withfeature, ch_genome_for_bedtools) ch_versions = ch_versions.mix( SAMTOOLS_VIEW_GENOME.out.versions ) ch_versions = ch_versions.mix( BEDTOOLS_COVERAGE_BREADTH.out.versions ) ch_versions = ch_versions.mix( BEDTOOLS_COVERAGE_DEPTH.out.versions ) - //Bedtools multiqc files? } // From 6e2abeb47b3e31788264f9e1337e1c5c73f2f5aa Mon Sep 17 00:00:00 2001 From: scarlhoff Date: Fri, 20 Oct 2023 11:07:51 +0200 Subject: [PATCH 8/8] integrate review comments --- subworkflows/local/map.nf | 13 +++++-------- workflows/eager.nf | 2 ++ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/subworkflows/local/map.nf b/subworkflows/local/map.nf index 5d196aaca..dc38315ab 100644 --- a/subworkflows/local/map.nf +++ b/subworkflows/local/map.nf @@ -29,8 +29,7 @@ workflow MAP { .map{ // create meta consistent with rest of workflow meta, bam -> - new_meta = meta.clone() - new_meta.reference = meta.id_index + new_meta = meta + [ reference: meta.id_index ] [ new_meta, bam ] } @@ -41,10 +40,9 @@ workflow MAP { .combine( index ) .multiMap { meta, reads, meta2, index -> - new_meta = meta.clone() - new_meta.reference = meta2.id + new_meta = meta + [ reference: meta2.id ] reads: [ new_meta, reads ] - index: [ meta2, index] + index: [ meta2, index ] } BWA_MEM ( ch_input_for_mapping.reads, ch_input_for_mapping.index, true ) @@ -60,10 +58,9 @@ workflow MAP { .combine( index ) .multiMap { meta, reads, meta2, index -> - new_meta = meta.clone() - new_meta.reference = meta2.id + new_meta = meta + [ reference: meta2.id ] reads: [ new_meta, reads ] - index: [ meta2, index] + index: [ meta2, index ] } BOWTIE2_ALIGN ( ch_input_for_mapping.reads, ch_input_for_mapping.index, false, true ) diff --git a/workflows/eager.nf b/workflows/eager.nf index bafd24b64..57a4f33e1 100644 --- a/workflows/eager.nf +++ b/workflows/eager.nf @@ -295,12 +295,14 @@ workflow EAGER { bam: [ meta, bam ] snp_capture_bed: [ snp_capture_bed ] } + QUALIMAP_BAMQC_WITHBED( ch_qualimap_input_with.bam, ch_qualimap_input_with.snp_capture_bed ) ch_qualimap_input_without = ch_qualimap_input.nobed .map{ ignore_meta, meta, bam, meta2, snp_capture_bed -> [ meta, bam ] } + QUALIMAP_BAMQC_NOBED( ch_qualimap_input_without, [] ) ch_qualimap_output = QUALIMAP_BAMQC_WITHBED.out.results.mix( 
QUALIMAP_BAMQC_NOBED.out.results ) ch_versions = ch_versions.mix( QUALIMAP_BAMQC_NOBED.out.versions ).mix( QUALIMAP_BAMQC_WITHBED.out.versions )
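
The recurring technique across these patches is matching per-reference inputs (SNP capture BEDs, hapmap files, mitochondrion headers, bedtools feature files) to the correct per-sample BAMs by promoting the reference name to a leading grouping key and then using `combine( by: 0 )`, rather than broadcasting a single file to every sample. The snippet below is a minimal, self-contained sketch of that pattern only, not part of the diff: the sample IDs and file names are hypothetical, and the keying step that the patches perform with `WorkflowEager.addNewMetaFromAttributes` is approximated here with a plain `map`.

```nextflow
// Sketch only: illustrates the combine-by-reference pattern used in the patches above.
// All sample/file names are placeholders; file() here does not check for existence.

workflow {

    // Hypothetical deduplicated BAMs, one per sample/reference combination
    ch_dedupped_bams = Channel.of(
        [ [ id: 'sample1', reference: 'hs37d5'     ], file('sample1_hs37d5.bam') ],
        [ [ id: 'sample2', reference: 'Mammoth_MT' ], file('sample2_MT.bam') ]
    )

    // Hypothetical per-reference annotation files, keyed by the reference meta id
    ch_snp_capture_bed = Channel.of(
        [ [ id: 'hs37d5' ], file('1240K.pos.list_hs37d5.0based.bed') ]
    )

    // Promote the reference name to a leading grouping key on both channels
    // (the pipeline does this via WorkflowEager.addNewMetaFromAttributes)
    ch_bams_keyed = ch_dedupped_bams.map   { meta, bam -> [ meta.reference, meta, bam ] }
    ch_bed_keyed  = ch_snp_capture_bed.map { meta, bed -> [ meta.id,        meta, bed ] }

    // Combine on the key so each BAM only meets files belonging to its own reference,
    // then drop the key and the annotation meta before passing downstream
    ch_bams_keyed
        .combine( ch_bed_keyed, by: 0 )
        .map { key, meta, bam, meta2, bed -> [ meta, bam, bed ] }
        .view()
}
```

Keying on a dedicated leading element, instead of joining on the full meta map, is what lets sample-level meta (lane, library, strandedness) differ from reference-level meta without breaking the association; the same idea underlies the `meta.id_index` suffix that FASTQ_ALIGN_BWAALN adds so output file names stay unique when one sample is mapped against several references.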