From 80a40470f039fa35385596bd909ad9934e3f3789 Mon Sep 17 00:00:00 2001 From: scarlhoff Date: Fri, 7 Jul 2023 16:01:37 +0200 Subject: [PATCH 1/8] add ref sheet columns, update bwaaln --- conf/test_multiref.config | 2 +- modules.json | 2 +- nextflow.config | 2 +- nextflow_schema.json | 6 -- subworkflows/local/reference_indexing.nf | 43 +++++++++- .../local/reference_indexing_multi.nf | 67 ++++++++++------ .../local/reference_indexing_single.nf | 30 ++++++- .../nf-core/fastq_align_bwaaln/main.nf | 79 +++++++++++++++++-- .../nf-core/fastq_align_bwaaln/meta.yml | 10 +++ .../fastq_align_bwaaln/nextflow.config | 26 ++++++ workflows/eager.nf | 52 ++++++------ 11 files changed, 253 insertions(+), 66 deletions(-) create mode 100644 subworkflows/nf-core/fastq_align_bwaaln/nextflow.config diff --git a/conf/test_multiref.config b/conf/test_multiref.config index 23fcdb942..1e71b9922 100644 --- a/conf/test_multiref.config +++ b/conf/test_multiref.config @@ -23,7 +23,7 @@ params { input = 'https://github.com/nf-core/test-datasets/raw/eager/testdata/Mammoth/samplesheet_multilane_multilib.tsv' // Genome references - fasta = 'https://github.com/jfy133/nf-core-test-datasets/raw/eager/reference/reference_sheet_multiref.csv' + fasta = 'https://github.com/nf-core/test-datasets/raw/eager/reference/reference_sheet_multiref.csv' // BAM filtering run_bamfiltering = true diff --git a/modules.json b/modules.json index 566c4dbd2..e107375e4 100644 --- a/modules.json +++ b/modules.json @@ -206,7 +206,7 @@ }, "fastq_align_bwaaln": { "branch": "master", - "git_sha": "a9784afdd5dcda23b84e64db75dc591065d64653", + "git_sha": "e2c81fea3daeacfa190f78d2b82f82361b734507", "installed_by": ["subworkflows"] } } diff --git a/nextflow.config b/nextflow.config index 3c7b88d89..82491f55d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -17,7 +17,6 @@ params { fasta_dict = null fasta_mapperindexdir = null fasta_circular_target = null - fasta_mitochondrion_header = null fasta_largeref = false // References @@ -211,6 +210,7 @@ try { // Additional configs for subworkflows includeConfig 'subworkflows/nf-core/bam_split_by_region/nextflow.config' includeConfig 'subworkflows/nf-core/bam_docounts_contamination_angsd/nextflow.config' +includeConfig 'subworkflows/nf-core/fastq_align_bwaaln/nextflow.config' profiles { debug { diff --git a/nextflow_schema.json b/nextflow_schema.json index 044d4b635..234c85415 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -111,12 +111,6 @@ "description": "Specify the FASTA header of the target chromosome to extend. Only applies when using `circularmapper`.", "help_text": "The entry (chromosome, contig, etc.) in your FASTA reference that you'd like to be treated as circular.\n\nApplies only when providing a single FASTA file via `--fasta` (NOT multi-reference input - see reference TSV/CSV input).\n\n> Modifies tool parameter(s):\n> - circulargenerator `-s`\n", "fa_icon": "fas fa-bullseye" - }, - "fasta_mitochondrion_header": { - "type": "string", - "fa_icon": "fas fa-tag", - "description": "Specify the name of the reference FASTA entry corresponding to the mitochondrial genome, up to the first space. Only applies when using `--run_mtnucratio`.", - "help_text": "Specify the FASTA entry in the reference file specified as `--fasta` that acts as the mitochondrial 'chromosome' to base a mitochondrial-to-nuclear ratio calculation on. \n\nThe tool only accepts the first section of the header before the first space. 
For example, mitochondrion chromosome name is `MT` for the hs37d5/GrCH37 human reference genome.\n" } } }, diff --git a/subworkflows/local/reference_indexing.nf b/subworkflows/local/reference_indexing.nf index f91bf18f0..6cd6a051c 100644 --- a/subworkflows/local/reference_indexing.nf +++ b/subworkflows/local/reference_indexing.nf @@ -17,20 +17,59 @@ workflow REFERENCE_INDEXING { // Warn user if they've given a reference sheet that already includes fai/dict/mapper index etc. if ( ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) && (fasta_fai || fasta_dict || fasta_mapperindexdir)) log.warn("A TSV or CSV has been supplied to `--fasta` as well as e.g. `--fasta_fai`. --fasta CSV/TSV takes priority and --fasta_* parameters will be ignored.") + if ( ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) && (params.mitochondrion_header || params.contamination_estimation_angsd_hapmap || params.damage_manipulation_pmdtools_reference_mask )) log.warn("A TSV or CSV has been supplied to `--fasta` as well as individual reference-specific input files, e.g. `--contamination_estimation_angsd_hapmap`. Input files specified in the --fasta CSV/TSV take priority and other input parameters will be ignored.") if ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) { // If input (multi-)reference sheet supplied REFERENCE_INDEXING_MULTI ( fasta ) ch_reference_for_mapping = REFERENCE_INDEXING_MULTI.out.reference + ch_mitochondrion_header = REFERENCE_INDEXING_MULTI.out.mitochondrion_header + ch_hapmap = REFERENCE_INDEXING_MULTI.out.hapmap + ch_pmd_mask = REFERENCE_INDEXING_MULTI.out.pmd_mask + ch_snp_capture_bed = REFERENCE_INDEXING_MULTI.out.snp_capture_bed + ch_snp_eigenstrat = REFERENCE_INDEXING_MULTI.out.snp_eigenstrat + ch_sexdeterrmine_bed = REFERENCE_INDEXING_MULTI.out.sexdeterrmine_bed ch_versions = ch_versions.mix( REFERENCE_INDEXING_MULTI.out.versions ) } else { // If input FASTA and/or indicies supplied REFERENCE_INDEXING_SINGLE ( fasta, fasta_fai, fasta_dict, fasta_mapperindexdir ) + ch_mitochondrion_header = REFERENCE_INDEXING_SINGLE.out.mitochondrion_header + ch_hapmap = REFERENCE_INDEXING_SINGLE.out.hapmap + ch_pmd_mask = REFERENCE_INDEXING_SINGLE.out.pmd_mask + ch_snp_capture_bed = REFERENCE_INDEXING_SINGLE.out.snp_capture_bed + ch_snp_eigenstrat = REFERENCE_INDEXING_SINGLE.out.snp_eigenstrat + ch_sexdeterrmine_bed = REFERENCE_INDEXING_SINGLE.out.sexdeterrmine_bed ch_reference_for_mapping = REFERENCE_INDEXING_SINGLE.out.reference ch_versions = ch_versions.mix( REFERENCE_INDEXING_SINGLE.out.versions ) } + + // Filter out input options that are not provided + ch_mitochondrion_header = ch_mitochondrion_header + .filter{ it[1] != "" } + + ch_hapmap = ch_hapmap + .filter{ it[1] != "" } + + ch_pmd_mask = ch_pmd_mask + .filter{ it[1] != "" && it[2] != "" } + + ch_snp_capture_bed = ch_snp_capture_bed + .filter{ it[1] != "" } + + ch_snp_eigenstrat = ch_snp_eigenstrat + .filter{ it[1] != "" } + + ch_sexdeterrmine_bed = ch_sexdeterrmine_bed + .filter{ it[1] != "" } + emit: - reference = ch_reference_for_mapping // [ meta, fasta, fai, dict, mapindex ] - versions = ch_versions + reference = ch_reference_for_mapping // [ meta, fasta, fai, dict, mapindex, circular_target ] + mitochondrion_header = ch_mitochondrion_header // [ meta, mitochondrion_header ] + hapmap = ch_hapmap // [ meta, hapmap ] + pmd_mask = ch_pmd_mask // [ meta, masked_fasta, capture_bed ] + snp_capture_bed = ch_snp_capture_bed // [ meta, capture_bed ] + snp_eigenstrat = ch_snp_eigenstrat // [ meta, snp_eigenstrat ] + 
sexdeterrmine_bed = ch_sexdeterrmine_bed // [ meta, sexdet_bed ] + versions = ch_versions } diff --git a/subworkflows/local/reference_indexing_multi.nf b/subworkflows/local/reference_indexing_multi.nf index 754189d03..53f258868 100644 --- a/subworkflows/local/reference_indexing_multi.nf +++ b/subworkflows/local/reference_indexing_multi.nf @@ -37,7 +37,12 @@ workflow REFERENCE_INDEXING_MULTI { def mapper_index = row["mapper_index"] != "" ? file(row["mapper_index"], checkIfExists: true) : "" def circular_target = row["circular_target"] def mitochondrion = row["mitochondrion_header"] - [ meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion ] + def capture_bed = row["snpcapture_bed"] != "" ? file(row["snpcapture_bed"], checkIfExists: true) : "" + def snp_file = row["pileupcaller_snpfile"] != "" ? file(row["pileupcaller_snpfile"], checkIfExists: true) : "" + def hapmap = row["hapmap_file"] != "" ? file(row["hapmap_file"], checkIfExists: true) : "" + def pmd_mask = row["pmdtools_masked_fasta"] != "" ? file(row["pmdtools_masked_fasta"], checkIfExists: true) : "" + def sexdet_bed = row["sexdeterrmine_snp_bed"] != "" ? file(row["sexdeterrmine_snp_bed"], checkIfExists: true) : "" + [ meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion, capture_bed, snp_file, hapmap, pmd_mask, sexdet_bed ] } @@ -52,10 +57,22 @@ workflow REFERENCE_INDEXING_MULTI { // DECOMPRESSION // +ch_input_from_referencesheet = ch_splitreferencesheet_for_branch + .multiMap { + meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion, capture_bed, snp_file, hapmap, pmd_mask, sexdet_bed -> + generated: [ meta, fasta, fai, dict, mapper_index, circular_target ] + mitochondrion_header: [ meta, mitochondrion ] + angsd_hapmap: [ meta, hapmap ] + pmd_mask: [ meta, pmd_mask, capture_bed ] + snp_bed: [ meta, capture_bed ] + snp_eigenstrat: [ meta, snp_file ] + sexdeterrmine_bed: [ meta, sexdet_bed ] + } + // Detect if fasta is gzipped or not - ch_fasta_for_gunzip = ch_splitreferencesheet_for_branch + ch_fasta_for_gunzip = ch_input_from_referencesheet.generated .branch { - meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion -> + meta, fasta, fai, dict, mapper_index, circular_target -> forgunzip: fasta.extension == "gz" skip: true } @@ -63,9 +80,9 @@ workflow REFERENCE_INDEXING_MULTI { // Pull out name/file to match cardinality for GUNZIP module ch_gunzip_input = ch_fasta_for_gunzip.forgunzip .multiMap { - meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion -> + meta, fasta, fai, dict, mapper_index, circular_target -> gunzip: [ meta, fasta ] - remainder: [ meta, fai, dict, mapper_index, circular_target, mitochondrion ] + remainder: [ meta, fai, dict, mapper_index, circular_target ] } @@ -83,7 +100,7 @@ workflow REFERENCE_INDEXING_MULTI { // Separate out non-faidxed references ch_fasta_for_faidx = ch_fasta_for_faiindexing .branch { - meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion -> + meta, fasta, fai, dict, mapper_index, circular_target -> forfaidx: fai == "" skip: true } @@ -92,9 +109,9 @@ workflow REFERENCE_INDEXING_MULTI { ch_faidx_input = ch_fasta_for_faidx .forfaidx .multiMap { - meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion -> + meta, fasta, fai, dict, mapper_index, circular_target -> faidx: [ meta, fasta ] - remainder: [ meta, fasta, dict, mapper_index, circular_target, mitochondrion ] // we drop fai here as we are going to make it + remainder: [ meta, fasta, dict, mapper_index, circular_target ] // we drop fai 
here as we are going to make it } SAMTOOLS_FAIDX ( ch_faidx_input.faidx ) @@ -104,9 +121,9 @@ workflow REFERENCE_INDEXING_MULTI { ch_faidxed_formix = SAMTOOLS_FAIDX.out.fai .join( ch_faidx_input.remainder, failOnMismatch: true ) .map { - meta, fai, fasta, dict, mapper_index, circular_target, mitochondrion -> + meta, fai, fasta, dict, mapper_index, circular_target -> - [ meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion ] + [ meta, fasta, fai, dict, mapper_index, circular_target ] } // Mix back newly faidx'd references with the pre-indexed ones @@ -118,7 +135,7 @@ workflow REFERENCE_INDEXING_MULTI { ch_fasta_for_dict = ch_fasta_for_dictindexing .branch { - meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion -> + meta, fasta, fai, dict, mapper_index, circular_target -> fordict: dict == "" skip: true } @@ -126,9 +143,9 @@ workflow REFERENCE_INDEXING_MULTI { ch_dict_input = ch_fasta_for_dict .fordict .multiMap { - meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion -> + meta, fasta, fai, dict, mapper_index, circular_target -> dict: [ meta, fasta ] - remainder: [ meta, fasta, fai, mapper_index, circular_target, mitochondrion ] + remainder: [ meta, fasta, fai, mapper_index, circular_target ] } PICARD_CREATESEQUENCEDICTIONARY ( ch_dict_input.dict ) @@ -137,9 +154,9 @@ workflow REFERENCE_INDEXING_MULTI { ch_dicted_formix = PICARD_CREATESEQUENCEDICTIONARY.out.reference_dict .join( ch_dict_input.remainder, failOnMismatch: true ) .map { - meta, dict, fasta, fai, mapper_index, circular_target, mitochondrion -> + meta, dict, fasta, fai, mapper_index, circular_target -> - [ meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion ] + [ meta, fasta, fai, dict, mapper_index, circular_target ] } ch_dict_formapperindexing = ch_fasta_for_dict.skip.mix(ch_dicted_formix) @@ -152,7 +169,7 @@ workflow REFERENCE_INDEXING_MULTI { ch_fasta_for_mapperindex = ch_dict_formapperindexing .branch { - meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion -> + meta, fasta, fai, dict, mapper_index, circular_target -> forindex: mapper_index == "" skip: true } @@ -160,9 +177,9 @@ workflow REFERENCE_INDEXING_MULTI { ch_mapindex_input = ch_fasta_for_mapperindex .forindex .multiMap { - meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion -> + meta, fasta, fai, dict, mapper_index, circular_target -> index: [ meta, fasta ] - remainder: [ meta, fasta, fai, dict, circular_target, mitochondrion ] + remainder: [ meta, fasta, fai, dict, circular_target ] } if ( params.mapping_tool == "bwaaln" || params.mapping_tool == "bwamem" ) { @@ -180,14 +197,20 @@ workflow REFERENCE_INDEXING_MULTI { ch_indexed_formix = ch_indexed_forremap .join( ch_mapindex_input.remainder, failOnMismatch: true ) .map { - meta, mapper_index, fasta, fai, dict, circular_target, mitochondrion -> + meta, mapper_index, fasta, fai, dict, circular_target -> - [ meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion ] + [ meta, fasta, fai, dict, mapper_index, circular_target ] } ch_indexmapper_for_reference = ch_fasta_for_mapperindex.skip.mix(ch_indexed_formix) emit: - reference = ch_indexmapper_for_reference // [ meta, fasta, fai, dict, mapindex, circular_target, mitochondrion ] - versions = ch_versions + reference = ch_indexmapper_for_reference // [ meta, fasta, fai, dict, mapindex, circular_target ] + mitochondrion_header = ch_input_from_referencesheet.mitochondrion_header // [ meta, mitochondrion ] + hapmap = ch_input_from_referencesheet.angsd_hapmap // [ 
meta, hapmap ] + pmd_mask = ch_input_from_referencesheet.pmd_mask // [ meta, pmd_mask, capture_bed ] + snp_capture_bed = ch_input_from_referencesheet.snp_bed // [ meta, capture_bed ] + snp_eigenstrat = ch_input_from_referencesheet.snp_eigenstrat // [ meta, snp_eigenstrat ] + sexdeterrmine_bed = ch_input_from_referencesheet.sexdeterrmine_bed // [ meta, sexdet_bed ] + versions = ch_versions } diff --git a/subworkflows/local/reference_indexing_single.nf b/subworkflows/local/reference_indexing_single.nf index b32cfd4c8..25c852bc5 100644 --- a/subworkflows/local/reference_indexing_single.nf +++ b/subworkflows/local/reference_indexing_single.nf @@ -79,11 +79,35 @@ workflow REFERENCE_INDEXING_SINGLE { .join(ch_fasta_mapperindexdir, failOnMismatch: true) .map{ meta, fasta, fai, dict, mapper_index -> - [ meta, fasta, fai, dict, mapper_index, params.fasta_circular_target, params.fasta_mitochondrion_header ] + //TODO: add params for snpcapturebed, snpeigenstrat and sexdetbed once implemented + def contamination_estimation_angsd_hapmap = params.contamination_estimation_angsd_hapmap != null ? file( params.contamination_estimation_angsd_hapmap, checkIfExists: true ) : "" + def pmd_mask = params.damage_manipulation_pmdtools_reference_mask != null ? file(params.damage_manipulation_pmdtools_reference_mask, checkIfExists: true ) : "" + def capture_bed = "" + def snp_eigenstrat = "" + def sexdet_bed = "" + [ meta, fasta, fai, dict, mapper_index, params.fasta_circular_target, params.mitochondrion_header, contamination_estimation_angsd_hapmap, pmd_mask, capture_bed, snp_eigenstrat, sexdet_bed ] + } + + ch_ref_index_single = ch_reference_for_mapping + .multiMap{ + meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion_header, contamination_estimation_angsd_hapmap, pmd_mask, capture_bed, snp_eigenstrat, sexdet_bed -> + reference: [ meta, fasta, fai, dict, mapper_index, circular_target ] + mito_header: [ meta, mitochondrion_header ] + hapmap: [ meta, contamination_estimation_angsd_hapmap ] + pmd_mask: [ meta, pmd_mask, capture_bed ] + snp_bed: [ meta, capture_bed ] + snp_eigenstrat: [ meta, snp_eigenstrat ] + sexdeterrmine_bed: [ meta, sexdet_bed ] } emit: - reference = ch_reference_for_mapping // [ meta, fasta, fai, dict, mapindex ] - versions = ch_versions + reference = ch_ref_index_single.reference // [ meta, fasta, fai, dict, mapindex, circular_target ] + mitochondrion_header = ch_ref_index_single.mito_header // [ meta, mito_header ] + hapmap = ch_ref_index_single.hapmap // [ meta, hapmap ] + pmd_mask = ch_ref_index_single.pmd_mask // [ meta, pmd_mask, capture_bed ] + snp_capture_bed = ch_ref_index_single.snp_bed // [ meta, capture_bed ] + snp_eigenstrat = ch_ref_index_single.snp_eigenstrat // [ meta, snp_eigenstrat ] + sexdeterrmine_bed = ch_ref_index_single.sexdeterrmine_bed // [ meta, sexdet_bed ] + versions = ch_versions } diff --git a/subworkflows/nf-core/fastq_align_bwaaln/main.nf b/subworkflows/nf-core/fastq_align_bwaaln/main.nf index 3d2d6f607..647e83a31 100644 --- a/subworkflows/nf-core/fastq_align_bwaaln/main.nf +++ b/subworkflows/nf-core/fastq_align_bwaaln/main.nf @@ -17,10 +17,47 @@ workflow FASTQ_ALIGN_BWAALN { ch_versions = Channel.empty() - BWA_ALN ( ch_reads, ch_index ) + // WARNING: You must specify in your prefix `meta.id_index` in your `modules.conf` + // to ensure that you do not overwrite multiple BAM files from one sample mapped + // against multiple references. This meta map is added by the subworkflow but can be removed + // after if necessary. 
+ + // Ensure when multiple references are provide, that reference/read combinations + // are correctly associated throughout the subworkflow by copying the sample + // specific metadata to the index on each combination + + ch_prepped_input = ch_reads + .combine(ch_index) + .map{ + meta, reads, meta_index, index -> + + // Create a combined id that includes the ids of the reads and the index used. + // Also keep the id of the index with a new name to avoid name collisions. + def key_read_ref = meta.id + "_" + meta_index.id + def id_index = meta_index.id + + [ meta + [key_read_ref: key_read_ref] + [id_index: id_index], reads, meta_index + [key_read_ref: key_read_ref] + [id_index: id_index], index ] + } + + // Drop the index_meta, as the id of the index is now kept within the read meta. + ch_preppedinput_for_bwaaln = ch_prepped_input + .multiMap { + meta, reads, meta_index, index -> + reads: [ meta, reads ] + index: [ meta, index ] + } + + + // Set as independent channel to allow repeated joining but _with_ sample specific metadata + // to ensure right reference goes with right sample + ch_reads_newid = ch_prepped_input.map{ meta, reads, meta_index, index -> [ meta, reads ] } + ch_index_newid = ch_prepped_input.map{ meta, reads, meta_index, index -> [ meta, index ] } + + // Alignment and conversion to bam + BWA_ALN ( ch_preppedinput_for_bwaaln.reads, ch_preppedinput_for_bwaaln.index ) ch_versions = ch_versions.mix( BWA_ALN.out.versions.first() ) - ch_sai_for_bam = ch_reads + ch_sai_for_bam = ch_reads_newid .join ( BWA_ALN.out.sai ) .branch { meta, reads, sai -> @@ -28,22 +65,48 @@ workflow FASTQ_ALIGN_BWAALN { se: meta.single_end } - BWA_SAMPE ( ch_sai_for_bam.pe, ch_index ) + // Split as PE/SE have different SAI -> BAM commands + ch_sai_for_bam_pe = ch_sai_for_bam.pe + .join ( ch_index_newid ) + .multiMap { + meta, reads, sai, index -> + reads: [ meta, reads, sai ] + index: [ meta, index ] + } + + ch_sai_for_bam_se = ch_sai_for_bam.se + .join ( ch_index_newid ) + .multiMap { + meta, reads, sai, index -> + reads: [ meta, reads, sai ] + index: [ meta, index ] + } + + + BWA_SAMPE ( ch_sai_for_bam_pe.reads, ch_sai_for_bam_pe.index ) ch_versions = ch_versions.mix( BWA_SAMPE.out.versions.first() ) - BWA_SAMSE ( ch_sai_for_bam.se, ch_index ) + BWA_SAMSE ( ch_sai_for_bam_se.reads, ch_sai_for_bam_se.index ) ch_versions = ch_versions.mix( BWA_SAMSE.out.versions.first() ) ch_bam_for_index = BWA_SAMPE.out.bam.mix( BWA_SAMSE.out.bam ) + // Index all SAMTOOLS_INDEX ( ch_bam_for_index ) ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + // Remove superfluous internal maps to minimise clutter as much as possible + ch_bam_for_emit = ch_bam_for_index.map{ meta, bam -> [meta - meta.subMap('key_read_ref'), bam] } + ch_bai_for_emit = SAMTOOLS_INDEX.out.bai.map{ meta, bai -> [meta - meta.subMap('key_read_ref'), bai] } + ch_csi_for_emit = SAMTOOLS_INDEX.out.csi.map{ meta, csi -> [meta - meta.subMap('key_read_ref'), csi] } + emit: - bam = ch_bam_for_index // channel: [ val(meta), path(bam) ] - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), path(bai) ] - csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), path(csi) ] + // Note: output channels will contain meta with additional 'id_index' meta + // value to allow association of BAM file with the meta.id of input indicies + bam = ch_bam_for_emit // channel: [ val(meta), path(bam) ] + bai = ch_bai_for_emit // channel: [ val(meta), path(bai) ] + csi = ch_csi_for_emit // channel: [ val(meta), path(csi) ] - versions = ch_versions // channel: [ 
path(versions.yml) ] + versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/subworkflows/nf-core/fastq_align_bwaaln/meta.yml b/subworkflows/nf-core/fastq_align_bwaaln/meta.yml index 755550286..c38effd13 100644 --- a/subworkflows/nf-core/fastq_align_bwaaln/meta.yml +++ b/subworkflows/nf-core/fastq_align_bwaaln/meta.yml @@ -18,6 +18,9 @@ input: description: | Structure: [ val(meta), path(fastq) ] One or two FASTQ files for single or paired end data respectively. + Note: the subworkflow adds a new meta ID `meta.id_index` that _must_ + be used in `prefix` to ensure unique file names. See the associated + nextflow.config file. - ch_index: description: | Structure: [ val(meta), path(index) ] @@ -29,6 +32,9 @@ output: description: | Structure: [ val(meta), path(bam) ] Sorted BAM/CRAM/SAM file + Note: the subworkflow adds a new meta ID `meta.id_index` that _must_ + be used in `prefix` to ensure unique file names. See the associated + nextflow.config file. - bai: description: | Structure: [ val(meta), path(bai) ] @@ -37,5 +43,9 @@ output: description: | Structure: [ val(meta), path(csi) ] BAM/CRAM/SAM samtools index file for larger references + - versions: + description: | + Files containing software versions + Structure: [ path(versions.yml) ] authors: - "@jfy133" diff --git a/subworkflows/nf-core/fastq_align_bwaaln/nextflow.config b/subworkflows/nf-core/fastq_align_bwaaln/nextflow.config new file mode 100644 index 000000000..2ff165516 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_bwaaln/nextflow.config @@ -0,0 +1,26 @@ +// IMPORTANT: Add this configuration to your modules.config +// These settings are to ensure you include the reference information within the output file names, to prevent overwritting! +// `meta.id_index` is created and used within the workflow. You must reuse this meta field if you wish to customise the file naming scheme. + +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: '.*FASTQ_ALIGN_BWAALN:BWA_ALN' { + ext.prefix = { "${meta.id}_${meta.id_index}" } + } + + withName: '.*FASTQ_ALIGN_BWAALN:BWA_SAMSE' { + ext.prefix = { "${meta.id}_${meta.id_index}" } + } + + withName: '.*FASTQ_ALIGN_BWAALN:BWA_SAMPE' { + ext.prefix = { "${meta.id}_${meta.id_index}" } + } + + withName: '.*FASTQ_ALIGN_BWAALN:SAMTOOLS_INDEX' { + ext.prefix = { "${meta.id}_${meta.id_index}" } + } + + +} diff --git a/workflows/eager.nf b/workflows/eager.nf index 0911d4ca0..183717e5f 100644 --- a/workflows/eager.nf +++ b/workflows/eager.nf @@ -126,9 +126,6 @@ workflow EAGER { if ( params.preprocessing_tool == 'fastp' && !adapterlist.extension.matches(".*(fa|fasta|fna|fas)") ) error "[nf-core/eager] ERROR: fastp adapter list requires a `.fasta` format and extension (or fa, fas, fna). Check input: --preprocessing_adapterlist ${params.preprocessing_adapterlist}" } - // Contamination estimation - hapmap_file = file(params.contamination_estimation_angsd_hapmap, checkIfExists:true) - // // SUBWORKFLOW: Read in samplesheet, validate and stage input files // @@ -137,11 +134,11 @@ workflow EAGER { file(params.input) ) ch_versions = ch_versions.mix( INPUT_CHECK.out.versions ) - + // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input") // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ // ! 
There is currently no tooling to help you write a sample sheet schema - + // // SUBWORKFLOW: Indexing of reference files // @@ -181,7 +178,7 @@ workflow EAGER { // ch_reference_for_mapping = REFERENCE_INDEXING.out.reference .map{ - meta, fasta, fai, dict, index, circular_target, mitochondrion -> + meta, fasta, fai, dict, index, circular_target -> [ meta, index ] } @@ -233,7 +230,7 @@ workflow EAGER { ch_fasta_for_deduplication = REFERENCE_INDEXING.out.reference .multiMap{ - meta, fasta, fai, dict, index, circular_target, mitochondrion -> + meta, fasta, fai, dict, index, circular_target -> fasta: [ meta, fasta ] fasta_fai: [ meta, fai ] } @@ -310,13 +307,31 @@ workflow EAGER { // if ( params.run_mtnucratio ) { + ch_mito_header = REFERENCE_INDEXING.out.mitochondrion_header + .map{ + meta, mito_header -> + meta2 = [:] + meta2.reference = meta.id + [ meta2, meta, mito_header ] + } mtnucratio_input = ch_dedupped_bams - .map { - meta, bam, bai -> - [ meta, bam ] - } + .map { + meta, bam, bai -> + meta2 = [:] + meta2.reference = meta.reference + [ meta2, meta, bam ] + } + .combine( + by: 0, + ch_mito_header + ) + .multiMap{ + ignore_meta, meta, bam, meta2, mito_header -> + bam: [ meta, bam ] + mito_header: [ meta2, mito_header ] + } - MTNUCRATIO( mtnucratio_input, params.mitochondrion_header ) + MTNUCRATIO( mtnucratio_input.bam, mtnucratio_input.mito_header.map{ it[1] } ) ch_multiqc_files = ch_multiqc_files.mix(MTNUCRATIO.out.mtnucratio.collect{it[1]}.ifEmpty([])) ch_versions = ch_versions.mix( MTNUCRATIO.out.versions ) } @@ -341,7 +356,7 @@ workflow EAGER { ch_fasta_for_damagecalculation = REFERENCE_INDEXING.out.reference .multiMap{ - meta, fasta, fai, dict, index, circular_target, mitochondrion -> + meta, fasta, fai, dict, index, circular_target -> fasta: [ meta, fasta ] fasta_fai: [ meta, fai ] } @@ -359,22 +374,15 @@ workflow EAGER { if ( params.run_contamination_estimation_angsd ) { contamination_input = ch_dedupped_bams - ch_hapmap = Channel.of( [ hapmap_file ] ) - hapmap_input = REFERENCE_INDEXING.out.reference - .combine( ch_hapmap ) - .map { - meta, fasta, fai, dict, index, circular_target, mitochondrion, hapmap -> - [ meta, hapmap ] - } - ESTIMATE_CONTAMINATION( contamination_input, hapmap_input ) + ESTIMATE_CONTAMINATION( contamination_input, REFERENCE_INDEXING.out.hapmap ) ch_versions = ch_versions.mix( ESTIMATE_CONTAMINATION.out.versions ) ch_multiqc_files = ch_multiqc_files.mix( ESTIMATE_CONTAMINATION.out.mqc.collect{it[1]}.ifEmpty([]) ) } // // SUBWORKFLOW: aDNA Damage Manipulation - // + // TODO: Add pmd_mask and snp_capture input if ( params.run_mapdamage_rescaling || params.run_pmd_filtering || params.run_trim_bam ) { MANIPULATE_DAMAGE( ch_dedupped_bams, ch_fasta_for_deduplication.fasta ) From 02af1fc199eb6da4afccfa8c8df4869d300e60ab Mon Sep 17 00:00:00 2001 From: scarlhoff Date: Fri, 4 Aug 2023 13:42:18 +0200 Subject: [PATCH 2/8] fix creation of multiple bwaaln jobs --- subworkflows/local/map.nf | 45 +++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/subworkflows/local/map.nf b/subworkflows/local/map.nf index 494acfb7b..3a5480d1e 100644 --- a/subworkflows/local/map.nf +++ b/subworkflows/local/map.nf @@ -19,8 +19,31 @@ workflow MAP { ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() - ch_input_for_mapping = reads - .combine(index) + if ( params.mapping_tool == 'bwaaln' ) { + ch_index_for_mapping = index + .map { + // Add meta.reference for tag and file name + meta, index -> + new_meta = meta.clone() + 
new_meta.reference = meta.id + [ new_meta, index ] + } + ch_reads_for_mapping = reads + .combine( ch_index_for_mapping ) + .map { + // Add meta.reference for tag and file name + meta, reads, meta2, index -> + meta.reference = meta2.reference + [ meta, reads] + } + FASTQ_ALIGN_BWAALN ( ch_reads_for_mapping, ch_index_for_mapping ) + ch_versions = ch_versions.mix ( FASTQ_ALIGN_BWAALN.out.versions.first() ) + ch_mapped_lane_bam = FASTQ_ALIGN_BWAALN.out.bam + ch_mapped_lane_bai = params.fasta_largeref ? FASTQ_ALIGN_BWAALN.out.csi : FASTQ_ALIGN_BWAALN.out.bai + + } else if ( params.mapping_tool == 'bwamem' ) { + ch_input_for_mapping = reads + .combine( index ) .multiMap { meta, reads, meta2, index -> new_meta = meta.clone() @@ -28,15 +51,6 @@ workflow MAP { reads: [ new_meta, reads ] index: [ meta2, index] } - - if ( params.mapping_tool == 'bwaaln' ) { - FASTQ_ALIGN_BWAALN ( ch_input_for_mapping.reads, ch_input_for_mapping.index ) - - ch_versions = ch_versions.mix ( FASTQ_ALIGN_BWAALN.out.versions.first() ) - ch_mapped_lane_bam = FASTQ_ALIGN_BWAALN.out.bam - ch_mapped_lane_bai = params.fasta_largeref ? FASTQ_ALIGN_BWAALN.out.csi : FASTQ_ALIGN_BWAALN.out.bai - - } else if ( params.mapping_tool == 'bwamem' ) { BWA_MEM ( ch_input_for_mapping.reads, ch_input_for_mapping.index, true ) ch_versions = ch_versions.mix ( BWA_MEM.out.versions.first() ) ch_mapped_lane_bam = BWA_MEM.out.bam @@ -46,6 +60,15 @@ workflow MAP { ch_mapped_lane_bai = params.fasta_largeref ? SAMTOOLS_INDEX_MEM.out.csi : SAMTOOLS_INDEX_MEM.out.bai } else if ( params.mapping_tool == 'bowtie2' ) { + ch_input_for_mapping = reads + .combine( index ) + .multiMap { + meta, reads, meta2, index -> + new_meta = meta.clone() + new_meta.reference = meta2.id + reads: [ new_meta, reads ] + index: [ meta2, index] + } BOWTIE2_ALIGN ( ch_input_for_mapping.reads, ch_input_for_mapping.index, false, true ) ch_versions = ch_versions.mix ( BOWTIE2_ALIGN.out.versions.first() ) ch_mapped_lane_bam = BOWTIE2_ALIGN.out.bam From 35ac27cfd277dbee07df97dce62c5925e1e36fee Mon Sep 17 00:00:00 2001 From: scarlhoff Date: Fri, 25 Aug 2023 17:48:59 +0200 Subject: [PATCH 3/8] actually fix multiple bwa aln mapping (?) 
--- conf/modules.config | 6 +++--- subworkflows/local/map.nf | 37 +++++++++++++++++-------------------- 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 354628576..5c67d64fe 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -352,7 +352,7 @@ process { // READ MAPPING // withName: BWA_ALN { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } ext.args = { "-n ${params.mapping_bwaaln_n} -k ${params.mapping_bwaaln_k} -l ${params.mapping_bwaaln_l} -o ${params.mapping_bwaaln_o}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" } publishDir = [ @@ -361,7 +361,7 @@ process { } withName: 'BWA_SAMSE|BWA_SAMPE' { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } ext.args = { "-r '@RG\\tID:ILLUMINA-${meta.library_id}\\tSM:${meta.sample_id}\\tPL:illumina\\tPU:ILLUMINA-${meta.library_id}-${meta.strandedness}'" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" } publishDir = [ @@ -370,7 +370,7 @@ process { } withName: ".*MAP:FASTQ_ALIGN_BWAALN:SAMTOOLS_INDEX" { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } + tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } ext.args = { params.fasta_largeref ? "-c" : "" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" } publishDir = [ diff --git a/subworkflows/local/map.nf b/subworkflows/local/map.nf index 3a5480d1e..2f43d4f06 100644 --- a/subworkflows/local/map.nf +++ b/subworkflows/local/map.nf @@ -2,13 +2,13 @@ // Prepare reference indexing for downstream // -include { FASTQ_ALIGN_BWAALN } from '../../subworkflows/nf-core/fastq_align_bwaaln/main' -include { BWA_MEM } from '../../modules/nf-core/bwa/mem/main' -include { BOWTIE2_ALIGN } from '../../modules/nf-core/bowtie2/align/main' -include { SAMTOOLS_MERGE as SAMTOOLS_MERGE_LANES } from '../../modules/nf-core/samtools/merge/main' -include { SAMTOOLS_SORT as SAMTOOLS_SORT_MERGED_LANES } from '../../modules/nf-core/samtools/sort/main' +include { FASTQ_ALIGN_BWAALN } from '../../subworkflows/nf-core/fastq_align_bwaaln/main' +include { BWA_MEM } from '../../modules/nf-core/bwa/mem/main' +include { BOWTIE2_ALIGN } from '../../modules/nf-core/bowtie2/align/main' +include { SAMTOOLS_MERGE as SAMTOOLS_MERGE_LANES } from '../../modules/nf-core/samtools/merge/main' +include { SAMTOOLS_SORT as SAMTOOLS_SORT_MERGED_LANES } from '../../modules/nf-core/samtools/sort/main' include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MEM; SAMTOOLS_INDEX as SAMTOOLS_INDEX_BT2; SAMTOOLS_INDEX as SAMTOOLS_INDEX_MERGED_LANES } from '../../modules/nf-core/samtools/index/main' -include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTAT_MAPPED } from '../../modules/nf-core/samtools/flagstat/main' +include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTAT_MAPPED } from '../../modules/nf-core/samtools/flagstat/main' workflow MAP { take: @@ -21,24 +21,19 @@ workflow MAP { if ( params.mapping_tool == 'bwaaln' ) { ch_index_for_mapping = index - .map { - // Add meta.reference for tag and file name - meta, index -> - new_meta = meta.clone() - new_meta.reference = meta.id - [ new_meta, index ] - } ch_reads_for_mapping = reads - .combine( ch_index_for_mapping ) - .map { - // Add meta.reference for tag and 
file name - meta, reads, meta2, index -> - meta.reference = meta2.reference - [ meta, reads] - } + FASTQ_ALIGN_BWAALN ( ch_reads_for_mapping, ch_index_for_mapping ) ch_versions = ch_versions.mix ( FASTQ_ALIGN_BWAALN.out.versions.first() ) ch_mapped_lane_bam = FASTQ_ALIGN_BWAALN.out.bam + .map{ + // create meta consistent with rest of workflow + meta, bam -> + new_meta = meta.clone() + new_meta.reference = meta.id_index + [ new_meta, bam ] + } + ch_mapped_lane_bai = params.fasta_largeref ? FASTQ_ALIGN_BWAALN.out.csi : FASTQ_ALIGN_BWAALN.out.bai } else if ( params.mapping_tool == 'bwamem' ) { @@ -51,6 +46,7 @@ workflow MAP { reads: [ new_meta, reads ] index: [ meta2, index] } + BWA_MEM ( ch_input_for_mapping.reads, ch_input_for_mapping.index, true ) ch_versions = ch_versions.mix ( BWA_MEM.out.versions.first() ) ch_mapped_lane_bam = BWA_MEM.out.bam @@ -69,6 +65,7 @@ workflow MAP { reads: [ new_meta, reads ] index: [ meta2, index] } + BOWTIE2_ALIGN ( ch_input_for_mapping.reads, ch_input_for_mapping.index, false, true ) ch_versions = ch_versions.mix ( BOWTIE2_ALIGN.out.versions.first() ) ch_mapped_lane_bam = BOWTIE2_ALIGN.out.bam From 17949b91cf509e35799b382ccaf9b9458a018700 Mon Sep 17 00:00:00 2001 From: scarlhoff Date: Fri, 22 Sep 2023 15:06:27 +0200 Subject: [PATCH 4/8] starting to integrate bedtools and qualimap --- conf/test_humanbam.config | 3 + subworkflows/local/reference_indexing.nf | 33 +++++-- .../local/reference_indexing_multi.nf | 14 +-- .../local/reference_indexing_single.nf | 16 ++-- workflows/eager.nf | 85 +++++++++---------- 5 files changed, 89 insertions(+), 62 deletions(-) diff --git a/conf/test_humanbam.config b/conf/test_humanbam.config index 8439f86f2..47ba00e93 100644 --- a/conf/test_humanbam.config +++ b/conf/test_humanbam.config @@ -31,6 +31,9 @@ params { contamination_estimation_angsd_mapq = 0 contamination_estimation_angsd_minq = 0 + // Qualimap + snpcapture_bed = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Human/1240K.pos.list_hs37d5.0based.bed.gz' + // TODO Reactivate sexDet and genotyping params when those steps get implemented. // //Sex Determination // sexdeterrmine_bedfile = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Human/1240K.pos.list_hs37d5.0based.bed.gz' diff --git a/subworkflows/local/reference_indexing.nf b/subworkflows/local/reference_indexing.nf index 6cd6a051c..9d5e523f3 100644 --- a/subworkflows/local/reference_indexing.nf +++ b/subworkflows/local/reference_indexing.nf @@ -4,6 +4,7 @@ include { REFERENCE_INDEXING_SINGLE } from '../../subworkflows/local/reference_indexing_single.nf' include { REFERENCE_INDEXING_MULTI } from '../../subworkflows/local/reference_indexing_multi.nf' +include { GUNZIP as GUNZIP_SNPBED } from '../../modules/nf-core/gunzip/main.nf' workflow REFERENCE_INDEXING { take: @@ -17,7 +18,7 @@ workflow REFERENCE_INDEXING { // Warn user if they've given a reference sheet that already includes fai/dict/mapper index etc. if ( ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) && (fasta_fai || fasta_dict || fasta_mapperindexdir)) log.warn("A TSV or CSV has been supplied to `--fasta` as well as e.g. `--fasta_fai`. 
--fasta CSV/TSV takes priority and --fasta_* parameters will be ignored.") - if ( ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) && (params.mitochondrion_header || params.contamination_estimation_angsd_hapmap || params.damage_manipulation_pmdtools_reference_mask )) log.warn("A TSV or CSV has been supplied to `--fasta` as well as individual reference-specific input files, e.g. `--contamination_estimation_angsd_hapmap`. Input files specified in the --fasta CSV/TSV take priority and other input parameters will be ignored.") + if ( ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) && (params.mitochondrion_header || params.contamination_estimation_angsd_hapmap || params.damage_manipulation_pmdtools_reference_mask || params.snpcapture_bed || params.mapstats_bedtools_featurefile )) log.warn("A TSV or CSV has been supplied to `--fasta` as well as individual reference-specific input files, e.g. `--contamination_estimation_angsd_hapmap`. Input files specified in the --fasta CSV/TSV take priority and other input parameters will be ignored.") if ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) { // If input (multi-)reference sheet supplied @@ -27,8 +28,9 @@ workflow REFERENCE_INDEXING { ch_hapmap = REFERENCE_INDEXING_MULTI.out.hapmap ch_pmd_mask = REFERENCE_INDEXING_MULTI.out.pmd_mask ch_snp_capture_bed = REFERENCE_INDEXING_MULTI.out.snp_capture_bed - ch_snp_eigenstrat = REFERENCE_INDEXING_MULTI.out.snp_eigenstrat + ch_pileupcaller_snp = REFERENCE_INDEXING_MULTI.out.pileupcaller_snp ch_sexdeterrmine_bed = REFERENCE_INDEXING_MULTI.out.sexdeterrmine_bed + ch_bedtools_feature = REFERENCE_INDEXING_MULTI.out.bedtools_feature ch_versions = ch_versions.mix( REFERENCE_INDEXING_MULTI.out.versions ) } else { // If input FASTA and/or indicies supplied @@ -37,8 +39,9 @@ workflow REFERENCE_INDEXING { ch_hapmap = REFERENCE_INDEXING_SINGLE.out.hapmap ch_pmd_mask = REFERENCE_INDEXING_SINGLE.out.pmd_mask ch_snp_capture_bed = REFERENCE_INDEXING_SINGLE.out.snp_capture_bed - ch_snp_eigenstrat = REFERENCE_INDEXING_SINGLE.out.snp_eigenstrat + ch_pileupcaller_snp = REFERENCE_INDEXING_SINGLE.out.pileupcaller_snp ch_sexdeterrmine_bed = REFERENCE_INDEXING_SINGLE.out.sexdeterrmine_bed + ch_bedtools_feature = REFERENCE_INDEXING_SINGLE.out.bedtools_feature ch_reference_for_mapping = REFERENCE_INDEXING_SINGLE.out.reference ch_versions = ch_versions.mix( REFERENCE_INDEXING_SINGLE.out.versions ) } @@ -53,23 +56,37 @@ workflow REFERENCE_INDEXING { ch_pmd_mask = ch_pmd_mask .filter{ it[1] != "" && it[2] != "" } - ch_snp_capture_bed = ch_snp_capture_bed + ch_capture_bed = ch_snp_capture_bed .filter{ it[1] != "" } + if ( ch_capture_bed != "") { + ch_capture_bed_gunzip = ch_capture_bed + .branch { + meta, capture_bed -> + forgunzip: capture_bed.extension == "gz" + skip: true + } + GUNZIP_SNPBED( ch_capture_bed_gunzip.forgunzip ) + ch_capture_bed = GUNZIP_SNPBED.out.gunzip.mix( ch_capture_bed_gunzip.skip ) + } - ch_snp_eigenstrat = ch_snp_eigenstrat - .filter{ it[1] != "" } + ch_pileupcaller_snp = ch_pileupcaller_snp + .filter{ it[1] != "" && it[2] != "" } ch_sexdeterrmine_bed = ch_sexdeterrmine_bed .filter{ it[1] != "" } + ch_bedtools_feature = ch_bedtools_feature + .filter{ it[1] != "" } + emit: reference = ch_reference_for_mapping // [ meta, fasta, fai, dict, mapindex, circular_target ] mitochondrion_header = ch_mitochondrion_header // [ meta, mitochondrion_header ] hapmap = ch_hapmap // [ meta, hapmap ] pmd_mask = ch_pmd_mask // [ meta, masked_fasta, capture_bed ] - snp_capture_bed = ch_snp_capture_bed // [ 
meta, capture_bed ] - snp_eigenstrat = ch_snp_eigenstrat // [ meta, snp_eigenstrat ] + snp_capture_bed = ch_capture_bed // [ meta, capture_bed ] + pileupcaller_snp = ch_pileupcaller_snp // [ meta, pileupcaller_bed, pileupcaller_snp ] sexdeterrmine_bed = ch_sexdeterrmine_bed // [ meta, sexdet_bed ] + bedtools_feature = ch_bedtools_feature // [ meta, bedtools_feature ] versions = ch_versions } diff --git a/subworkflows/local/reference_indexing_multi.nf b/subworkflows/local/reference_indexing_multi.nf index 53f258868..1ca630498 100644 --- a/subworkflows/local/reference_indexing_multi.nf +++ b/subworkflows/local/reference_indexing_multi.nf @@ -38,11 +38,13 @@ workflow REFERENCE_INDEXING_MULTI { def circular_target = row["circular_target"] def mitochondrion = row["mitochondrion_header"] def capture_bed = row["snpcapture_bed"] != "" ? file(row["snpcapture_bed"], checkIfExists: true) : "" - def snp_file = row["pileupcaller_snpfile"] != "" ? file(row["pileupcaller_snpfile"], checkIfExists: true) : "" + def pileupcaller_bed = row["pileupcaller_bedfile"] != "" ? file(row["pileupcaller_bedfile"], checkIfExists: true) : "" + def pileupcaller_snp = row["pileupcaller_snpfile"] != "" ? file(row["pileupcaller_snpfile"], checkIfExists: true) : "" def hapmap = row["hapmap_file"] != "" ? file(row["hapmap_file"], checkIfExists: true) : "" def pmd_mask = row["pmdtools_masked_fasta"] != "" ? file(row["pmdtools_masked_fasta"], checkIfExists: true) : "" def sexdet_bed = row["sexdeterrmine_snp_bed"] != "" ? file(row["sexdeterrmine_snp_bed"], checkIfExists: true) : "" - [ meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion, capture_bed, snp_file, hapmap, pmd_mask, sexdet_bed ] + def bedtools_feature = row["bedtools_feature_file"] != "" ? file(row["bedtools_feature_file"], checkIfExists: true) : "" + [ meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion, capture_bed, pileupcaller_bed, pileupcaller_snp, hapmap, pmd_mask, sexdet_bed, bedtools_feature ] } @@ -59,14 +61,15 @@ workflow REFERENCE_INDEXING_MULTI { ch_input_from_referencesheet = ch_splitreferencesheet_for_branch .multiMap { - meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion, capture_bed, snp_file, hapmap, pmd_mask, sexdet_bed -> + meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion, capture_bed, pileupcaller_bed, pileupcaller_snp, hapmap, pmd_mask, sexdet_bed, bedtools_feature -> generated: [ meta, fasta, fai, dict, mapper_index, circular_target ] mitochondrion_header: [ meta, mitochondrion ] angsd_hapmap: [ meta, hapmap ] pmd_mask: [ meta, pmd_mask, capture_bed ] snp_bed: [ meta, capture_bed ] - snp_eigenstrat: [ meta, snp_file ] + pileupcaller_snp: [ meta, pileupcaller_bed, pileupcaller_snp ] sexdeterrmine_bed: [ meta, sexdet_bed ] + bedtools_feature: [ meta, bedtools_feature ] } // Detect if fasta is gzipped or not @@ -210,7 +213,8 @@ ch_input_from_referencesheet = ch_splitreferencesheet_for_branch hapmap = ch_input_from_referencesheet.angsd_hapmap // [ meta, hapmap ] pmd_mask = ch_input_from_referencesheet.pmd_mask // [ meta, pmd_mask, capture_bed ] snp_capture_bed = ch_input_from_referencesheet.snp_bed // [ meta, capture_bed ] - snp_eigenstrat = ch_input_from_referencesheet.snp_eigenstrat // [ meta, snp_eigenstrat ] + pileupcaller_snp = ch_input_from_referencesheet.pileupcaller_snp // [ meta, pileupcaller_snp, pileupcaller_bed ] sexdeterrmine_bed = ch_input_from_referencesheet.sexdeterrmine_bed // [ meta, sexdet_bed ] + bedtools_feature = 
ch_input_from_referencesheet.bedtools_feature // [ meta, bedtools_feature ] versions = ch_versions } diff --git a/subworkflows/local/reference_indexing_single.nf b/subworkflows/local/reference_indexing_single.nf index 25c852bc5..43ebbf262 100644 --- a/subworkflows/local/reference_indexing_single.nf +++ b/subworkflows/local/reference_indexing_single.nf @@ -82,22 +82,25 @@ workflow REFERENCE_INDEXING_SINGLE { //TODO: add params for snpcapturebed, snpeigenstrat and sexdetbed once implemented def contamination_estimation_angsd_hapmap = params.contamination_estimation_angsd_hapmap != null ? file( params.contamination_estimation_angsd_hapmap, checkIfExists: true ) : "" def pmd_mask = params.damage_manipulation_pmdtools_reference_mask != null ? file(params.damage_manipulation_pmdtools_reference_mask, checkIfExists: true ) : "" - def capture_bed = "" - def snp_eigenstrat = "" + def capture_bed = params.snpcapture_bed != null ? file(params.snpcapture_bed, checkIfExists: true ) : "" + def pileupcaller_bed = "" + def pileupcaller_snp = "" def sexdet_bed = "" - [ meta, fasta, fai, dict, mapper_index, params.fasta_circular_target, params.mitochondrion_header, contamination_estimation_angsd_hapmap, pmd_mask, capture_bed, snp_eigenstrat, sexdet_bed ] + def bedtools_feature = params.mapstats_bedtools_featurefile != null ? file(params.mapstats_bedtools_featurefile, checkIfExists: true ) : "" + [ meta, fasta, fai, dict, mapper_index, params.fasta_circular_target, params.mitochondrion_header, contamination_estimation_angsd_hapmap, pmd_mask, capture_bed, pileupcaller_bed, pileupcaller_snp, sexdet_bed, bedtools_feature ] } ch_ref_index_single = ch_reference_for_mapping .multiMap{ - meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion_header, contamination_estimation_angsd_hapmap, pmd_mask, capture_bed, snp_eigenstrat, sexdet_bed -> + meta, fasta, fai, dict, mapper_index, circular_target, mitochondrion_header, contamination_estimation_angsd_hapmap, pmd_mask, capture_bed, pileupcaller_bed, pileupcaller_snp, sexdet_bed, bedtools_feature -> reference: [ meta, fasta, fai, dict, mapper_index, circular_target ] mito_header: [ meta, mitochondrion_header ] hapmap: [ meta, contamination_estimation_angsd_hapmap ] pmd_mask: [ meta, pmd_mask, capture_bed ] snp_bed: [ meta, capture_bed ] - snp_eigenstrat: [ meta, snp_eigenstrat ] + pileupcaller_snp: [ meta, pileupcaller_bed, pileupcaller_snp ] sexdeterrmine_bed: [ meta, sexdet_bed ] + bedtools_feature: [ meta, bedtools_feature ] } emit: @@ -106,8 +109,9 @@ workflow REFERENCE_INDEXING_SINGLE { hapmap = ch_ref_index_single.hapmap // [ meta, hapmap ] pmd_mask = ch_ref_index_single.pmd_mask // [ meta, pmd_mask, capture_bed ] snp_capture_bed = ch_ref_index_single.snp_bed // [ meta, capture_bed ] - snp_eigenstrat = ch_ref_index_single.snp_eigenstrat // [ meta, snp_eigenstrat ] + pileupcaller_snp = ch_ref_index_single.pileupcaller_snp // [ meta, pileupcaller_bed, pileupcaller_snp ] sexdeterrmine_bed = ch_ref_index_single.sexdeterrmine_bed // [ meta, sexdet_bed ] + bedtools_feature = ch_ref_index_single.bedtools_feature // [ meta, bedtools_feature ] versions = ch_versions } diff --git a/workflows/eager.nf b/workflows/eager.nf index 04148ebe8..b34f1634e 100644 --- a/workflows/eager.nf +++ b/workflows/eager.nf @@ -101,7 +101,6 @@ include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTATS_BAM_INPUT } from '../modules/n include { BEDTOOLS_COVERAGE as BEDTOOLS_COVERAGE_DEPTH ; BEDTOOLS_COVERAGE as BEDTOOLS_COVERAGE_BREADTH } from '../modules/nf-core/bedtools/coverage/main' 
include { SAMTOOLS_VIEW_GENOME } from '../modules/local/samtools_view_genome.nf' include { QUALIMAP_BAMQC } from '../modules/nf-core/qualimap/bamqc/main' -include { GUNZIP as GUNZIP_SNPBED } from '../modules/nf-core/gunzip/main.nf' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -138,28 +137,6 @@ workflow EAGER { if ( params.preprocessing_tool == 'fastp' && !adapterlist.extension.matches(".*(fa|fasta|fna|fas)") ) error "[nf-core/eager] ERROR: fastp adapter list requires a `.fasta` format and extension (or fa, fas, fna). Check input: --preprocessing_adapterlist ${params.preprocessing_adapterlist}" } - // QualiMap - if ( params.snpcapture_bed ) { - ch_snpcapture_bed_gunzip = Channel.fromPath( params.snpcapture_bed, checkIfExists: true ) - .collect() - .map { - file -> - meta = file.simpleName - [meta,file] - } - .branch { - meta, bed -> - forgunzip: bed[0].extension == "gz" - skip: true - } - ch_snpcapture_bed = GUNZIP_SNPBED(ch_snpcapture_bed_gunzip.forgunzip).gunzip.mix(ch_snpcapture_bed_gunzip.skip).map{it[1]} - - } else { - ch_snpcapture_bed = [] - } - - // Contamination estimation - hapmap_file = file(params.contamination_estimation_angsd_hapmap, checkIfExists:true) // // SUBWORKFLOW: Read in samplesheet, validate and stage input files @@ -296,13 +273,30 @@ workflow EAGER { // if ( !params.skip_qualimap ) { - ch_qualimap_input = ch_dedupped_bams + ch_snp_capture_bed = REFERENCE_INDEXING.out.snp_capture_bed + .map{ + WorkflowEager.addNewMetaFromAttributes( it, "id" , "reference" , false ) + } + ch_qualimap_input = ch_dedupped_bams .map { meta, bam, bai -> [ meta, bam ] } - QUALIMAP_BAMQC(ch_qualimap_input, ch_snpcapture_bed) - ch_versions = ch_versions.mix( QUALIMAP_BAMQC.out.versions ) + .map { + WorkflowEager.addNewMetaFromAttributes( it, "reference" , "reference" , false ) + } + .combine( + by: 0, + ch_snp_capture_bed + ) + .multiMap{ + ignore_meta, meta, bam, meta2, snp_capture_bed -> + bam: [ meta, bam ] + snp_capture_bed: [ snp_capture_bed ] + } + QUALIMAP_BAMQC( ch_qualimap_input.bam, ch_qualimap_input.snp_capture_bed ) + ch_versions = ch_versions.mix( QUALIMAP_BAMQC.out.versions ) + // Qualimap multiqc files? } // @@ -367,17 +361,11 @@ workflow EAGER { if ( params.run_mtnucratio ) { ch_mito_header = REFERENCE_INDEXING.out.mitochondrion_header .map{ - meta, mito_header -> - meta2 = [:] - meta2.reference = meta.id - [ meta2, meta, mito_header ] + WorkflowEager.addNewMetaFromAttributes( it, "id" , "reference" , false ) } mtnucratio_input = ch_dedupped_bams .map { - meta, bam, bai -> - meta2 = [:] - meta2.reference = meta.reference - [ meta2, meta, bam ] + WorkflowEager.addNewMetaFromAttributes( it, "reference" , "reference" , false ) } .combine( by: 0, @@ -386,7 +374,7 @@ workflow EAGER { .multiMap{ ignore_meta, meta, bam, meta2, mito_header -> bam: [ meta, bam ] - mito_header: [ meta2, mito_header ] + mito_header: [ meta2, mito_header ] //should this be meta, mito_header? 
} MTNUCRATIO( mtnucratio_input.bam, mtnucratio_input.mito_header.map{ it[1] } ) @@ -453,12 +441,22 @@ workflow EAGER { if ( params.run_bedtools_coverage ) { - ch_anno_for_bedtools = Channel.fromPath(params.mapstats_bedtools_featurefile, checkIfExists: true).collect() - - ch_dedupped_for_bedtools = ch_dedupped_bams.combine(ch_anno_for_bedtools) - .map{ - meta, bam, bai, anno -> - [meta, anno, bam] + ch_bedtools_feature = REFERENCE_INDEXING.out.bedtools_feature + .map{ + WorkflowEager.addNewMetaFromAttributes( it, "id" , "reference" , false ) + } + + ch_bedtools_input = ch_dedupped_bams + .map { + WorkflowEager.addNewMetaFromAttributes( it, "reference" , "reference" , false ) + } + .combine( + by: 0, + ch_bedtools_feature + ) + .map{ + ignore_meta, meta, bam, bai, meta2, bedtools_feature -> + [ meta, bedtools_feature, bam ] } // Running samtools view to get header @@ -466,12 +464,13 @@ workflow EAGER { ch_genome_for_bedtools = SAMTOOLS_VIEW_GENOME.out.genome - BEDTOOLS_COVERAGE_BREADTH(ch_dedupped_for_bedtools, ch_genome_for_bedtools) - BEDTOOLS_COVERAGE_DEPTH(ch_dedupped_for_bedtools, ch_genome_for_bedtools) + BEDTOOLS_COVERAGE_BREADTH(ch_bedtools_input, ch_genome_for_bedtools) + BEDTOOLS_COVERAGE_DEPTH(ch_bedtools_input, ch_genome_for_bedtools) ch_versions = ch_versions.mix( SAMTOOLS_VIEW_GENOME.out.versions ) ch_versions = ch_versions.mix( BEDTOOLS_COVERAGE_BREADTH.out.versions ) ch_versions = ch_versions.mix( BEDTOOLS_COVERAGE_DEPTH.out.versions ) + //Bedtools multiqc files? } // From 17453404e05ceafef698c2da38a6e2be921650d8 Mon Sep 17 00:00:00 2001 From: scarlhoff Date: Fri, 29 Sep 2023 15:13:48 +0200 Subject: [PATCH 5/8] started to make qualimap bed input optional --- conf/modules.config | 2 +- subworkflows/local/reference_indexing.nf | 16 +++++++++------- workflows/eager.nf | 22 +++++++++++++++++----- 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 4f17a6cc7..619283ada 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -819,7 +819,7 @@ process { ] } - withName: "QUALIMAP_BAMQC" { + withName: 'QUALIMAP_BAMQC_WITHBED|QUALIMAP_BAMQC_NOBED' { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } publishDir = [ path: { "${params.outdir}/mapstats/qualimap/${meta.reference}/${meta.sample_id}/}" }, diff --git a/subworkflows/local/reference_indexing.nf b/subworkflows/local/reference_indexing.nf index 9d5e523f3..13357d4ea 100644 --- a/subworkflows/local/reference_indexing.nf +++ b/subworkflows/local/reference_indexing.nf @@ -56,18 +56,20 @@ workflow REFERENCE_INDEXING { ch_pmd_mask = ch_pmd_mask .filter{ it[1] != "" && it[2] != "" } - ch_capture_bed = ch_snp_capture_bed - .filter{ it[1] != "" } - if ( ch_capture_bed != "") { - ch_capture_bed_gunzip = ch_capture_bed + ch_capture_bed = ch_snp_capture_bed //optional + .branch { + meta, capture_bed -> + input: capture_bed != "" + skip: true + } + ch_capture_bed_gunzip = ch_capture_bed.input //unzip .branch { meta, capture_bed -> forgunzip: capture_bed.extension == "gz" skip: true } - GUNZIP_SNPBED( ch_capture_bed_gunzip.forgunzip ) - ch_capture_bed = GUNZIP_SNPBED.out.gunzip.mix( ch_capture_bed_gunzip.skip ) - } + GUNZIP_SNPBED( ch_capture_bed_gunzip.forgunzip ) + ch_capture_bed = GUNZIP_SNPBED.out.gunzip.mix( ch_capture_bed_gunzip.skip ).mix( ch_capture_bed.skip ) ch_pileupcaller_snp = ch_pileupcaller_snp .filter{ it[1] != "" && it[2] != "" } diff --git a/workflows/eager.nf b/workflows/eager.nf index b34f1634e..bd9a75b1f 100644 --- a/workflows/eager.nf +++ 
b/workflows/eager.nf @@ -100,7 +100,7 @@ include { ENDORSPY } from '../modules/n include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTATS_BAM_INPUT } from '../modules/nf-core/samtools/flagstat/main' include { BEDTOOLS_COVERAGE as BEDTOOLS_COVERAGE_DEPTH ; BEDTOOLS_COVERAGE as BEDTOOLS_COVERAGE_BREADTH } from '../modules/nf-core/bedtools/coverage/main' include { SAMTOOLS_VIEW_GENOME } from '../modules/local/samtools_view_genome.nf' -include { QUALIMAP_BAMQC } from '../modules/nf-core/qualimap/bamqc/main' +include { QUALIMAP_BAMQC as QUALIMAP_BAMQC_NOBED ; QUALIMAP_BAMQC as QUALIMAP_BAMQC_WITHBED } from '../modules/nf-core/qualimap/bamqc/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -289,14 +289,26 @@ workflow EAGER { by: 0, ch_snp_capture_bed ) + .branch { + ignore_meta, meta, bam, meta2, snp_capture_bed -> + withbed: snp_capture_bed != "" + nobed: true + } + ch_qualimap_input_with = ch_qualimap_input.withbed .multiMap{ ignore_meta, meta, bam, meta2, snp_capture_bed -> bam: [ meta, bam ] snp_capture_bed: [ snp_capture_bed ] } - QUALIMAP_BAMQC( ch_qualimap_input.bam, ch_qualimap_input.snp_capture_bed ) - ch_versions = ch_versions.mix( QUALIMAP_BAMQC.out.versions ) - // Qualimap multiqc files? + QUALIMAP_BAMQC_WITHBED( ch_qualimap_input_with.bam, ch_qualimap_input_with.snp_capture_bed ) + ch_qualimap_input_without = ch_qualimap_input.nobed + .map{ + ignore_meta, meta, bam, meta2, snp_capture_bed -> + [ meta, bam ] + } + QUALIMAP_BAMQC_NOBED( ch_qualimap_input_without, [] ) + ch_qualimap_output = QUALIMAP_BAMQC_WITHBED.out.results.mix( QUALIMAP_BAMQC_NOBED.out.results ) + ch_versions = ch_versions.mix( QUALIMAP_BAMQC_NOBED.out.versions ).mix( QUALIMAP_BAMQC_WITHBED.out.versions ) } // @@ -536,7 +548,7 @@ workflow EAGER { //ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) // Replaced with custom mixing if ( !params.skip_qualimap ) { - ch_multiqc_files = ch_multiqc_files.mix( QUALIMAP_BAMQC.out.results.collect{it[1]}.ifEmpty([]) ) + ch_multiqc_files = ch_multiqc_files.mix( ch_qualimap_output.collect{it[1]}.ifEmpty([]) ) } MULTIQC ( From c43cca43b71c13958990618b76e6583b0cc6cc4a Mon Sep 17 00:00:00 2001 From: scarlhoff Date: Fri, 6 Oct 2023 11:10:36 +0200 Subject: [PATCH 6/8] fix mtnucratio --- workflows/eager.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/eager.nf b/workflows/eager.nf index bd9a75b1f..b9a36b0de 100644 --- a/workflows/eager.nf +++ b/workflows/eager.nf @@ -384,9 +384,9 @@ workflow EAGER { ch_mito_header ) .multiMap{ - ignore_meta, meta, bam, meta2, mito_header -> + ignore_meta, meta, bam, bai, meta2, mito_header -> bam: [ meta, bam ] - mito_header: [ meta2, mito_header ] //should this be meta, mito_header? 
+ mito_header: [ meta2, mito_header ] } MTNUCRATIO( mtnucratio_input.bam, mtnucratio_input.mito_header.map{ it[1] } ) From 7dca621547d118da3990b589637b5004cb0e9927 Mon Sep 17 00:00:00 2001 From: scarlhoff Date: Fri, 6 Oct 2023 15:19:15 +0200 Subject: [PATCH 7/8] bedtools multiref integration --- docs/development/manual_tests.md | 7 ++++++- workflows/eager.nf | 30 ++++++++++++++++++------------ 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/docs/development/manual_tests.md b/docs/development/manual_tests.md index a02f22588..d339d4ba7 100644 --- a/docs/development/manual_tests.md +++ b/docs/development/manual_tests.md @@ -94,10 +94,11 @@ Tool Specific combinations - with stricter threshold - BAM trimming + - with default parameters - different length by udg treatment -- All together + - All together ### Multi-reference tests @@ -145,6 +146,10 @@ nextflow run ../main.nf -profile singularity,test --outdir ./results --input sam ## Test: (11) Broken path correctly fails pipeline ✅ ## Expect: Expect fail nextflow run ../main.nf -profile singularity,test --outdir ./results --input samplesheet.tsv --fasta reference_sheet_multiref_test11.csv -ansi-log false -dump-channels --save_reference + +# Test: File input via reference sheet +# Expect: Qualimap with bed, mtnucratio and angsd successful and bedtools not run for hs37d5, qualimap without bed file, mtnucratio and bedtools successful and angsd not run for Mammoth_MT +nextflow run main.nf -profile test_multiref,docker --outdir ./results --run_bedtools_coverage --run_contamination_estimation_angsd --run_mtnucratio ``` ### AdapterRemoval diff --git a/workflows/eager.nf b/workflows/eager.nf index b9a36b0de..3b38ec2fc 100644 --- a/workflows/eager.nf +++ b/workflows/eager.nf @@ -32,11 +32,6 @@ if ( params.metagenomics_complexity_tool == 'prinseq' && params.metagenomics_pri exit 1, ("[nf-core/eager] ERROR: Metagenomics: You picked PRINSEQ++ with 'entropy' mode but provided a dust score. Please specify an entropy filter threshold using the --metagenomics_complexity_entropy flag") } } -if( params.run_bedtools_coverage ){ - if( !params.mapstats_bedtools_featurefile ) { - exit 1, "[nf-core/eager] ERROR: you have turned on bedtools coverage, but not specified a BED or GFF file with --mapstats_bedtools_featurefile. Please validate your parameters." - } -} // TODO What to do when params.preprocessing_excludeunmerged is provided but the data is SE? if ( params.deduplication_tool == 'dedup' && ! params.preprocessing_excludeunmerged ) { exit 1, "[nf-core/eager] ERROR: Dedup can only be used on collapsed (i.e. merged) PE reads. 
For all other cases, please set --deduplication_tool to 'markduplicates'."} @@ -458,7 +453,7 @@ workflow EAGER { WorkflowEager.addNewMetaFromAttributes( it, "id" , "reference" , false ) } - ch_bedtools_input = ch_dedupped_bams + ch_bedtools_prep = ch_dedupped_bams .map { WorkflowEager.addNewMetaFromAttributes( it, "reference" , "reference" , false ) } @@ -468,21 +463,32 @@ workflow EAGER { ) .map{ ignore_meta, meta, bam, bai, meta2, bedtools_feature -> - [ meta, bedtools_feature, bam ] - } + [ meta, bedtools_feature, bam, bai ] + } + .branch{ + meta, bedtools_feature, bam, bai -> + withfeature: bedtools_feature != "" + nobed: true + } // Running samtools view to get header - SAMTOOLS_VIEW_GENOME(ch_dedupped_bams) + ch_bedtools_input = ch_bedtools_prep.withfeature + .multiMap{ + meta, bedtools_feature, bam, bai -> + bam: [ meta, bam, bai ] + withfeature: [ meta, bedtools_feature, bam ] + } + + SAMTOOLS_VIEW_GENOME( ch_bedtools_input.bam ) ch_genome_for_bedtools = SAMTOOLS_VIEW_GENOME.out.genome - BEDTOOLS_COVERAGE_BREADTH(ch_bedtools_input, ch_genome_for_bedtools) - BEDTOOLS_COVERAGE_DEPTH(ch_bedtools_input, ch_genome_for_bedtools) + BEDTOOLS_COVERAGE_BREADTH(ch_bedtools_input.withfeature, ch_genome_for_bedtools) + BEDTOOLS_COVERAGE_DEPTH(ch_bedtools_input.withfeature, ch_genome_for_bedtools) ch_versions = ch_versions.mix( SAMTOOLS_VIEW_GENOME.out.versions ) ch_versions = ch_versions.mix( BEDTOOLS_COVERAGE_BREADTH.out.versions ) ch_versions = ch_versions.mix( BEDTOOLS_COVERAGE_DEPTH.out.versions ) - //Bedtools multiqc files? } // From 6e2abeb47b3e31788264f9e1337e1c5c73f2f5aa Mon Sep 17 00:00:00 2001 From: scarlhoff Date: Fri, 20 Oct 2023 11:07:51 +0200 Subject: [PATCH 8/8] integrate review comments --- subworkflows/local/map.nf | 13 +++++-------- workflows/eager.nf | 2 ++ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/subworkflows/local/map.nf b/subworkflows/local/map.nf index 5d196aaca..dc38315ab 100644 --- a/subworkflows/local/map.nf +++ b/subworkflows/local/map.nf @@ -29,8 +29,7 @@ workflow MAP { .map{ // create meta consistent with rest of workflow meta, bam -> - new_meta = meta.clone() - new_meta.reference = meta.id_index + new_meta = meta + [ reference: meta.id_index ] [ new_meta, bam ] } @@ -41,10 +40,9 @@ workflow MAP { .combine( index ) .multiMap { meta, reads, meta2, index -> - new_meta = meta.clone() - new_meta.reference = meta2.id + new_meta = meta + [ reference: meta2.id ] reads: [ new_meta, reads ] - index: [ meta2, index] + index: [ meta2, index ] } BWA_MEM ( ch_input_for_mapping.reads, ch_input_for_mapping.index, true ) @@ -60,10 +58,9 @@ workflow MAP { .combine( index ) .multiMap { meta, reads, meta2, index -> - new_meta = meta.clone() - new_meta.reference = meta2.id + new_meta = meta + [ reference: meta2.id ] reads: [ new_meta, reads ] - index: [ meta2, index] + index: [ meta2, index ] } BOWTIE2_ALIGN ( ch_input_for_mapping.reads, ch_input_for_mapping.index, false, true ) diff --git a/workflows/eager.nf b/workflows/eager.nf index bafd24b64..57a4f33e1 100644 --- a/workflows/eager.nf +++ b/workflows/eager.nf @@ -295,12 +295,14 @@ workflow EAGER { bam: [ meta, bam ] snp_capture_bed: [ snp_capture_bed ] } + QUALIMAP_BAMQC_WITHBED( ch_qualimap_input_with.bam, ch_qualimap_input_with.snp_capture_bed ) ch_qualimap_input_without = ch_qualimap_input.nobed .map{ ignore_meta, meta, bam, meta2, snp_capture_bed -> [ meta, bam ] } + QUALIMAP_BAMQC_NOBED( ch_qualimap_input_without, [] ) ch_qualimap_output = QUALIMAP_BAMQC_WITHBED.out.results.mix( 
QUALIMAP_BAMQC_NOBED.out.results ) ch_versions = ch_versions.mix( QUALIMAP_BAMQC_NOBED.out.versions ).mix( QUALIMAP_BAMQC_WITHBED.out.versions )
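
The recurring technique across these patches is matching per-reference inputs (SNP capture BEDs, hapmap files, mitochondrion headers, bedtools feature files) to the correct per-sample BAMs by promoting the reference name to a leading grouping key and then using `combine( by: 0 )`, rather than broadcasting a single file to every sample. The snippet below is a minimal, self-contained sketch of that pattern only, not part of the diff: the sample IDs and file names are hypothetical, and the keying step that the patches perform with `WorkflowEager.addNewMetaFromAttributes` is approximated here with a plain `map`.

```nextflow
// Sketch only: illustrates the combine-by-reference pattern used in the patches above.
// All sample/file names are placeholders; file() here does not check for existence.

workflow {

    // Hypothetical deduplicated BAMs, one per sample/reference combination
    ch_dedupped_bams = Channel.of(
        [ [ id: 'sample1', reference: 'hs37d5'     ], file('sample1_hs37d5.bam') ],
        [ [ id: 'sample2', reference: 'Mammoth_MT' ], file('sample2_MT.bam') ]
    )

    // Hypothetical per-reference annotation files, keyed by the reference meta id
    ch_snp_capture_bed = Channel.of(
        [ [ id: 'hs37d5' ], file('1240K.pos.list_hs37d5.0based.bed') ]
    )

    // Promote the reference name to a leading grouping key on both channels
    // (the pipeline does this via WorkflowEager.addNewMetaFromAttributes)
    ch_bams_keyed = ch_dedupped_bams.map   { meta, bam -> [ meta.reference, meta, bam ] }
    ch_bed_keyed  = ch_snp_capture_bed.map { meta, bed -> [ meta.id,        meta, bed ] }

    // Combine on the key so each BAM only meets files belonging to its own reference,
    // then drop the key and the annotation meta before passing downstream
    ch_bams_keyed
        .combine( ch_bed_keyed, by: 0 )
        .map { key, meta, bam, meta2, bed -> [ meta, bam, bed ] }
        .view()
}
```

Keying on a dedicated leading element, instead of joining on the full meta map, is what lets sample-level meta (lane, library, strandedness) differ from reference-level meta without breaking the association; the same idea underlies the `meta.id_index` suffix that FASTQ_ALIGN_BWAALN adds so output file names stay unique when one sample is mapped against several references.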