Skip to content

Commit

Permalink
Add library merge SWF
Browse files Browse the repository at this point in the history
  • Loading branch information
TCLamnidis committed Jan 30, 2024
1 parent 17a84c7 commit fd9776e
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 2 deletions.
43 changes: 43 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -883,4 +883,47 @@ process {
enabled: true
]
}

//
// LIBRARY MERGE
//

withName: SAMTOOLS_MERGE_LIBRARIES {
tag = { "${meta.reference}|${meta.sample_id}" }
ext.prefix = { "${meta.sample_id}_${meta.reference}_unsorted" }
publishDir = [
enabled: false
]
}

withName: SAMTOOLS_SORT_MERGED_LIBRARIES {
tag = { "${meta.reference}|${meta.sample_id}" }
ext.prefix = { "${meta.sample_id}_${meta.reference}" }
publishDir = [
path: { "${params.outdir}/library_merge/" },
mode: params.publish_dir_mode,
pattern: '*.bam'
]
}

withName: SAMTOOLS_INDEX_MERGED_LIBRARIES {
tag = { "${meta.reference}|${meta.sample_id}" }
ext.args = { params.fasta_largeref ? "-c" : "" }
ext.prefix = { "${meta.sample_id}_${meta.reference}" }
publishDir = [
path: { "${params.outdir}/library_merge/" },
mode: params.publish_dir_mode,
pattern: '*.{bai,csi}'
]
}

withName: SAMTOOLS_FLAGSTAT_MERGED_LIBRARIES {
tag = { "${meta.reference}|${meta.sample_id}" }
ext.prefix = { "${meta.sample_id}_${meta.reference}" }
publishDir = [
path: { "${params.outdir}/library_merge/" },
mode: params.publish_dir_mode,
pattern: '*.flagstat'
]
}
}
53 changes: 53 additions & 0 deletions subworkflows/local/merge_libraries.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
//
// Merge libraries of the same sample, then sort, index, and flagstat the merged bam
//

include { SAMTOOLS_MERGE as SAMTOOLS_MERGE_LIBRARIES } from '../../modules/nf-core/samtools/merge/main'
include { SAMTOOLS_SORT as SAMTOOLS_SORT_MERGED_LIBRARIES } from '../../modules/nf-core/samtools/sort/main'
include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MERGED_LIBRARIES } from '../../modules/nf-core/samtools/index/main'
include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTAT_MERGED_LIBRARIES } from '../../modules/nf-core/samtools/flagstat/main'

workflow MERGE_LIBRARIES {
take:
ch_bam_bai // [ [ meta ], bam , bai ]

main:
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()

ch_library_merge_input = ch_bam_bai
// TODO add 'id_index' to final meta? (once that also gets added to bam input). Maybe also keep SE/PE? (for now, we assume SE, see comment below)
.map { WorkflowEager.addNewMetaFromAttributes( it, ["id", "sample_id", "strandedness", "reference"], ["id", "sample_id", "strandedness", "reference"], false ) }
.groupTuple(by: 0)
// Discrad library-level metas, and bais. Add single_end: true to all metas (no SE/PE distinction at this point, right?)
.map {
meta, lib_metas, bam, bai ->
[ meta + [ 'single_end':true ], bam ]
}

SAMTOOLS_MERGE_LIBRARIES ( ch_library_merge_input, [], [] )
ch_versions = ch_versions.mix( SAMTOOLS_MERGE_LIBRARIES.out.versions.first() )

SAMTOOLS_SORT_MERGED_LIBRARIES ( SAMTOOLS_MERGE_LIBRARIES.out.bam )
ch_versions = ch_versions.mix( SAMTOOLS_SORT_MERGED_LIBRARIES.out.versions.first() )

SAMTOOLS_INDEX_MERGED_LIBRARIES ( SAMTOOLS_SORT_MERGED_LIBRARIES.out.bam )
ch_versions = ch_versions.mix( SAMTOOLS_INDEX_MERGED_LIBRARIES.out.versions.first() )

// Join merged sample-level bams and their bais for genotyping
ch_merged_bams = SAMTOOLS_SORT_MERGED_LIBRARIES.out.bam
.join( SAMTOOLS_INDEX_MERGED_LIBRARIES.out.bai )

// Not sure if FLAGSTAT is really needed, but added here for completeness
SAMTOOLS_FLAGSTAT_MERGED_LIBRARIES ( ch_merged_bams )
ch_versions = ch_versions.mix( SAMTOOLS_FLAGSTAT_MERGED_LIBRARIES.out.versions.first() )

ch_merged_flagstat = SAMTOOLS_FLAGSTAT_MERGED_LIBRARIES.out.flagstat
ch_multiqc_files = ch_multiqc_files.mix( SAMTOOLS_FLAGSTAT_MERGED_LIBRARIES.out.flagstat )

emit:
bam_bai = ch_merged_bams // [ [ meta ], bam , bai ]
flagstat = ch_merged_flagstat // [ [ meta ], flagstat ]
versions = ch_versions
mqc = ch_multiqc_files // Same as flagstat
}
16 changes: 14 additions & 2 deletions workflows/eager.nf
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ include { MANIPULATE_DAMAGE } from '../subworkflows/local/manipulate
include { METAGENOMICS_COMPLEXITYFILTER } from '../subworkflows/local/metagenomics_complexityfilter'
include { ESTIMATE_CONTAMINATION } from '../subworkflows/local/estimate_contamination'
include { CALCULATE_DAMAGE } from '../subworkflows/local/calculate_damage'
include { MERGE_LIBRARIES } from '../subworkflows/local/merge_libraries'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down Expand Up @@ -530,11 +531,22 @@ workflow EAGER {
MANIPULATE_DAMAGE( ch_dedupped_bams, ch_fasta_for_deduplication.fasta, REFERENCE_INDEXING.out.pmd_masking )
ch_multiqc_files = ch_multiqc_files.mix( MANIPULATE_DAMAGE.out.flagstat.collect{it[1]}.ifEmpty([]) )
ch_versions = ch_versions.mix( MANIPULATE_DAMAGE.out.versions )
ch_bams_for_genotyping = params.genotyping_source == 'rescaled' ? MANIPULATE_DAMAGE.out.rescaled : params.genotyping_source == 'pmd' ? MANIPULATE_DAMAGE.out.filtered : params.genotyping_source == 'trimmed' ? MANIPULATE_DAMAGE.out.trimmed : ch_dedupped_bams
ch_bams_for_library_merge = params.genotyping_source == 'rescaled' ? MANIPULATE_DAMAGE.out.rescaled : params.genotyping_source == 'pmd' ? MANIPULATE_DAMAGE.out.filtered : params.genotyping_source == 'trimmed' ? MANIPULATE_DAMAGE.out.trimmed : ch_dedupped_bams
} else {
ch_bams_for_genotyping = ch_dedupped_bams
ch_bams_for_library_merge = ch_dedupped_bams
}

//
// SUBWORKFLOW: MERGE LIBRARIES
//

// The bams being merged are always the ones specified by params.genotyping_source,
// unless the user skipped damage manipulation, in which case it is the DEDUPLICATION output.
MERGE_LIBRARIES ( ch_bams_for_library_merge )
ch_versions = ch_versions.mix( MERGE_LIBRARIES.out.versions )
ch_bams_for_genotyping = MERGE_LIBRARIES.out.bam_bai
ch_multiqc_files = ch_multiqc_files.mix( MERGE_LIBRARIES.out.mqc.collect{it[1]}.ifEmpty([]) ) // Not sure if this is needed, or if it needs to be moved to line 564?

//
// MODULE: MultiQC
//
Expand Down

0 comments on commit fd9776e

Please sign in to comment.