diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 1a55a16c..4493905e 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -72,7 +72,7 @@ module_order: anchor: "mlib_featurecounts" info: "This section of the report shows featureCounts results for the number of reads assigned to merged library consensus peaks." path_filters: - - "./macs/consensus/*.summary" + - "./macs2/featurecounts/*.summary" report_section_order: peak_count: diff --git a/conf/modules.config b/conf/modules.config index 47784e03..b46fec83 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -396,7 +396,6 @@ process { withName: 'PHANTOMPEAKQUALTOOLS' { ext.args2 = { "-p=$task.cpus" } - ext.prefix = { "${meta.id}.mLb.clN" } publishDir = [ path: { "${params.outdir}/${params.aligner}/mergedLibrary/phantompeakqualtools" }, mode: params.publish_dir_mode, @@ -599,7 +598,8 @@ if (!params.skip_peak_annotation) { } withName: 'PLOT_HOMER_ANNOTATEPEAKS' { - ext.args = '-o ./ -p macs2_annotatePeaks' + ext.args = '-o ./' + ext.prefix = 'macs2_annotatePeaks' publishDir = [ path: { [ "${params.outdir}/${params.aligner}/mergedLibrary/macs2", @@ -623,7 +623,8 @@ if (!params.skip_consensus_peaks) { path: { [ "${params.outdir}/${params.aligner}/mergedLibrary/macs2", params.narrow_peak? '/narrowPeak' : '/broadPeak', - '/consensus' + '/consensus', + "/${meta.id}" ].join('') }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } @@ -632,11 +633,13 @@ if (!params.skip_consensus_peaks) { withName: 'SUBREAD_FEATURECOUNTS' { ext.args = '-F SAF -O --fracOverlap 0.2' + ext.prefix = { "${meta.id}.consensus_peaks" } publishDir = [ path: { [ "${params.outdir}/${params.aligner}/mergedLibrary/macs2", params.narrow_peak? '/narrowPeak' : '/broadPeak', - '/consensus' + '/consensus', + "/${meta.id}" ].join('') }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } @@ -648,24 +651,27 @@ if (!params.skip_consensus_peaks) { process { withName: 'HOMER_ANNOTATEPEAKS_CONSENSUS' { ext.args = '-gid' - ext.prefix = 'consensus_peaks' + ext.prefix = { "${meta.id}.consensus_peaks" } publishDir = [ path: { [ "${params.outdir}/${params.aligner}/mergedLibrary/macs2", params.narrow_peak? '/narrowPeak' : '/broadPeak', - '/consensus' + '/consensus', + "/${meta.id}" ].join('') }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } + withName: 'ANNOTATE_BOOLEAN_PEAKS' { - ext.prefix = { "${meta.id}_peaks" } + ext.prefix = { "${meta.id}.consensus_peaks" } publishDir = [ path: { [ "${params.outdir}/${params.aligner}/mergedLibrary/macs2", params.narrow_peak? '/narrowPeak' : '/broadPeak', - '/consensus' + '/consensus', + "/${meta.id}" ].join('') }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } @@ -684,11 +690,13 @@ if (!params.skip_consensus_peaks) { '--count_col 7', params.deseq2_vst ? '--vst TRUE' : '' ].join(' ').trim() + ext.prefix = { "${meta.id}.consensus_peaks" } publishDir = [ path: { [ "${params.outdir}/${params.aligner}/mergedLibrary/macs2", params.narrow_peak? '/narrowPeak' : '/broadPeak', '/consensus', + "/${meta.id}", '/deseq2' ].join('') }, mode: params.publish_dir_mode, diff --git a/conf/test.config b/conf/test.config index 2fad3bc8..9b24bc9a 100644 --- a/conf/test.config +++ b/conf/test.config @@ -11,13 +11,13 @@ */ params { - config_profile_name = 'Test profile' + config_profile_name = 'Test profile' config_profile_description = 'Minimal test dataset to check pipeline function' // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = 6.GB - max_time = 6.h + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/samplesheet/v2.0/samplesheet_test.csv' diff --git a/conf/test_full.config b/conf/test_full.config index f47445a5..d25c37d7 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -17,7 +17,7 @@ params { // Input data for full size test input = 'https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/samplesheet/v2.0/samplesheet_full.csv' - // Used to get macs_gsize + // Used to calculate --macs_gsize read_length = 50 // Genome references diff --git a/docs/output.md b/docs/output.md index 6fde66de..b2c9bd0d 100644 --- a/docs/output.md +++ b/docs/output.md @@ -261,7 +261,7 @@ The [featureCounts](http://bioinf.wehi.edu.au/featureCounts/) tool is used to co **This pipeline uses a standardised DESeq2 analysis script to get an idea of the reproducibility within the experiment, and to assess the overall differential binding. Please note that this will not suit every experimental design, and if there are other problems with the experiment then it may not work as well as expected.** -For larger experiments, it may be recommended to use the `vst` transformation instead of the default `rlog` option. You can do this by providing the `--deseq2_vst` parameter to the pipeline. See [DESeq2 docs](http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#data-transformations-and-visualization) for a more detailed explanation. +For larger experiments, it is recommended to use the `vst` transformation instead of the `rlog` option. This is the default behaviour and can be controlled with the `--deseq2_vst` parameter. See [DESeq2 docs](http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#data-transformations-and-visualization) for a more detailed explanation. ![MultiQC - DESeq2 PCA plot](images/mqc_deseq2_pca_plot.png) diff --git a/lib/WorkflowChipseq.groovy b/lib/WorkflowChipseq.groovy index b46f9362..dfae806f 100755 --- a/lib/WorkflowChipseq.groovy +++ b/lib/WorkflowChipseq.groovy @@ -12,7 +12,7 @@ class WorkflowChipseq { if (!params.fasta) { - log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." + log.error "Genome fasta file not specified with e.g. '--fasta' or via a detectable config file." System.exit(1) } @@ -98,7 +98,7 @@ class WorkflowChipseq { log.warn "=============================================================================\n" + " --macs_gsize parameter has not been provided.\n" + " It will be auto-calculated by 'khmer unique-kmers.py' using the '--read_length' parameter.\n" + - " Explicitly provide '--macs_gsize macs2_genome_size' to change this behaviour.\n" + + " Explicitly provide '--macs_gsize' to change this behaviour.\n" + "===================================================================================" } diff --git a/modules/local/annotate_boolean_peaks.nf b/modules/local/annotate_boolean_peaks.nf index 8d45fe92..ab2dfbed 100644 --- a/modules/local/annotate_boolean_peaks.nf +++ b/modules/local/annotate_boolean_peaks.nf @@ -1,5 +1,5 @@ process ANNOTATE_BOOLEAN_PEAKS { - + tag "$meta.id" label 'process_low' conda (params.enable_conda ? "conda-forge::sed=4.7" : null) diff --git a/modules/local/deseq2_qc.nf b/modules/local/deseq2_qc.nf index bdf5535a..7fc5a9f2 100644 --- a/modules/local/deseq2_qc.nf +++ b/modules/local/deseq2_qc.nf @@ -27,8 +27,7 @@ process DESEQ2_QC { script: def args = task.ext.args ?: '' def peak_type = params.narrow_peak ? 'narrowPeak' : 'broadPeak' - def antibody = meta.antibody - def prefix = "${antibody}.consensus_peaks" + def prefix = task.ext.prefix ?: "${meta.id}" """ deseq2_qc.r \\ --count_file $counts \\ @@ -38,11 +37,11 @@ process DESEQ2_QC { $args sed 's/deseq2_pca/deseq2_pca_${task.index}/g' <$deseq2_pca_header >tmp.txt - sed -i -e 's/DESeq2 /${antibody} DESeq2 /g' tmp.txt + sed -i -e 's/DESeq2 /${meta.id} DESeq2 /g' tmp.txt cat tmp.txt ${prefix}.pca.vals.txt > ${prefix}.pca.vals_mqc.tsv sed 's/deseq2_clustering/deseq2_clustering_${task.index}/g' <$deseq2_clustering_header >tmp.txt - sed -i -e 's/DESeq2 /${antibody} DESeq2 /g' tmp.txt + sed -i -e 's/DESeq2 /${meta.id} DESeq2 /g' tmp.txt cat tmp.txt ${prefix}.sample.dists.txt > ${prefix}.sample.dists_mqc.tsv cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/frip_score.nf b/modules/local/frip_score.nf index e8fdbcc9..337b18c5 100644 --- a/modules/local/frip_score.nf +++ b/modules/local/frip_score.nf @@ -20,7 +20,7 @@ process FRIP_SCORE { """ READS_IN_PEAKS=\$(intersectBed -a $bam -b $peak $args | awk -F '\t' '{sum += \$NF} END {print sum}') samtools flagstat $bam > ${bam}.flagstat - grep 'mapped (' ${bam}.flagstat | awk -v a="\$READS_IN_PEAKS" -v OFS='\t' '{print "${prefix}", a/\$1}' > ${prefix}.FRiP.txt + grep 'mapped (' ${bam}.flagstat | grep -v "primary" | awk -v a="\$READS_IN_PEAKS" -v OFS='\t' '{print "${prefix}", a/\$1}' > ${prefix}.FRiP.txt cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/multiqc.nf b/modules/local/multiqc.nf index 92824f01..702b239f 100644 --- a/modules/local/multiqc.nf +++ b/modules/local/multiqc.nf @@ -31,8 +31,10 @@ process MULTIQC { path ('alignment/mergedLibrary/filtered/picard_metrics/*') path ('preseq/*') + path ('deeptools/*') path ('deeptools/*') + path ('phantompeakqualtools/*') path ('phantompeakqualtools/*') path ('phantompeakqualtools/*') @@ -41,8 +43,7 @@ process MULTIQC { path ('macs2/peaks/*') path ('macs2/peaks/*') path ('macs2/annotation/*') - - path ('featurecounts/*') + path ('macs2/featurecounts/*') path ('deseq2/*') path ('deseq2/*') diff --git a/modules/local/multiqc_custom_peaks.nf b/modules/local/multiqc_custom_peaks.nf index 1ca41d4c..ebef7b13 100644 --- a/modules/local/multiqc_custom_peaks.nf +++ b/modules/local/multiqc_custom_peaks.nf @@ -1,5 +1,5 @@ process MULTIQC_CUSTOM_PEAKS { - + tag "$meta.id" conda (params.enable_conda ? "conda-forge::sed=4.7" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : diff --git a/modules/local/multiqc_custom_phantompeakqualtools.nf b/modules/local/multiqc_custom_phantompeakqualtools.nf index 706af4fc..4878e2c2 100644 --- a/modules/local/multiqc_custom_phantompeakqualtools.nf +++ b/modules/local/multiqc_custom_phantompeakqualtools.nf @@ -1,4 +1,5 @@ process MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS { + tag "$meta.id" conda (params.enable_conda ? "conda-forge::r-base=3.5.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/r-base:3.5.1': diff --git a/modules/local/plot_homer_annotatepeaks.nf b/modules/local/plot_homer_annotatepeaks.nf index 2cb38a37..11ed3dab 100644 --- a/modules/local/plot_homer_annotatepeaks.nf +++ b/modules/local/plot_homer_annotatepeaks.nf @@ -19,13 +19,15 @@ process PLOT_HOMER_ANNOTATEPEAKS { script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "annotatepeaks" """ plot_homer_annotatepeaks.r \\ -i ${annos.join(',')} \\ -s ${annos.join(',').replaceAll("${suffix}","")} \\ + -p $prefix \\ $args - find ./ -type f -name "*.txt" -exec cat {} \\; | cat $mqc_header - > annotatepeaks.summary_mqc.tsv + find ./ -type f -name "*.txt" -exec cat {} \\; | cat $mqc_header - > ${prefix}.summary_mqc.tsv cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/nextflow.config b/nextflow.config index 801b0c2f..e7c2b71d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -50,7 +50,7 @@ params { skip_consensus_peaks = false // Options: DESeq2 QC - deseq2_vst = false + deseq2_vst = true skip_deseq2_qc = false // Options: QC diff --git a/nextflow_schema.json b/nextflow_schema.json index ea0721e1..2a5f445c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -348,7 +348,8 @@ "type": "boolean", "description": "Use vst transformation instead of rlog with DESeq2.", "help_text": "See [DESeq2 docs](http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#data-transformations-and-visualization).", - "fa_icon": "fas fa-dolly" + "fa_icon": "fas fa-dolly", + "default": true }, "skip_plot_profile": { "type": "boolean", diff --git a/workflows/chipseq.nf b/workflows/chipseq.nf index eedccc5d..db6699e4 100644 --- a/workflows/chipseq.nf +++ b/workflows/chipseq.nf @@ -182,7 +182,7 @@ workflow CHIPSEQ { } // - // SUBWORKFLOW: Alignment with BOWTIE2 & BAM QC + // SUBWORKFLOW: Alignment with Bowtie2 & BAM QC // if (params.aligner == 'bowtie2') { ALIGN_BOWTIE2 ( @@ -199,7 +199,7 @@ workflow CHIPSEQ { } // - // SUBWORKFLOW: Alignment with CHROMAP & BAM QC + // SUBWORKFLOW: Alignment with Chromap & BAM QC // if (params.aligner == 'chromap') { ALIGN_CHROMAP ( @@ -210,7 +210,7 @@ workflow CHIPSEQ { // Filter out paired-end reads until the issue below is fixed // https://github.com/nf-core/chipseq/issues/291 - // ch_genome_bam = ALIGN_CHROMAP.out.bam + // ch_genome_bam = ALIGN_CHROMAP.out.bam ALIGN_CHROMAP .out .bam @@ -220,19 +220,22 @@ workflow CHIPSEQ { return [ meta, bam ] paired_end: !meta.single_end return [ meta, bam ] - }.set { ch_genome_bam_chromap } + } + .set { ch_genome_bam_chromap } - ch_genome_bam_chromap.paired_end + ch_genome_bam_chromap + .paired_end .collect() - .map { it -> - def count = it.size() - if (count > 0) { - log.warn "=============================================================================\n" + - " Paired-end files produced by chromap can not be used by some downstream tools due the issue below:\n" + - " https://github.com/nf-core/chipseq/issues/291\n" + - " They will be excluded from the analysis. Consider to use a different aligner\n" + - "===================================================================================" - } + .map { + it -> + def count = it.size() + if (count > 0) { + log.warn "=============================================================================\n" + + " Paired-end files produced by chromap cannot be used by some downstream tools due to the issue below:\n" + + " https://github.com/nf-core/chipseq/issues/291\n" + + " They will be excluded from the analysis. Consider using a different aligner\n" + + "===================================================================================" + } } ch_genome_bam = ch_genome_bam_chromap.single_end @@ -263,16 +266,21 @@ workflow CHIPSEQ { } // - // SUBWORKFLOW: Merge resequenced BAM files + // MODULE: Merge resequenced BAM files // ch_genome_bam .map { meta, bam -> - def fmeta = meta.findAll { it.key != 'read_group' } - fmeta.id = fmeta.id.split('_')[0..-2].join('_') - [ fmeta, bam ] } + def meta_clone = meta.clone() + meta_clone.remove('read_group') + meta_clone.id = meta_clone.id.split('_')[0..-2].join('_') + [ meta_clone, bam ] + } .groupTuple(by: [0]) - .map { it -> [ it[0], it[1].flatten() ] } + .map { + it -> + [ it[0], it[1].flatten() ] + } .set { ch_sort_bam } PICARD_MERGESAMFILES ( @@ -294,14 +302,13 @@ workflow CHIPSEQ { FILTER_BAM_BAMTOOLS ( MARK_DUPLICATES_PICARD.out.bam.join(MARK_DUPLICATES_PICARD.out.bai, by: [0]), PREPARE_GENOME.out.filtered_bed.first(), - ch_bamtools_filter_se_config, ch_bamtools_filter_pe_config ) ch_versions = ch_versions.mix(FILTER_BAM_BAMTOOLS.out.versions.first().ifEmpty(null)) // - // MODULE: Library coverage + // MODULE: Preseq coverage analysis // ch_preseq_multiqc = Channel.empty() if (!params.skip_preseq) { @@ -309,11 +316,11 @@ workflow CHIPSEQ { MARK_DUPLICATES_PICARD.out.bam ) ch_preseq_multiqc = PRESEQ_LCEXTRAP.out.lc_extrap - ch_versions = ch_versions.mix(PRESEQ_LCEXTRAP.out.versions.first()) + ch_versions = ch_versions.mix(PRESEQ_LCEXTRAP.out.versions.first()) } // - // MODULE: Post alignment QC + // MODULE: Picard post alignment QC // ch_picardcollectmultiplemetrics_multiqc = Channel.empty() if (!params.skip_picard_metrics) { @@ -327,13 +334,16 @@ workflow CHIPSEQ { } // - // MODULE: Strand cross-correlation + // MODULE: Phantompeaktools strand cross-correlation and QC metrics // PHANTOMPEAKQUALTOOLS ( FILTER_BAM_BAMTOOLS.out.bam ) ch_versions = ch_versions.mix(PHANTOMPEAKQUALTOOLS.out.versions.first()) + // + // MODULE: MultiQC custom content for Phantompeaktools + // MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS ( PHANTOMPEAKQUALTOOLS.out.spp.join(PHANTOMPEAKQUALTOOLS.out.rdata, by: [0]), ch_spp_nsc_header, @@ -342,7 +352,7 @@ workflow CHIPSEQ { ) // - // MODULE: Coverage tracks + // MODULE: BedGraph coverage tracks // BEDTOOLS_GENOMECOV ( FILTER_BAM_BAMTOOLS.out.bam.join(FILTER_BAM_BAMTOOLS.out.flagstat, by: [0]) @@ -350,7 +360,7 @@ workflow CHIPSEQ { ch_versions = ch_versions.mix(BEDTOOLS_GENOMECOV.out.versions.first()) // - // MODULE: Coverage tracks + // MODULE: BigWig coverage tracks // UCSC_BEDGRAPHTOBIGWIG ( BEDTOOLS_GENOMECOV.out.bedgraph, @@ -358,23 +368,29 @@ workflow CHIPSEQ { ) ch_versions = ch_versions.mix(UCSC_BEDGRAPHTOBIGWIG.out.versions.first()) - // - // MODULE: Coverage plots - // ch_deeptoolsplotprofile_multiqc = Channel.empty() if (!params.skip_plot_profile) { + // + // MODULE: deepTools matrix generation for plotting + // DEEPTOOLS_COMPUTEMATRIX ( UCSC_BEDGRAPHTOBIGWIG.out.bigwig, PREPARE_GENOME.out.gene_bed ) ch_versions = ch_versions.mix(DEEPTOOLS_COMPUTEMATRIX.out.versions.first()) + // + // MODULE: deepTools profile plots + // DEEPTOOLS_PLOTPROFILE ( DEEPTOOLS_COMPUTEMATRIX.out.matrix ) ch_deeptoolsplotprofile_multiqc = DEEPTOOLS_PLOTPROFILE.out.table - ch_versions = ch_versions.mix(DEEPTOOLS_COMPUTEMATRIX.out.versions.first()) + ch_versions = ch_versions.mix(DEEPTOOLS_PLOTPROFILE.out.versions.first()) + // + // MODULE: deepTools heatmaps + // DEEPTOOLS_PLOTHEATMAP ( DEEPTOOLS_COMPUTEMATRIX.out.matrix ) @@ -382,51 +398,42 @@ workflow CHIPSEQ { } // - // Refactor channels: [ val(meta), [ ip_bam, control_bam ] [ ip_bai, control_bai ] ] + // Create channels: [ meta, [ ip_bam, control_bam ] [ ip_bai, control_bai ] ] // FILTER_BAM_BAMTOOLS .out .bam - .join (FILTER_BAM_BAMTOOLS.out.bai, by: [0]) - .map { - meta, bam, bai -> - meta.control ? null : [ meta.id, [ bam ] , [ bai ] ] - } - .set { ch_control_bam_bai } - - FILTER_BAM_BAMTOOLS - .out - .bam - .join (FILTER_BAM_BAMTOOLS.out.bai, by: [0]) - .map { - meta, bam, bai -> - meta.control ? [ meta.control, meta, [ bam ], [ bai ] ] : null + .join(FILTER_BAM_BAMTOOLS.out.bai, by: [0]) + .set { ch_genome_bam_bai } + + ch_genome_bam_bai + .combine(ch_genome_bam_bai) + .map { + meta1, bam1, bai1, meta2, bam2, bai2 -> + meta1.control == meta2.id ? [ meta1, [ bam1, bam2 ], [ bai1, bai2 ] ] : null } - .combine(ch_control_bam_bai, by: 0) - .map { it -> [ it[1] , it[2] + it[4], it[3] + it[5] ] } .set { ch_ip_control_bam_bai } - + // - // plotFingerprint for IP and control together + // MODULE: deepTools plotFingerprint joint QC for IP and control // ch_deeptoolsplotfingerprint_multiqc = Channel.empty() if (!params.skip_plot_fingerprint) { DEEPTOOLS_PLOTFINGERPRINT ( ch_ip_control_bam_bai ) - ch_deeptools_plotfingerprintmultiqc = DEEPTOOLS_PLOTFINGERPRINT.out.matrix + ch_deeptoolsplotfingerprint_multiqc = DEEPTOOLS_PLOTFINGERPRINT.out.matrix ch_versions = ch_versions.mix(DEEPTOOLS_PLOTFINGERPRINT.out.versions.first()) } // - // Call peaks + // MODULE: Calculute genome size with khmer // ch_macs_gsize = Channel.empty() ch_custompeaks_frip_multiqc = Channel.empty() ch_custompeaks_count_multiqc = Channel.empty() ch_plothomerannotatepeaks_multiqc = Channel.empty() ch_subreadfeaturecounts_multiqc = Channel.empty() - ch_macs_gsize = params.macs_gsize if (!params.macs_gsize) { KHMER_UNIQUEKMERS ( @@ -436,11 +443,17 @@ workflow CHIPSEQ { ch_macs_gsize = KHMER_UNIQUEKMERS.out.kmers.map { it.text.trim() } } - // Create channel: [ val(meta), ip_bam, control_bam ] + // Create channels: [ meta, ip_bam, control_bam ] ch_ip_control_bam_bai - .map { meta, bams, bais -> [ meta , bams[0], bams[1] ] } + .map { + meta, bams, bais -> + [ meta , bams[0], bams[1] ] + } .set { ch_ip_control_bam } + // + // MODULE: Call peaks with MACS2 + // MACS2_CALLPEAK ( ch_ip_control_bam, ch_macs_gsize @@ -448,7 +461,7 @@ workflow CHIPSEQ { ch_versions = ch_versions.mix(MACS2_CALLPEAK.out.versions.first()) // - // Filter for MACS2 files without peaks + // Filter out samples with 0 MACS2 peaks called // MACS2_CALLPEAK .out @@ -456,23 +469,37 @@ workflow CHIPSEQ { .filter { meta, peaks -> peaks.size() > 0 } .set { ch_macs2_peaks } + // Create channels: [ meta, ip_bam, peaks ] ch_ip_control_bam .join(ch_macs2_peaks, by: [0]) - .map { it -> [ it[0], it[1], it[3] ] } - .set { ch_ip_peak } + .map { + it -> + [ it[0], it[1], it[3] ] + } + .set { ch_ip_bam_peaks } + // + // MODULE: Calculate FRiP score + // FRIP_SCORE ( - ch_ip_peak + ch_ip_bam_peaks ) ch_versions = ch_versions.mix(FRIP_SCORE.out.versions.first()) - ch_ip_peak + // Create channels: [ meta, peaks, frip ] + ch_ip_bam_peaks .join(FRIP_SCORE.out.txt, by: [0]) - .map { it -> [ it[0], it[2], it[3] ] } - .set { ch_ip_peak_frip } + .map { + it -> + [ it[0], it[2], it[3] ] + } + .set { ch_ip_peaks_frip } + // + // MODULE: FRiP score custom content for MultiQC + // MULTIQC_CUSTOM_PEAKS ( - ch_ip_peak_frip, + ch_ip_peaks_frip, ch_peak_count_header, ch_frip_score_header ) @@ -480,6 +507,9 @@ workflow CHIPSEQ { ch_custompeaks_count_multiqc = MULTIQC_CUSTOM_PEAKS.out.count if (!params.skip_peak_annotation) { + // + // MODULE: Annotate peaks with MACS2 + // HOMER_ANNOTATEPEAKS_MACS2 ( ch_macs2_peaks, PREPARE_GENOME.out.fasta, @@ -488,11 +518,17 @@ workflow CHIPSEQ { ch_versions = ch_versions.mix(HOMER_ANNOTATEPEAKS_MACS2.out.versions.first()) if (!params.skip_peak_qc) { + // + // MODULE: MACS2 QC plots with R + // PLOT_MACS2_QC ( ch_macs2_peaks.collect{it[1]} ) ch_versions = ch_versions.mix(PLOT_MACS2_QC.out.versions) + // + // MODULE: Peak annotation QC plots with R + // PLOT_HOMER_ANNOTATEPEAKS ( HOMER_ANNOTATEPEAKS_MACS2.out.txt.collect{it[1]}, ch_peak_annotation_header, @@ -510,10 +546,13 @@ workflow CHIPSEQ { ch_deseq2_pca_multiqc = Channel.empty() ch_deseq2_clustering_multiqc = Channel.empty() if (!params.skip_consensus_peaks) { - // Create channel: [ meta , [ peaks ] ] - // Where meta = [ id:antibody, multiple_groups:true/false, replicates_exist:true/false ] + // Create channels: [ meta , [ peaks ] ] + // Where meta = [ id:antibody, multiple_groups:true/false, replicates_exist:true/false ] ch_macs2_peaks - .map { meta, peak -> [ meta.antibody, meta.id.split('_')[0..-2].join('_'), peak ] } + .map { + meta, peak -> + [ meta.antibody, meta.id.split('_')[0..-2].join('_'), peak ] + } .groupTuple() .map { antibody, groups, peaks -> @@ -521,23 +560,31 @@ workflow CHIPSEQ { antibody, groups.groupBy().collectEntries { [(it.key) : it.value.size()] }, peaks - ] } + ] + } .map { antibody, groups, peaks -> - def meta = [:] - meta.id = antibody - meta.multiple_groups = groups.size() > 1 - meta.replicates_exist = groups.max { groups.value }.value > 1 - [ meta, peaks ] } + def meta_new = [:] + meta_new.id = antibody + meta_new.multiple_groups = groups.size() > 1 + meta_new.replicates_exist = groups.max { groups.value }.value > 1 + [ meta_new, peaks ] + } .set { ch_antibody_peaks } + // + // MODULE: Generate consensus peaks across samples + // MACS2_CONSENSUS ( ch_antibody_peaks ) ch_macs2_consensus_bed_lib = MACS2_CONSENSUS.out.bed - ch_versions = ch_versions.mix(MACS2_CONSENSUS.out.versions) + ch_versions = ch_versions.mix(MACS2_CONSENSUS.out.versions) if (!params.skip_peak_annotation) { + // + // MODULE: Annotate consensus peaks + // HOMER_ANNOTATEPEAKS_CONSENSUS ( MACS2_CONSENSUS.out.bed, PREPARE_GENOME.out.fasta, @@ -545,40 +592,52 @@ workflow CHIPSEQ { ) ch_versions = ch_versions.mix(HOMER_ANNOTATEPEAKS_CONSENSUS.out.versions) + // + // MODULE: Add boolean fields to annotated consensus peaks to aid filtering + // ANNOTATE_BOOLEAN_PEAKS ( MACS2_CONSENSUS.out.boolean_txt.join(HOMER_ANNOTATEPEAKS_CONSENSUS.out.txt, by: [0]), ) ch_versions = ch_versions.mix(ANNOTATE_BOOLEAN_PEAKS.out.versions) } - // Create channel: [ val(meta), ip_bam ] + // Create channels: [ antibody, [ ip_bams ] ] + ch_ip_control_bam + .map { + meta, ip_bam, control_bam -> + [ meta.antibody, ip_bam ] + } + .groupTuple() + .set { ch_antibody_bams } + + // Create channels: [ meta, [ ip_bams ], saf ] MACS2_CONSENSUS .out .saf - .map { meta, saf -> [ meta.id, meta, saf ] } - .set { ch_ip_saf } - - ch_ip_control_bam - .map { meta, ip_bam, control_bam -> [ meta.antibody, meta, ip_bam ] } - .groupTuple() - .map { it -> [ it[0], it[1][0], it[2].flatten().sort() ] } - .join(ch_ip_saf) + .map { + meta, saf -> + [ meta.id, meta, saf ] + } + .join(ch_antibody_bams) .map { - it -> - def fmeta = it[1] - fmeta['id'] = it[3]['id'] - fmeta['replicates_exist'] = it[3]['replicates_exist'] - fmeta['multiple_groups'] = it[3]['multiple_groups'] - [ fmeta, it[2], it[4] ] } - .set { ch_ip_bam } + antibody, meta, saf, bams -> + [ meta, bams.flatten().sort(), saf ] + } + .set { ch_saf_bams } + // + // MODULE: Quantify peaks across samples with featureCounts + // SUBREAD_FEATURECOUNTS ( - ch_ip_bam + ch_saf_bams ) ch_subreadfeaturecounts_multiqc = SUBREAD_FEATURECOUNTS.out.summary ch_versions = ch_versions.mix(SUBREAD_FEATURECOUNTS.out.versions.first()) if (!params.skip_deseq2_qc) { + // + // MODULE: Generate QC plots with DESeq2 + // DESEQ2_QC ( SUBREAD_FEATURECOUNTS.out.counts, ch_deseq2_pca_header, @@ -590,7 +649,7 @@ workflow CHIPSEQ { } // - // Create IGV session + // MODULE: Create IGV session // if (!params.skip_igv) { IGV ( @@ -599,13 +658,21 @@ workflow CHIPSEQ { ch_macs2_peaks.collect{it[1]}.ifEmpty([]), ch_macs2_consensus_bed_lib.collect{it[1]}.ifEmpty([]), { "${params.aligner}/mergedLibrary/bigwig" }, - { ["${params.aligner}/mergedLibrary/macs2", - params.narrow_peak? '/narrowPeak' : '/broadPeak' - ].join('') }, - { ["${params.aligner}/mergedLibrary/macs2", - params.narrow_peak? '/narrowPeak' : '/broadPeak', - '/consensus' - ].join('') } + { + [ + "${params.aligner}/mergedLibrary/macs2", + params.narrow_peak ? '/narrowPeak' : '/broadPeak' + ] + .join('') + }, + { + [ + "${params.aligner}/mergedLibrary/macs2", + params.narrow_peak ? '/narrowPeak' : '/broadPeak', + '/consensus' + ] + .join('') + } ) ch_versions = ch_versions.mix(IGV.out.versions) } @@ -649,8 +716,10 @@ workflow CHIPSEQ { ch_picardcollectmultiplemetrics_multiqc.collect{it[1]}.ifEmpty([]), ch_preseq_multiqc.collect{it[1]}.ifEmpty([]), + ch_deeptoolsplotprofile_multiqc.collect{it[1]}.ifEmpty([]), ch_deeptoolsplotfingerprint_multiqc.collect{it[1]}.ifEmpty([]), + PHANTOMPEAKQUALTOOLS.out.spp.collect{it[1]}.ifEmpty([]), MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS.out.nsc.collect{it[1]}.ifEmpty([]), MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS.out.rsc.collect{it[1]}.ifEmpty([]), @@ -660,10 +729,11 @@ workflow CHIPSEQ { ch_custompeaks_count_multiqc.collect{it[1]}.ifEmpty([]), ch_plothomerannotatepeaks_multiqc.collect().ifEmpty([]), ch_subreadfeaturecounts_multiqc.collect{it[1]}.ifEmpty([]), + ch_deseq2_pca_multiqc.collect().ifEmpty([]), ch_deseq2_clustering_multiqc.collect().ifEmpty([]) ) - multiqc_report = MULTIQC.out.report.toList() + multiqc_report = MULTIQC.out.report.toList() } }