Skip to content

Commit

Permalink
Merge pull request #304 from drpatelh/updates
Browse files Browse the repository at this point in the history
Fix missing reports in MultiQC and other publishing discrepancies with DSL1 version
  • Loading branch information
JoseEspinosa authored Sep 15, 2022
2 parents 1683f39 + e7c5516 commit 84c2299
Show file tree
Hide file tree
Showing 16 changed files with 209 additions and 127 deletions.
2 changes: 1 addition & 1 deletion assets/multiqc_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ module_order:
anchor: "mlib_featurecounts"
info: "This section of the report shows featureCounts results for the number of reads assigned to merged library consensus peaks."
path_filters:
- "./macs/consensus/*.summary"
- "./macs2/featurecounts/*.summary"

report_section_order:
peak_count:
Expand Down
24 changes: 16 additions & 8 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,6 @@ process {

withName: 'PHANTOMPEAKQUALTOOLS' {
ext.args2 = { "-p=$task.cpus" }
ext.prefix = { "${meta.id}.mLb.clN" }
publishDir = [
path: { "${params.outdir}/${params.aligner}/mergedLibrary/phantompeakqualtools" },
mode: params.publish_dir_mode,
Expand Down Expand Up @@ -599,7 +598,8 @@ if (!params.skip_peak_annotation) {
}

withName: 'PLOT_HOMER_ANNOTATEPEAKS' {
ext.args = '-o ./ -p macs2_annotatePeaks'
ext.args = '-o ./'
ext.prefix = 'macs2_annotatePeaks'
publishDir = [
path: { [
"${params.outdir}/${params.aligner}/mergedLibrary/macs2",
Expand All @@ -623,7 +623,8 @@ if (!params.skip_consensus_peaks) {
path: { [
"${params.outdir}/${params.aligner}/mergedLibrary/macs2",
params.narrow_peak? '/narrowPeak' : '/broadPeak',
'/consensus'
'/consensus',
"/${meta.id}"
].join('') },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
Expand All @@ -632,11 +633,13 @@ if (!params.skip_consensus_peaks) {

withName: 'SUBREAD_FEATURECOUNTS' {
ext.args = '-F SAF -O --fracOverlap 0.2'
ext.prefix = { "${meta.id}.consensus_peaks" }
publishDir = [
path: { [
"${params.outdir}/${params.aligner}/mergedLibrary/macs2",
params.narrow_peak? '/narrowPeak' : '/broadPeak',
'/consensus'
'/consensus',
"/${meta.id}"
].join('') },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
Expand All @@ -648,24 +651,27 @@ if (!params.skip_consensus_peaks) {
process {
withName: 'HOMER_ANNOTATEPEAKS_CONSENSUS' {
ext.args = '-gid'
ext.prefix = 'consensus_peaks'
ext.prefix = { "${meta.id}.consensus_peaks" }
publishDir = [
path: { [
"${params.outdir}/${params.aligner}/mergedLibrary/macs2",
params.narrow_peak? '/narrowPeak' : '/broadPeak',
'/consensus'
'/consensus',
"/${meta.id}"
].join('') },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'ANNOTATE_BOOLEAN_PEAKS' {
ext.prefix = { "${meta.id}_peaks" }
ext.prefix = { "${meta.id}.consensus_peaks" }
publishDir = [
path: { [
"${params.outdir}/${params.aligner}/mergedLibrary/macs2",
params.narrow_peak? '/narrowPeak' : '/broadPeak',
'/consensus'
'/consensus',
"/${meta.id}"
].join('') },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
Expand All @@ -684,11 +690,13 @@ if (!params.skip_consensus_peaks) {
'--count_col 7',
params.deseq2_vst ? '--vst TRUE' : ''
].join(' ').trim()
ext.prefix = { "${meta.id}.consensus_peaks" }
publishDir = [
path: { [
"${params.outdir}/${params.aligner}/mergedLibrary/macs2",
params.narrow_peak? '/narrowPeak' : '/broadPeak',
'/consensus',
"/${meta.id}",
'/deseq2'
].join('') },
mode: params.publish_dir_mode,
Expand Down
8 changes: 4 additions & 4 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@
*/

params {
config_profile_name = 'Test profile'
config_profile_name = 'Test profile'
config_profile_description = 'Minimal test dataset to check pipeline function'

// Limit resources so that this can run on GitHub Actions
max_cpus = 2
max_memory = 6.GB
max_time = 6.h
max_cpus = 2
max_memory = '6.GB'
max_time = '6.h'

// Input data
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/samplesheet/v2.0/samplesheet_test.csv'
Expand Down
2 changes: 1 addition & 1 deletion conf/test_full.config
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ params {
// Input data for full size test
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/samplesheet/v2.0/samplesheet_full.csv'

// Used to get macs_gsize
// Used to calculate --macs_gsize
read_length = 50

// Genome references
Expand Down
2 changes: 1 addition & 1 deletion docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ The [featureCounts](http://bioinf.wehi.edu.au/featureCounts/) tool is used to co

**This pipeline uses a standardised DESeq2 analysis script to get an idea of the reproducibility within the experiment, and to assess the overall differential binding. Please note that this will not suit every experimental design, and if there are other problems with the experiment then it may not work as well as expected.**

For larger experiments, it may be recommended to use the `vst` transformation instead of the default `rlog` option. You can do this by providing the `--deseq2_vst` parameter to the pipeline. See [DESeq2 docs](http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#data-transformations-and-visualization) for a more detailed explanation.
For larger experiments, the `vst` transformation is recommended over `rlog`, so the pipeline applies `vst` by default. To use `rlog` instead, disable it with the `--deseq2_vst` parameter (e.g. `--deseq2_vst false`). See [DESeq2 docs](http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#data-transformations-and-visualization) for a more detailed explanation.

![MultiQC - DESeq2 PCA plot](images/mqc_deseq2_pca_plot.png)

Expand Down
4 changes: 2 additions & 2 deletions lib/WorkflowChipseq.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class WorkflowChipseq {


if (!params.fasta) {
log.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file."
log.error "Genome fasta file not specified with e.g. '--fasta' or via a detectable config file."
System.exit(1)
}

Expand Down Expand Up @@ -98,7 +98,7 @@ class WorkflowChipseq {
log.warn "=============================================================================\n" +
" --macs_gsize parameter has not been provided.\n" +
" It will be auto-calculated by 'khmer unique-kmers.py' using the '--read_length' parameter.\n" +
" Explicitly provide '--macs_gsize macs2_genome_size' to change this behaviour.\n" +
" Explicitly provide '--macs_gsize' to change this behaviour.\n" +
"==================================================================================="
}

Expand Down
2 changes: 1 addition & 1 deletion modules/local/annotate_boolean_peaks.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
process ANNOTATE_BOOLEAN_PEAKS {

tag "$meta.id"
label 'process_low'

conda (params.enable_conda ? "conda-forge::sed=4.7" : null)
Expand Down
7 changes: 3 additions & 4 deletions modules/local/deseq2_qc.nf
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@ process DESEQ2_QC {
script:
def args = task.ext.args ?: ''
def peak_type = params.narrow_peak ? 'narrowPeak' : 'broadPeak'
def antibody = meta.antibody
def prefix = "${antibody}.consensus_peaks"
def prefix = task.ext.prefix ?: "${meta.id}"
"""
deseq2_qc.r \\
--count_file $counts \\
Expand All @@ -38,11 +37,11 @@ process DESEQ2_QC {
$args
sed 's/deseq2_pca/deseq2_pca_${task.index}/g' <$deseq2_pca_header >tmp.txt
sed -i -e 's/DESeq2 /${antibody} DESeq2 /g' tmp.txt
sed -i -e 's/DESeq2 /${meta.id} DESeq2 /g' tmp.txt
cat tmp.txt ${prefix}.pca.vals.txt > ${prefix}.pca.vals_mqc.tsv
sed 's/deseq2_clustering/deseq2_clustering_${task.index}/g' <$deseq2_clustering_header >tmp.txt
sed -i -e 's/DESeq2 /${antibody} DESeq2 /g' tmp.txt
sed -i -e 's/DESeq2 /${meta.id} DESeq2 /g' tmp.txt
cat tmp.txt ${prefix}.sample.dists.txt > ${prefix}.sample.dists_mqc.tsv
cat <<-END_VERSIONS > versions.yml
Expand Down
2 changes: 1 addition & 1 deletion modules/local/frip_score.nf
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ process FRIP_SCORE {
"""
READS_IN_PEAKS=\$(intersectBed -a $bam -b $peak $args | awk -F '\t' '{sum += \$NF} END {print sum}')
samtools flagstat $bam > ${bam}.flagstat
grep 'mapped (' ${bam}.flagstat | awk -v a="\$READS_IN_PEAKS" -v OFS='\t' '{print "${prefix}", a/\$1}' > ${prefix}.FRiP.txt
grep 'mapped (' ${bam}.flagstat | grep -v "primary" | awk -v a="\$READS_IN_PEAKS" -v OFS='\t' '{print "${prefix}", a/\$1}' > ${prefix}.FRiP.txt
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
5 changes: 3 additions & 2 deletions modules/local/multiqc.nf
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,10 @@ process MULTIQC {
path ('alignment/mergedLibrary/filtered/picard_metrics/*')

path ('preseq/*')

path ('deeptools/*')
path ('deeptools/*')

path ('phantompeakqualtools/*')
path ('phantompeakqualtools/*')
path ('phantompeakqualtools/*')
Expand All @@ -41,8 +43,7 @@ process MULTIQC {
path ('macs2/peaks/*')
path ('macs2/peaks/*')
path ('macs2/annotation/*')

path ('featurecounts/*')
path ('macs2/featurecounts/*')

path ('deseq2/*')
path ('deseq2/*')
Expand Down
2 changes: 1 addition & 1 deletion modules/local/multiqc_custom_peaks.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
process MULTIQC_CUSTOM_PEAKS {

tag "$meta.id"
conda (params.enable_conda ? "conda-forge::sed=4.7" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
Expand Down
1 change: 1 addition & 0 deletions modules/local/multiqc_custom_phantompeakqualtools.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
process MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS {
tag "$meta.id"
conda (params.enable_conda ? "conda-forge::r-base=3.5.1" : null)
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/r-base:3.5.1':
Expand Down
4 changes: 3 additions & 1 deletion modules/local/plot_homer_annotatepeaks.nf
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,15 @@ process PLOT_HOMER_ANNOTATEPEAKS {

script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "annotatepeaks"
"""
plot_homer_annotatepeaks.r \\
-i ${annos.join(',')} \\
-s ${annos.join(',').replaceAll("${suffix}","")} \\
-p $prefix \\
$args
find ./ -type f -name "*.txt" -exec cat {} \\; | cat $mqc_header - > annotatepeaks.summary_mqc.tsv
find ./ -type f -name "*.txt" -exec cat {} \\; | cat $mqc_header - > ${prefix}.summary_mqc.tsv
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ params {
skip_consensus_peaks = false

// Options: DESeq2 QC
deseq2_vst = false
deseq2_vst = true
skip_deseq2_qc = false

// Options: QC
Expand Down
3 changes: 2 additions & 1 deletion nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,8 @@
"type": "boolean",
"description": "Use vst transformation instead of rlog with DESeq2.",
"help_text": "See [DESeq2 docs](http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#data-transformations-and-visualization).",
"fa_icon": "fas fa-dolly"
"fa_icon": "fas fa-dolly",
"default": true
},
"skip_plot_profile": {
"type": "boolean",
Expand Down
Loading

0 comments on commit 84c2299

Please sign in to comment.