Merged in PIVOT_wes_updates (pull request #202)
pivot wes updates
MikeWLloyd committed Aug 16, 2024
2 parents f6152b9 + 584ce44 commit 259d10a
Showing 5 changed files with 52 additions and 2 deletions.
2 changes: 2 additions & 0 deletions bin/help/pdx_wes.nf
@@ -15,6 +15,8 @@ Parameter | Default | Description
--csv_input | null | Provide a CSV manifest file with the header: "sampleID,lane,fastq_1,fastq_2". See the repository wiki for an example file. `fastq_2` is optional and used only for PE data. FASTQ entries can be either absolute paths to local files or URLs to remote files. If remote URLs are provided, `--download_data` must be specified.
--download_data | null | Requires `--csv_input`. When specified, read data in the CSV manifest will be downloaded from provided URLs.
--deduplicate_reads | false | Options: false, true. If specified, run BBMap Clumpify on the input reads. Clumpify deduplicates reads prior to trimming, which can help mapping and downstream steps when analyzing high-coverage WES data.
--gen_org | human | Options: human only.
--ref_fa | '/projects/omics_share/human/GRCh38/genome/sequence/gatk/Homo_sapiens_assembly38.fasta' | The reference fasta to be used throughout the process for alignment as well as any downstream analysis. JAX users should not change this parameter.
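For orientation, a manifest matching the `--csv_input` header above might look like the following; the sample names, lanes, and URLs are illustrative placeholders, not the wiki example. Because these entries are remote URLs, the run would also need `--download_data`:

sampleID,lane,fastq_1,fastq_2
tumor01,L001,https://example.com/tumor01_L001_R1.fastq.gz,https://example.com/tumor01_L001_R2.fastq.gz
tumor01,L002,https://example.com/tumor01_L002_R1.fastq.gz,https://example.com/tumor01_L002_R2.fastq.gz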
1 change: 1 addition & 0 deletions bin/log/pdx_wes.nf
@@ -26,6 +26,7 @@ ______________________________________________________
-c ${params.config}
--pubdir ${params.pubdir}
--organize_by ${params.organize_by}
--deduplicate_reads ${params.deduplicate_reads}
--xenome_index ${params.xenome_prefix}
--ref_fa ${params.ref_fa}
--ref_fa_indices ${params.ref_fa_indices}
3 changes: 2 additions & 1 deletion config/pdx_wes.config
@@ -19,7 +19,8 @@ params {
    concat_lanes = false
    download_data = false
    csv_input = null

    deduplicate_reads = false

    multiqc_config = "${projectDir}/bin/shared/multiqc/pdx_wes_multiqc.yaml"

    // Reference fasta
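As a sketch of how this new default would be toggled in practice (the file name user.config is hypothetical, not part of the repository): Nextflow treats a command-line option given without a value as boolean true, so `nextflow run <pipeline> --deduplicate_reads` enables the step, as does an override config supplied with `-c`:

// user.config -- hypothetical override file
params {
    deduplicate_reads = true
}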
37 changes: 37 additions & 0 deletions modules/bbmap/bbmap_clumpify.nf
@@ -0,0 +1,37 @@
process CLUMPIFY {
    tag "$sampleID"

    cpus 6
    memory 200.GB
    time '48:00:00'
    errorStrategy {(task.exitStatus == 140) ? {log.info "\n\nError code: ${task.exitStatus} for task: ${task.name}. Likely caused by the task wall clock: ${task.time} or memory: ${task.memory} being exceeded.\nAttempting orderly shutdown.\nSee .command.log in: ${task.workDir} for more info.\n\n"; return 'finish'}.call() : 'finish'}

    container 'quay.io/biocontainers/bbmap:39.06--h92535d8_0'

    publishDir "${params.pubdir}/${ params.organize_by=='sample' ? sampleID+'/stats' : 'stats'}", pattern: "*.txt", mode:'copy'

    input:
    tuple val(sampleID), path(fq_reads)

    output:
    tuple val(sampleID), path("${sampleID}.clumpy.R*.fastq.gz"), emit: clumpy_fastq
    tuple val(sampleID), path("*log.txt"), emit: clumpy_log

    script:
    // testformat.sh (BBTools) reports the FASTQ quality encoding: legacy Illumina
    // 1.3/1.5 data uses a Phred+64 offset, everything else Phred+33. The result
    // sets Clumpify's qin accordingly.
    if (params.read_type == "SE")
        """
        testformat.sh ${fq_reads[0]} > fastq_format.txt
        if grep -q 'illumina' fastq_format.txt ; then qual=64; else qual=33; fi
        clumpify.sh in=${fq_reads[0]} out=${sampleID}.clumpy.R1.fastq.gz tmpdir=./ usetmpdir=t dedupe=t qin=\${qual} -Xmx199g &> ${sampleID}_clumpy_log.txt
        """
    else
        """
        testformat.sh ${fq_reads[0]} > fastq_format.txt
        if grep -q 'illumina' fastq_format.txt ; then qual=64; else qual=33; fi
        clumpify.sh in=${fq_reads[0]} in2=${fq_reads[1]} out=${sampleID}.clumpy.R1.fastq.gz out2=${sampleID}.clumpy.R2.fastq.gz tmpdir=./ usetmpdir=t dedupe=t qin=\${qual} -Xmx199g &> ${sampleID}_clumpy_log.txt
        """
}
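As a quick sanity check on the module's output (a sketch, not part of the pipeline; the input file name sample1_R1.fastq.gz is a placeholder), read counts before and after Clumpify can be compared. Deduplication should only ever shrink the count:

# FASTQ records are 4 lines each
before=$(zcat sample1_R1.fastq.gz | wc -l)
after=$(zcat sample1.clumpy.R1.fastq.gz | wc -l)
echo "reads in:  $((before / 4))"
echo "reads out: $((after / 4))"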
11 changes: 10 additions & 1 deletion workflows/pdx_wes.nf
@@ -10,6 +10,7 @@ include {FILE_DOWNLOAD} from "${projectDir}/subworkflows/aria_download_parse"
include {CONCATENATE_LOCAL_FILES} from "${projectDir}/subworkflows/concatenate_local_files"
include {CONCATENATE_READS_PE} from "${projectDir}/modules/utility_modules/concatenate_reads_PE"
include {CONCATENATE_READS_SE} from "${projectDir}/modules/utility_modules/concatenate_reads_SE"
include {CLUMPIFY} from "${projectDir}/modules/bbmap/bbmap_clumpify"
include {JAX_TRIMMER} from "${projectDir}/modules/utility_modules/jax_trimmer"
include {FASTQC} from "${projectDir}/modules/fastqc/fastqc"
include {XENOME_CLASSIFY} from "${projectDir}/modules/xenome/xenome"
@@ -138,8 +139,16 @@ workflow PDX_WES {

    // ** MAIN workflow starts:

    // Optional Step -- Clumpify
    if (params.deduplicate_reads) {
        CLUMPIFY(read_ch)
        trimmer_input = CLUMPIFY.out.clumpy_fastq
    } else {
        trimmer_input = read_ch
    }

    // Step 1: Qual_Stat
-   JAX_TRIMMER(read_ch)
+   JAX_TRIMMER(trimmer_input)

    xenome_input = JAX_TRIMMER.out.trimmed_fastq

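The optional-step pattern above, calling a process only when a flag is set and reassigning the downstream channel either way, can be exercised in isolation. Below is a minimal sketch with a placeholder process body, not the real Clumpify call; the process name DEDUPE and the input file s1.fastq.gz are hypothetical:

nextflow.enable.dsl = 2

params.deduplicate_reads = false

// Stand-in for CLUMPIFY: copies the reads so the channel shape is preserved.
process DEDUPE {
    input:
    tuple val(sampleID), path(reads)

    output:
    tuple val(sampleID), path("deduped_*")

    script:
    "cp ${reads} deduped_${reads}"
}

workflow {
    read_ch = Channel.of(['s1', file('s1.fastq.gz')])   // assumes the file exists

    if (params.deduplicate_reads) {
        DEDUPE(read_ch)
        trimmer_input = DEDUPE.out
    } else {
        trimmer_input = read_ch
    }

    trimmer_input.view()
}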
