From ebc34c2a5d3c89ca669b35fb4d2998d24da12979 Mon Sep 17 00:00:00 2001 From: Dan Fornika Date: Fri, 17 Jun 2022 15:38:53 -0700 Subject: [PATCH] Allow genome size to be configurable in downsample_amplicons script (#51) --- bin/downsample_amplicons.py | 3 ++- modules/illumina.nf | 2 +- modules/utils.nf | 11 +++++++++-- workflows/illuminaNcov.nf | 2 +- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/bin/downsample_amplicons.py b/bin/downsample_amplicons.py index 582c1a7..5ba68ff 100755 --- a/bin/downsample_amplicons.py +++ b/bin/downsample_amplicons.py @@ -229,7 +229,7 @@ def main(args): required_depth_achieved = [False] * len(genome_checkpoints) - depths = [0] * 30000 + depths = [0] * (args.genome_size + 100) infile = pysam.AlignmentFile(args.bam, "rb") bam_header = infile.header.copy().to_dict() @@ -303,6 +303,7 @@ def main(args): parser.add_argument('--min-depth', type=int, default=200, help='Subsample to n coverage') parser.add_argument('--mapping-quality', type=int, default=20, help='Minimum mapping quality to include read in output') parser.add_argument('--amplicon-subdivisions', type=int, default=3, help='How many times to divide amplicons to detect coverage') + parser.add_argument('--genome-size', type=int, default=29903, help='') parser.add_argument('--verbose', action='store_true', help='Debug mode') args = parser.parse_args() main(args) diff --git a/modules/illumina.nf b/modules/illumina.nf index 76a91a4..544c3ef 100644 --- a/modules/illumina.nf +++ b/modules/illumina.nf @@ -228,7 +228,7 @@ process callConsensusFreebayes { bcftools consensus -f ${ref} -I ${sampleName}.ambiguous.norm.vcf.gz > ${sampleName}.ambiguous.fa # apply remaninng variants, including indels - bcftools consensus -f ${sampleName}.ambiguous.fa -m ${sampleName}.mask.txt ${sampleName}.fixed.norm.vcf.gz | sed s/MN908947.3/${sampleName}/ > ${sampleName}.consensus.fa + bcftools consensus -f ${sampleName}.ambiguous.fa -m ${sampleName}.mask.txt ${sampleName}.fixed.norm.vcf.gz | sed s/${params.viral_contig_name}/${sampleName}/ > ${sampleName}.consensus.fa """ } diff --git a/modules/utils.nf b/modules/utils.nf index e65b3f8..c0655a8 100644 --- a/modules/utils.nf +++ b/modules/utils.nf @@ -27,14 +27,21 @@ process downsampleAmplicons { publishDir "${params.outdir}/${task.process.replaceAll(":","_")}", pattern: "${sampleId}.mapped.primertrimmed.downsampled.sorted{.bam,.bam.bai}", mode: 'copy' input: - tuple val(sampleId), path(trimmed_bam), path(trimmed_bam_index), path(bedfile) + tuple val(sampleId), path(trimmed_bam), path(trimmed_bam_index), path(bedfile), path(ref) output: tuple val(sampleId), path("${sampleId}.mapped.primertrimmed.downsampled.sorted.bam"), path("${sampleId}.mapped.primertrimmed.downsampled.sorted.bam.bai"), emit: alignment path("downsampling_summary.csv"), emit: summary script: """ - downsample_amplicons.py --bed ${bedfile} --min-depth ${params.downsampleMinDepth} --mapping-quality ${params.downsampleMappingQuality} --amplicon-subdivisions ${params.downsampleAmpliconSubdivisions} ${trimmed_bam} 2> downsampling_summary_no_sample_id.csv | \ + samtools faidx ${ref} + downsample_amplicons.py \ + --bed ${bedfile} \ + --min-depth ${params.downsampleMinDepth} \ + --mapping-quality ${params.downsampleMappingQuality} \ + --amplicon-subdivisions ${params.downsampleAmpliconSubdivisions} \ + --genome-size \$(cut -d \$'\\t' -f 2 ${ref}.fai) \ + ${trimmed_bam} 2> downsampling_summary_no_sample_id.csv | \ samtools sort - -o ${sampleId}.mapped.primertrimmed.downsampled.sorted.bam samtools index ${sampleId}.mapped.primertrimmed.downsampled.sorted.bam paste -d ',' <(echo "sample_id" && echo "${sampleId}") downsampling_summary_no_sample_id.csv > downsampling_summary.csv diff --git a/workflows/illuminaNcov.nf b/workflows/illuminaNcov.nf index 9465ca4..0687e49 100644 --- a/workflows/illuminaNcov.nf +++ b/workflows/illuminaNcov.nf @@ -108,7 +108,7 @@ workflow sequenceAnalysis { trimPrimerSequences(readMapping.out.combine(ch_bedFile)) - downsampleAmplicons(trimPrimerSequences.out.ptrim.combine(ch_bedFile)) + downsampleAmplicons(trimPrimerSequences.out.ptrim.combine(ch_bedFile).combine(Channel.fromPath(params.ref))) downsampledBamToFastq(downsampleAmplicons.out.alignment)