From 7a17e3048e5c4cd3bb95b3495a049a74ee87330f Mon Sep 17 00:00:00 2001 From: Chris Miller Date: Sun, 9 Sep 2018 23:21:44 -0500 Subject: [PATCH 1/3] doing work in output dir instead of tmp, gzipping output vcf --- mapq0_vcf_filter.sh | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/mapq0_vcf_filter.sh b/mapq0_vcf_filter.sh index 714719e..090837f 100755 --- a/mapq0_vcf_filter.sh +++ b/mapq0_vcf_filter.sh @@ -1,24 +1,23 @@ #!/bin/bash set -e -vcf=$1 -bam=$2 -ref_fasta=$3 -mapq0perc=$4 -outvcf=$5 - +outvcf=$1 +vcf=$2 +bam=$3 +ref_fasta=$4 +mapq0perc=$5 +outdir=$(dirname "$outvcf") #grab sites that don't already have the MQ0 field count=0; zgrep -v "^#" "$vcf" | grep -v "MQ0" | cut -f 1,2 | while read chr pos;do - echo $chr $pos $((pos+1)) - pysamstats --type mapq --chromosome $chr --start $pos --end $((pos+1)) "$bam" | grep $pos | cut -f 1,2,5 >>/tmp/mapq0counts + pysamstats --type mapq --chromosome $chr --start $pos --end $((pos+1)) "$bam" | grep $pos | cut -f 1,2,5 >>$outdir/mapq0counts count=$((count+1)) done if [[ $count -eq 0 ]];then #no sites to process, just copy the vcf - cp $vcf /tmp/mapq0.vcf + cp $vcf $outdir/mapq0.vcf else #need to add MQ0 @@ -37,14 +36,15 @@ else if [[ $mqcount -gt 0 ]];then #already has mq0 set, we're all good - vcf-info-annotator --overwrite -o /tmp/mapq0.vcf "$vcf" /tmp/mapq0counts MQ0 + vcf-info-annotator --overwrite -o $outdir/mapq0.vcf "$vcf" $outdir/mapq0counts MQ0 else #no mq0, need to set the header line as well - vcf-info-annotator -o /tmp/mapq0.vcf -f Integer -d "Number of MAPQ == 0 reads covering this record" "$vcf" /tmp/mapq0counts MQ0 + vcf-info-annotator -o $outdir/mapq0.vcf -f Integer -d "Number of MAPQ == 0 reads covering this record" "$vcf" $outdir/mapq0counts MQ0 fi fi - #finally, set the filter tags on the vcf #the multiplication by 1.0 is necessary to convert integers to floats before dividing in the JEXL expression #(which is dumb, and I want an hour of my life back) -java -jar /opt/GenomeAnalysisTK.jar -T VariantFiltration -R $ref_fasta -o $outvcf --variant /tmp/mapq0.vcf --filterExpression "((MQ0*1.0) / (DP*1.0)) > $mapq0perc" --filterName "MAPQ0" +java -jar /opt/GenomeAnalysisTK.jar -T VariantFiltration -R $ref_fasta -o $outdir/filtered.vcf --variant $outdir/mapq0.vcf --filterExpression "((MQ0*1.0) / (DP*1.0)) > $mapq0perc" --filterName "MAPQ0" + +gzip -c $outdir/filtered.vcf >$outvcf From 14555c0a248e94765f743766400db44b862dc509 Mon Sep 17 00:00:00 2001 From: Chris Miller Date: Sun, 9 Sep 2018 23:21:44 -0500 Subject: [PATCH 2/3] doing work in output dir instead of tmp, gzipping output vcf --- mapq0_vcf_filter.sh | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/mapq0_vcf_filter.sh b/mapq0_vcf_filter.sh index 714719e..090837f 100755 --- a/mapq0_vcf_filter.sh +++ b/mapq0_vcf_filter.sh @@ -1,24 +1,23 @@ #!/bin/bash set -e -vcf=$1 -bam=$2 -ref_fasta=$3 -mapq0perc=$4 -outvcf=$5 - +outvcf=$1 +vcf=$2 +bam=$3 +ref_fasta=$4 +mapq0perc=$5 +outdir=$(dirname "$outvcf") #grab sites that don't already have the MQ0 field count=0; zgrep -v "^#" "$vcf" | grep -v "MQ0" | cut -f 1,2 | while read chr pos;do - echo $chr $pos $((pos+1)) - pysamstats --type mapq --chromosome $chr --start $pos --end $((pos+1)) "$bam" | grep $pos | cut -f 1,2,5 >>/tmp/mapq0counts + pysamstats --type mapq --chromosome $chr --start $pos --end $((pos+1)) "$bam" | grep $pos | cut -f 1,2,5 >>$outdir/mapq0counts count=$((count+1)) done if [[ $count -eq 0 ]];then #no sites to process, just copy the vcf - cp $vcf /tmp/mapq0.vcf + cp $vcf $outdir/mapq0.vcf else #need to add MQ0 @@ -37,14 +36,15 @@ else if [[ $mqcount -gt 0 ]];then #already has mq0 set, we're all good - vcf-info-annotator --overwrite -o /tmp/mapq0.vcf "$vcf" /tmp/mapq0counts MQ0 + vcf-info-annotator --overwrite -o $outdir/mapq0.vcf "$vcf" $outdir/mapq0counts MQ0 else #no mq0, need to set the header line as well - vcf-info-annotator -o /tmp/mapq0.vcf -f Integer -d "Number of MAPQ == 0 reads covering this record" "$vcf" /tmp/mapq0counts MQ0 + vcf-info-annotator -o $outdir/mapq0.vcf -f Integer -d "Number of MAPQ == 0 reads covering this record" "$vcf" $outdir/mapq0counts MQ0 fi fi - #finally, set the filter tags on the vcf #the multiplication by 1.0 is necessary to convert integers to floats before dividing in the JEXL expression #(which is dumb, and I want an hour of my life back) -java -jar /opt/GenomeAnalysisTK.jar -T VariantFiltration -R $ref_fasta -o $outvcf --variant /tmp/mapq0.vcf --filterExpression "((MQ0*1.0) / (DP*1.0)) > $mapq0perc" --filterName "MAPQ0" +java -jar /opt/GenomeAnalysisTK.jar -T VariantFiltration -R $ref_fasta -o $outdir/filtered.vcf --variant $outdir/mapq0.vcf --filterExpression "((MQ0*1.0) / (DP*1.0)) > $mapq0perc" --filterName "MAPQ0" + +gzip -c $outdir/filtered.vcf >$outvcf From 0f52fc3ca0d92356a0ef2ca05f355780b0f1d5e6 Mon Sep 17 00:00:00 2001 From: Chris Miller Date: Mon, 10 Sep 2018 09:38:37 -0500 Subject: [PATCH 3/3] let gatk handle the gzipping --- mapq0_vcf_filter.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mapq0_vcf_filter.sh b/mapq0_vcf_filter.sh index 090837f..ef162e7 100755 --- a/mapq0_vcf_filter.sh +++ b/mapq0_vcf_filter.sh @@ -45,6 +45,4 @@ fi #finally, set the filter tags on the vcf #the multiplication by 1.0 is necessary to convert integers to floats before dividing in the JEXL expression #(which is dumb, and I want an hour of my life back) -java -jar /opt/GenomeAnalysisTK.jar -T VariantFiltration -R $ref_fasta -o $outdir/filtered.vcf --variant $outdir/mapq0.vcf --filterExpression "((MQ0*1.0) / (DP*1.0)) > $mapq0perc" --filterName "MAPQ0" - -gzip -c $outdir/filtered.vcf >$outvcf +java -jar /opt/GenomeAnalysisTK.jar -T VariantFiltration -R $ref_fasta -o $outdir/mapq_filtered.vcf.gz --variant $outdir/mapq0.vcf --filterExpression "((MQ0*1.0) / (DP*1.0)) > $mapq0perc" --filterName "MAPQ0"