-
Notifications
You must be signed in to change notification settings - Fork 0
/
bwamem.py
executable file
·79 lines (62 loc) · 2.49 KB
/
bwamem.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python
import logging
import subprocess
import argparse
import sys
import tempfile
import string
import shutil
def run_command(command=str):
logging.info("Running: %s" % (command))
run=subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
(stdout,stderr)=run.communicate()
if run.returncode != 0:
raise Exception("Tool fail %s\n%s" % (stdout,stderr))
return (stdout,stderr)
def timing(command_name):
date_cmd = r'date +%s'
run_command("%s >> %s_timing.txt" % (date_cmd,command_name))
def get_readgroup(bamfile):
out, err = run_command("samtools view -H %s" % (bamfile))
rgs = []
for line in out.split("\n"):
if line.startswith("@RG\t"):
rgs.append(line.rstrip("\n\r"))
if len(rgs) != 1:
raise Exception("Wrong number of readgroups in BAM file")
return rgs[0]
def get_rgid(rgline):
for i in rgline.split("\t"):
if i.startswith("ID:"):
return i[3:]
def main(args):
rgline = get_readgroup(args.inbam)
rgid = get_rgid(rgline)
work_dir = tempfile.mkdtemp(dir=args.workdir, prefix="bwa_mem_")
template = "bamtofastq T=${tmpdir}/bamtofastq_tmp S=${tmpdir}/single.fq O=${tmpdir}/unmatched_1.fq O2=${tmpdir}/unmatched_2.fq exclude=QCFAIL,SECONDARY,SUPPLEMENTARY collate=1 filename=${inbam} | \
${filter_cmd} 2> ${inbam}.count.txt | \
bwa mem -t 8 -p -T 0 -R '${rgline}' ${refseq} - | \
bamsort inputformat=sam level=1 inputthreads=2 outputthreads=2 calmdnm=1 calmdnmrecompindetonly=1 calmdnmreference=${refseq} tmpfile=${tmpdir}/out.sorttmp O=${tmpdir}/out.bam 2> ${inbam}.bamsort_info.txt"
#used to fix read name issue in bamtofastq output AND to produce a read counts file
rg_filter_command = 'perl -e \'while(<>){$i++; $_ =~ s|@[01](/[12])$|\\1| if($i % 4 == 1); print $_;} $c = $i/4; warn "$c\n";\''
cmd = string.Template(template).substitute({
"inbam" : args.inbam,
"outbam" : args.outbam,
"filter_cmd" : rg_filter_command,
"rgline" : rgline,
"refseq" : args.refseq,
"tmpdir" : work_dir
})
run_command(cmd)
shutil.move( "%s/out.bam" % (work_dir), args.outbam)
shutil.rmtree(work_dir)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("-r", "--refseq", required=True)
#parser.add_argument("-it", "--input-threads", default="2" )
#parser.add_argument("-ot", "--output-threads", default="2" )
parser.add_argument("-i", "--inbam", required=True)
parser.add_argument("-o", "--outbam", required=True)
parser.add_argument("-w", "--workdir", default="./")
args = parser.parse_args()
sys.exit(main(args))