-
Notifications
You must be signed in to change notification settings - Fork 0
/
tag_diff.py
executable file
·48 lines (39 loc) · 1.26 KB
/
tag_diff.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# coding=utf-8
import sys
import os
# Report the difference between 2 G2R files.
def parsg2r(g2rFile):
    """Parse a G2R file into a read -> gene mapping.

    The file is read line by line as tab-separated records:

    * a line starting with "G" makes its second field the current gene;
    * a line starting with "R" maps its second field (the read) to the
      current gene;
    * any other line (including blank lines) is ignored.

    A "G" or "R" line that has no second tab-separated field is skipped
    instead of raising ``IndexError``, so one malformed record does not
    abort the whole comparison.

    Args:
        g2rFile: Path of the G2R file to read.

    Returns:
        A dict mapping each read to the gene of the closest preceding "G"
        line ("" when no "G" line came before it). When a read occurs more
        than once, the last occurrence wins.
    """
    readTag = {}
    currentGene = ""
    with open(g2rFile, 'r') as g2rf:
        for line in g2rf:
            is_gene = line.startswith("G")
            if not (is_gene or line.startswith("R")):
                continue
            fields = line.split("\t")
            if len(fields) < 2:
                # Record marker without a value field: nothing to store.
                continue
            # Only the last field of a line can carry the trailing newline.
            value = fields[1].rstrip("\n")
            if is_gene:
                currentGene = value
            else:
                readTag[value] = currentGene
    return readTag
def compare_tags(tag1, tag2):
    """Compare two read -> gene mappings.

    Args:
        tag1: Mapping built from the first file.
        tag2: Mapping built from the second file.

    Returns:
        A tuple ``(diffs, f1missing, f2missing)``:

        * ``diffs`` -- list of ``(read, gene1, gene2)`` for reads present in
          both mappings with different genes, sorted by read;
        * ``f1missing`` -- number of reads found only in ``tag2``;
        * ``f2missing`` -- number of reads found only in ``tag1``.
    """
    diffs = []
    f1missing = 0
    f2missing = 0
    # Sorted so the report is identical from one run to the next; plain set
    # iteration order changes with string hash randomization.
    for read in sorted(tag1.keys() | tag2.keys()):
        if read not in tag1:
            f1missing += 1
        elif read not in tag2:
            f2missing += 1
        elif tag1[read] != tag2[read]:
            diffs.append((read, tag1[read], tag2[read]))
    return diffs, f1missing, f2missing


def main(argv):
    """Print the differences between the two G2R files named in *argv*.

    Args:
        argv: Command-line argument list; ``argv[1]`` and ``argv[2]`` are the
            paths of the two files to compare. Extra arguments are ignored.

    Exits with a usage message when fewer than two paths are given.
    """
    if len(argv) < 3:
        sys.exit("usage: tag_diff.py FILE1 FILE2")

    tag1 = parsg2r(argv[1])
    tag2 = parsg2r(argv[2])
    diffs, f1missing, f2missing = compare_tags(tag1, tag2)

    for read, gene1, gene2 in diffs:
        print("DIFF:", read, gene1, gene2, sep="\t")
    print("Mismatch:", len(diffs), sep="\t")
    print("Missing in file 1:", f1missing, sep="\t")
    print("Missing in file 2:", f2missing, sep="\t")


if __name__ == "__main__":
    main(sys.argv)