-
Notifications
You must be signed in to change notification settings - Fork 0
/
parser.py
executable file
·82 lines (65 loc) · 2.61 KB
/
parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
'''
Created on 2010-07-26
@author: Shobhit Jain
@contact: [email protected]
'''
class Parser(object):
    '''
    Parser class implementing functions for parsing the input data files.

    The methods call ``self._add_node`` and ``self._add_edge`` and fill
    ``self.go_annotations``. None of these are defined in this class, so
    they must be provided by the class Parser is combined with.
    '''

    def __init__(self):
        '''
        Constructor. Only forwards to the next initialiser in the MRO.
        '''
        super(Parser, self).__init__()

    def _store_term(self, node, name, domain, parent):
        '''
        Register one parsed [Term] stanza.

        Adds ``node`` and every identifier in ``parent`` as nodes, adds an
        edge from each parent to ``node`` and creates (or overwrites) the
        ``go_annotations`` entry of ``node`` with empty gene/ancestor sets.
        A stanza without an ``id:`` line is ignored.
        '''
        if not node:
            # Nothing usable to key the entry on.
            return
        self._add_node(node)
        for term in parent:
            self._add_node(term)
            self._add_edge(term, node)
        self.go_annotations[node] = {'name': name,
                                     'domain': domain,
                                     'gene': set(),
                                     'ancestors': set(),
                                     'cluster': {}
                                     }

    def _obo_parser(self, obo_file):
        '''
        Parser for Gene Ontology obo files. go_annotations variable is
        updated.

        Only [Term] stanzas are read. For each one the ``id``, ``name``
        and ``namespace`` values are stored, and the targets of ``is_a``
        and ``relationship`` lines are recorded as parents of the term.
        A stanza is committed when a blank line, another stanza header or
        the end of the file is reached.

        obo_file -- path of the obo file to read.
        '''
        in_term = False
        node = name = domain = ''
        parent = set()
        with open(obo_file, 'r') as handle:
            for line in handle:
                line = line.strip()
                if line == '' or line.startswith('['):
                    # A blank line or a new stanza header ends the current
                    # stanza, so commit it before starting over.
                    if in_term:
                        self._store_term(node, name, domain, parent)
                    in_term = line.startswith('[Term]')
                    node = name = domain = ''
                    parent = set()
                    continue
                if not in_term:
                    continue
                tag, _, value = line.partition(':')
                tag = tag.strip()
                value = value.strip()
                if tag == 'id':
                    node = value
                elif tag == 'namespace':
                    domain = value
                elif tag == 'name':
                    name = value
                elif tag == 'is_a':
                    # "is_a: <parent id> ! <comment>"
                    fields = value.split()
                    if fields:
                        parent.add(fields[0])
                elif tag == 'relationship':
                    # "relationship: <type> <parent id> ! <comment>"
                    fields = value.split()
                    if len(fields) >= 2:
                        parent.add(fields[1])
        # The last stanza may not be followed by a blank line.
        if in_term:
            self._store_term(node, name, domain, parent)

    def _go_annotations(self, gene_file, cd):
        '''
        Parser for gene annotation file (SGD/human). go_annotations variable
        is updated.

        Every non-empty line that does not start with '!' is split on tabs.
        Field 2 is taken as the gene, field 5 as the term and field 7 as
        the code (presumably the evidence code of the annotation format;
        confirm against the input files). The gene is added to the term's
        'gene' set when the term is already in go_annotations and the code
        differs from ``cd``. Lines with fewer than seven fields are skipped.

        gene_file -- path of the annotation file to read.
        cd        -- code whose annotations are excluded.
        '''
        with open(gene_file, 'r') as handle:
            for line in handle:
                line = line.strip()
                if not line or line.startswith('!'):
                    continue
                columns = line.split('\t')
                if len(columns) < 7:
                    # Truncated or malformed row: the code field is missing.
                    continue
                gene = columns[1].strip()
                term = columns[4].strip()
                code = columns[6].strip()
                if term in self.go_annotations and code != cd:
                    self.go_annotations[term]['gene'].add(gene)