-
Notifications
You must be signed in to change notification settings - Fork 0
/
mprt.py
63 lines (44 loc) · 1.16 KB
/
mprt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import re
from fasta_tools import fasta_read_raw, get_uniprot_fasta
# Path of the input file read by the script entry point (one identifier per line).
file = './datasets/rosalind_mprt.txt'
# Default motif, written in the shorthand that regexify() translates:
# ``{X}`` stands for "any character except X"; ``[XY]`` is already regex syntax.
motif = 'N{P}[ST]{P}'


def regexify(motif=motif):
    """Translate a motif written in ``{X}`` shorthand into a regex string.

    Every ``{`` becomes ``[^`` and every ``}`` becomes ``]``, so ``{P}`` turns
    into the negated character class ``[^P]``.  All other characters are
    passed through unchanged.

    Args:
        motif: motif string in the shorthand notation; defaults to the
            module-level ``motif``.

    Returns:
        The motif as a regular-expression pattern string (not compiled).
    """
    # Both substitutions are fixed single characters, so plain string
    # replacement is enough and avoids depending on how the regex parser
    # treats a lone '{'.
    return motif.replace('{', '[^').replace('}', ']')
def check_for_motif(ID, motif=motif):
    """Return the 1-based start positions of every occurrence of ``motif``.

    The sequence is obtained by passing ``ID`` to ``get_uniprot_fasta`` and
    parsing the result with ``fasta_read_raw``.  Overlapping occurrences are
    all reported.

    Args:
        ID: identifier handed unchanged to ``get_uniprot_fasta``.
        motif: motif in the shorthand understood by ``regexify``; defaults to
            the module-level ``motif``.

    Returns:
        A list of 1-based start positions in ascending order; empty when the
        motif does not occur.

    Raises:
        ValueError: if the parsed FASTA data does not hold exactly one record.
    """
    # Wrap the motif in a zero-width lookahead: a match then consumes no
    # characters, so finditer() tests every position and overlapping
    # occurrences are each found.
    pattern = re.compile('(?=' + regexify(motif) + ')')

    # read data from uniprot and format
    # NOTE(review): fasta_read_raw is assumed to return a mapping (the
    # original code called .items() on it) — confirm against fasta_tools.
    records = list(fasta_read_raw(get_uniprot_fasta(ID)).items())
    if len(records) != 1:
        # Explicit exception instead of `assert`, which is removed under -O.
        raise ValueError(
            'expected exactly one FASTA record for %r, got %d'
            % (ID, len(records))
        )
    _, sequence = records[0]

    # +1 converts the 0-based match offset to a 1-based position.
    return [match.start() + 1 for match in pattern.finditer(sequence)]
if __name__ == '__main__':
    # Read one identifier per line; for every identifier whose sequence
    # contains the motif, print the identifier and, on the next line, the
    # space-separated 1-based match positions.
    with open(file) as f:
        for ID in f:
            ID = ID.strip()
            if not ID:
                # Skip blank lines (e.g. a trailing empty line) instead of
                # issuing a lookup with an empty identifier.
                continue
            results = check_for_motif(ID)
            if results:
                print(ID)
                print(' '.join(map(str, results)))