forked from eukref/pipeline
-
Notifications
You must be signed in to change notification settings - Fork 0
/
eukref_dbparser01.py
38 lines (32 loc) · 1.11 KB
/
eukref_dbparser01.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
"""
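Usage:
    python eukref_dbparser01.py thecamoebids.clustered.fasttree.nex thecamoebids.clustered.fasttree.txt

First argument: annotated tree nexus file (input).
Second argument: tab delimited outfile (output).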
"""
import sys
import re
# Captures the annotation text that follows !name=" up to (but not including)
# the next double quote, or up to the end of the line if there is none.
NAME_PATTERN = re.compile(r'!name="([^"]*)')

# Placeholder found in the accession field; when present, the next field is
# used instead.
# NOTE(review): spelled with three s's in the original script. Kept
# byte-for-byte because it presumably has to match what the upstream data
# contains -- confirm before "correcting" it.
NO_ACCESSION = "noaccesssion"


def extract_record(line):
    """Return ``(accession, annotation)`` for one tree line, or ``None``.

    A line yields a record only when it starts with ``\\t'gi+`` or ``\\t'gi|``
    AND carries a ``!name="..."`` annotation. The character right after
    ``gi`` is the field separator for the whole label. The accession is the
    fourth separator-delimited field; if that field equals the
    ``noaccesssion`` placeholder, the fifth field is used instead.

    Lines that lack the prefix, lack the annotation, or have too few fields
    return ``None`` instead of raising ``IndexError``.
    """
    if line.startswith("\t'gi+"):
        separator = "+"
    elif line.startswith("\t'gi|"):
        separator = "|"
    else:
        return None

    name_match = NAME_PATTERN.search(line)
    if name_match is None:
        # No annotation on this line: there is nothing to pair the
        # accession with, so do not emit a row.
        return None

    fields = line.split(separator)
    if len(fields) < 4:
        return None

    accession = fields[3]
    if accession == NO_ACCESSION:
        if len(fields) < 5:
            return None
        accession = fields[4]

    # When the accession is the last field of the quoted label, the closing
    # quote and everything after it are still attached; cut them off.
    accession = accession.split("'")[0]
    return accession, name_match.group(1)


def main(argv=None):
    """Convert an annotated tree nexus file into a tab delimited table.

    ``argv`` defaults to ``sys.argv``; ``argv[1]`` is the annotated tree
    nexus file to read and ``argv[2]`` is the tab delimited file to write
    (one ``accession<TAB>annotation`` row per matching line).

    Returns the process exit status: 0 on success, 2 when the two file
    arguments are missing.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        program = argv[0] if argv else "script"
        sys.stderr.write(
            "usage: python %s <annotated tree nexus file> "
            "<tab delimited outfile>\n" % program
        )
        return 2

    # Read the whole input before opening the output, as the original did,
    # so a missing input file does not leave an empty output file behind.
    with open(argv[1], "r") as infile:
        lines = infile.readlines()

    with open(argv[2], "w") as outfile:
        for line in lines:
            if "!name=" in line:
                # The original echoed every annotated line to stdout; keep
                # that. print(x) with one argument behaves the same on
                # Python 2 and Python 3.
                print(line)
            record = extract_record(line)
            if record is not None:
                outfile.write(record[0] + "\t" + record[1] + "\n")
    return 0


if __name__ == "__main__":
    sys.exit(main())