-
Notifications
You must be signed in to change notification settings - Fork 1
/
build_taxonomic_tree.py
63 lines (50 loc) · 1.69 KB
/
build_taxonomic_tree.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/lusr/bin/python
'''
Reads in a taxonomy file, outputs a taxonomic tree in newick format
Created on Jan 31, 2012
Usage (arguments are read in this order: taxonomy file, species list, output tree file):
python build_taxonomic_tree.py ../data/taxonomy/all_taxon.taxonomy ../data/taxonomy/species.mapping taxonomic.tree
Converted for use in Python 3 by Mike Nute some time in 2018.
@author: namphuon
'''
from dendropy import Tree, Node, Taxon
import sys
import os
def strip_quotes(field):
    """Return *field* with every double-quote character removed."""
    return field.replace('"', '')


def split_taxonomy_row(line):
    """Return the first two comma-separated fields of *line*, unquoted.

    The line is whitespace-stripped before splitting. The result is a list
    holding at most two strings; a line without a comma yields a one-element
    list (an empty line yields ``['']``).
    """
    return [strip_quotes(field) for field in line.strip().split(',')[:2]]


def read_species_names(species_path):
    """Return the set of whitespace-stripped lines of the file *species_path*.

    The file is closed before returning, even if reading fails.
    """
    with open(species_path, 'r') as handle:
        return {line.strip() for line in handle}


def build_taxonomic_tree(taxonomy_path, species_path, output_path):
    """Build a tree from a taxonomy table and write it to *output_path*.

    taxonomy_path: comma-separated text file. Its first line is skipped as a
        header, its second line describes the root node (only the first
        field is used), and every following non-blank line supplies a node
        id in its first field and the id of the node it is attached to in
        its second field. A node can only be attached to an id that has
        already been read; otherwise a KeyError is raised.
    species_path: text file whose stripped lines are the ids that keep
        their node label.
    output_path: file that receives ``str(tree)`` followed by ``;``.
    """
    species = read_species_names(species_path)
    nodes_by_id = {}
    tree = Tree()
    # Ids whose node label is blanked once the whole table has been read.
    # '1' is always included -- presumably the id of the root row; confirm
    # against the taxonomy files in use (a KeyError is raised below if no
    # node with id '1' exists).
    prune = ['1']

    with open(taxonomy_path, 'r') as rows:
        rows.readline()  # header line, deliberately ignored

        # Second line of the file: the root node.
        root_id = split_taxonomy_row(rows.readline())[0]
        root = Node()
        # NOTE(review): the label is written straight into the instance
        # __dict__ rather than through normal attribute assignment; kept
        # exactly as before -- confirm this is what the installed DendroPy
        # version needs.
        root.__dict__['label'] = root_id
        nodes_by_id[root_id] = root
        # Hang the root under the seed node the Tree object already holds.
        tree.__dict__['_seed_node'].add_child(root)

        for line in rows:
            if not line.strip():
                # Tolerate blank / trailing empty lines instead of failing
                # with an IndexError on the missing parent field.
                continue
            fields = split_taxonomy_row(line)
            taxon_id = fields[0]
            parent_id = fields[1]
            node = Node()
            node.__dict__['label'] = taxon_id
            node.taxon = Taxon(taxon_id)
            # Register the node before looking up its parent, as before.
            nodes_by_id[taxon_id] = node
            nodes_by_id[parent_id].add_child(node)
            if taxon_id not in species:
                prune.append(taxon_id)

    for taxon_id in prune:
        nodes_by_id[taxon_id].label = ''
    # tree.delete_outdegree_one_nodes()
    tree.suppress_unifurcations()

    with open(output_path, 'w') as output:
        output.write(str(tree) + ";")


if __name__ == '__main__':
    # Extra trailing arguments are ignored, as they always were.
    if len(sys.argv) < 4:
        sys.exit("usage: build_taxonomic_tree.py "
                 "<taxonomy_file> <species_list> <output_tree>")
    build_taxonomic_tree(sys.argv[1], sys.argv[2], sys.argv[3])