-
Notifications
You must be signed in to change notification settings - Fork 0
/
edges2graphml.py
executable file
·91 lines (72 loc) · 2.63 KB
/
edges2graphml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# coding=utf-8
import os
import sys
from xml.sax.saxutils import escape
# Template for one <edge> element: source id, target id, weight (number of
# position entries on the line), orientation, is_iso.
edgeFormat = (
    '<edge source="{}" target="{}">\n'
    '<data key="e_nk">{}</data>\n'
    '<data key="e_dir">{}</data>\n'
    '<data key="e_iso">{}</data>\n'
    '</edge>\n'
)

# Template for one <node> element: numeric node index, original read name.
nodeFormat = (
    '<node id="n{}">\n'
    '<data key="v_id">{}</data>\n'
    '</node>\n'
)

# GraphML preamble: declares the node/edge attribute keys and opens <graph>.
# NOTE(review): e_dir is declared "boolean" and e_iso "double", but both are
# copied verbatim from columns 5 and 6 of the edge file -- confirm that the
# values found there actually match these declared types.
graphmlHeader = '''<?xml version="1.0" encoding="UTF-8"?>
<graphml xmlns="http://graphml.graphdrawing.org/xmlns"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns
http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd">
<key id="v_id" for="node" attr.name="id" attr.type="string"/>
<key id="e_nk" for="edge" attr.name="nk" attr.type="double"/>
<key id="e_dir" for="edge" attr.name="dir" attr.type="boolean"/>
<key id="e_iso" for="edge" attr.name="is_iso" attr.type="double"/>
<graph id="G" edgedefault="undirected">\n'''


def _parse_edge_line(line, line_no):
    """Parse one tab-separated line of the edge file.

    Returns ``(first_read, second_read, weight, orientation, is_iso)`` for an
    edge line, or ``None`` for a line with at most two fields (the run
    information lines at the top of the file, blank lines).

    Raises ``ValueError`` when an edge line has fewer than six fields or when
    one of its numeric fields is not an integer.
    """
    fields = line.rstrip("\n").split("\t")
    if len(fields) <= 2:
        return None
    if len(fields) < 6:
        raise ValueError(
            "line {}: expected at least 6 tab-separated fields, got {}".format(
                line_no, len(fields)))

    first_read, len1, second_read, len2, orientation, is_iso = fields[:6]
    positions = fields[6:]

    # Columns 2 and 4 and the position lists are not exported, but they are
    # still parsed so that malformed numeric input keeps failing loudly.
    int(len1)
    int(len2)
    for entry in positions:
        for value in entry.split(","):
            int(value)

    return first_read, second_read, len(positions), orientation, is_iso


def _write_edges(edge_file, edges_path):
    """Write every edge of ``edge_file`` to ``edges_path`` plus the closing tags.

    Returns the dict mapping each read name to its node index (as a string),
    numbered in order of first appearance.
    """
    nodes = {}
    with open(edge_file) as edges_in, open(edges_path, 'w') as edges_out:
        for line_no, line in enumerate(edges_in, 1):
            parsed = _parse_edge_line(line, line_no)
            if parsed is None:
                continue
            first_read, second_read, weight, orientation, is_iso = parsed
            for read in (first_read, second_read):
                if read not in nodes:
                    nodes[read] = str(len(nodes))
            # Text copied from the input is escaped so that '&', '<' and '>'
            # cannot break the XML.
            edges_out.write(edgeFormat.format(
                'n' + nodes[first_read], 'n' + nodes[second_read],
                weight, escape(orientation), escape(is_iso)))
        edges_out.write('</graph></graphml>\n')
    return nodes


def edges_to_graphml(edge_file, out_file):
    """Convert the tab-separated edge file ``edge_file`` to GraphML ``out_file``.

    The node list is only known once every edge has been read, yet it is
    written before the edges in the output.  Edges are therefore written to an
    intermediate ``edges_<name>`` file, the header and nodes to ``head_<name>``
    (both next to ``out_file``), and the two are concatenated into ``out_file``.
    The intermediate files are removed afterwards, also when an error occurs.

    Raises ``ValueError`` for malformed edge lines and the usual ``IOError`` /
    ``OSError`` when a file cannot be read or written.
    """
    out_dir = os.path.dirname(out_file)
    base_name = os.path.basename(out_file)
    head_path = os.path.join(out_dir, "head_" + base_name)
    edges_path = os.path.join(out_dir, "edges_" + base_name)
    parts = [head_path, edges_path]

    try:
        nodes = _write_edges(edge_file, edges_path)

        with open(head_path, 'w') as head_out:
            head_out.write(graphmlHeader)
            for read_name, index in nodes.items():
                head_out.write(nodeFormat.format(index, escape(read_name)))

        print("CONCATENING FILES AND CLEANING")
        with open(out_file, 'w') as merged:
            for part in parts:
                print("cat " + part + " >> " + merged.name)
                with open(part) as source:
                    merged.writelines(source)
                print("rm " + part)
                # os.remove instead of a shell "rm": works for paths holding
                # spaces or shell metacharacters and on every platform.
                os.remove(part)
    finally:
        # Do not leave intermediate files behind when something failed midway.
        for part in parts:
            if os.path.exists(part):
                os.remove(part)


def main(argv=None):
    """Command-line entry point: ``edges2graphml.py <edge_file> <out_file>``."""
    args = sys.argv[1:] if argv is None else argv
    if len(args) < 2:
        sys.exit("usage: edges2graphml.py <edge_file> <out_file>")
    edges_to_graphml(args[0], args[1])
    print('DONE')


if __name__ == "__main__":
    main()