-
Notifications
You must be signed in to change notification settings - Fork 0
/
chempartlib.py
243 lines (206 loc) · 8.05 KB
/
chempartlib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
from networkit import *
import math, sys
def continuousPartition(G, X, tolerance, k=0, isCharged={}):
"""
Partition G into subsets of size X +/- tolerance and with consecutive node ids. Charged nodes are not grouped into the same subset.
"""
# validate input
n = G.numberOfNodes()
if len(isCharged) > 0:
assert(len(isCharged)==n)
if k > 0:
assert(sum(isCharged) <= k)
else:
isCharged = [False for i in range(n)]
assert(X > 0)
assert(tolerance >= 0)
if (n % X > 0 or (k>0 and k*X != n)) and tolerance == 0:
if (k > 0):
print("Creating", k, "partitions of size", X, "from a graph with", n, " nodes and tolerance 0 is impossible.")
else:
print("Creating partitions of size", X, "from a graph with", n, " nodes and tolerance 0 is impossible.")
print("Set tolerance to 1.")
tolerance = 1
if k > 0:
assert(n <= (X + tolerance)*k)
assert(n >= (X - tolerance)*k)
maxNumberOfPartitions = int(n / max(X - tolerance, 1)) if (k == 0) else k
def w(i):
"""
Weight of cutting after node i.
TODO: consider other weights beside the single cut edge
"""
assert(i >= 0)
assert(i < n)
if (i == n-1):
return 0
else:
return G.weight(i,i+1)
# allocate cut and predecessor table
table = [[float("inf") for j in range(maxNumberOfPartitions)] for i in range(n)]
pred = [[-1 for j in range(maxNumberOfPartitions)] for i in range(n)]
# fill table
for i in range(n):
for j in range(maxNumberOfPartitions):
if (j == 0):
if abs(i+1-X) <= tolerance:
table[i][j] = w(i)
elif (i >= (X-tolerance)):
windowStart = max(i-(X+tolerance),0)
# make sure that no two charged nodes are in the same partition
chargeEncountered = False
for l in reversed(range(windowStart, i+1)):
assert(l >= windowStart)
if isCharged[l]:
if chargeEncountered:
windowStart = l
break
else:
chargeEncountered = True
predList = [table[l][j-1] for l in range(windowStart, min(i,i-(X-tolerance)+1))]
if (len(predList) > 0):
minPred = min(predList)
table[i][j] = minPred + w(i)
pred[i][j] = predList.index(minPred) + windowStart
# get result from table
resultlist = [table[n-1][j] for j in range(maxNumberOfPartitions)]
if len(resultlist) == 0:
raise ValueError("Combination of parameters allows no partition!")
# if k is given, select best solution with k subsets. If not, select best overall solution
if (k == 0):
bestCutValue = min(resultlist)
k = resultlist.index(bestCutValue) + 1
else:
bestCutValue = table[n-1][k-1]
if (bestCutValue == float("inf")):
raise ValueError("Combination of parameters allows no partition!")
result = partitioning.Partition(n)
result.setUpperBound(k)
# search best path backwards
j = k-1
i = n-1
c = bestCutValue
while (j > 0):
nextI = pred[i][j]
assert(nextI >= 0)
# assign partitions to nodes
for l in range(nextI+1, i+1):
result[l] = j
j -= 1
c -=w(i)
i = nextI
# assign partitions to first nodes not covered by previous loop
for l in range(0, nextI+1):
result[l] = 0
# check results:
for i in range(n):
assert(result[i] >= 0)
assert(result[i] < k)
for size in result.subsetSizes():
if (abs(size-X) > tolerance):
print("For n=", n, ", k=", k, ", X=", X, ", tolerance=", tolerance, ", ", size, " is wrong.")
assert(abs(size-X) <= tolerance)
return result
def spiralLayout(G, k, rowheight = 10, colwidth = 10):
"""
Return two lists, of x and y coordinates for a spiral layout of G.
k nodes are put in one row, keywords rowheight and colwidth determine spacing
"""
n = G.numberOfNodes()
z = G.upperNodeIdBound()
x = [0 for i in range(z)]
y = [0 for i in range(z)]
for i in range(z):
if G.hasNode(i):
if int(i / k) % 2 > 0:
x[i] = colwidth*(k-(i % k)-1)
else:
x[i] = colwidth*(i % k)
y[i] = rowheight*int(i / k)
# adapt coordinates for rounded bends
ydelta = int(rowheight / 4)
xdelta = colwidth*(1-math.cos(math.pi/3))
rightwards = int(i / k) % 2 == 0
if i % k == k-1:
y[i] += ydelta
x[i] = x[i] - xdelta if rightwards else x[i] + xdelta
if i > 0 and i % k == 0:
y[i] -= ydelta
x[i] = x[i] - xdelta if not rightwards else x[i] + xdelta
for i in range(z):
x[i] += 1# gephi ignores coordinates with value 0
y[i] += 1
return x, y
def naivePartition(G, X):
"""
Chop a new fragment off G every X nodes
"""
n = G.numberOfNodes()
naivePart = partitioning.Partition(n)
naivePart.allToSingletons()
for i in range(n):
naivePart.moveToSubset(int(i/X), i)
return naivePart
def mlPartition(G, k, imbalance, isCharged={}):
"""
Use a multi-level approach with Fiduccia-Matheyses to partition G.
Subsets have size at most (1+imbalance)*ceil(n/k)
"""
n = G.numberOfNodes()
if len(isCharged) > 0:
assert(len(isCharged)==n)
if k > 0:
assert(sum(isCharged) <= k)
else:
isCharged = [False for i in range(n)]
listOfChargedNodes = [i for i in range(n) if isCharged[i]]
mlp = partitioning.MultiLevelPartitioner(G, k, imbalance, False, listOfChargedNodes, True)
mlp.run()
return mlp.getPartition()
def greedyPartition(G, sizelimit, isCharged={}):
"""
Starting with singleton clusters, greedily merge the heaviest edge as long as smaller than sizelimit.
"""
n = G.numberOfNodes()
if len(isCharged) > 0:
assert(len(isCharged)==n)
else:
isCharged = [False for i in range(n)]
n = G.numberOfNodes()
part = partitioning.Partition(n)
part.allToSingletons()
chargedPartitions = set([part.subsetOf(i) for i in range(n) if isCharged[i]])
def getWeight(edge):
return G.weight(edge[0], edge[1])
sortedEdges = sorted(G.edges(), key=getWeight)
# merge heaviest edge, as long as allowed
while len(sortedEdges) > 0:
allowed = True
heaviestEdge = sortedEdges.pop()
firstPart = part.subsetOf(heaviestEdge[0])
secondPart = part.subsetOf(heaviestEdge[1])
if firstPart in chargedPartitions and secondPart in chargedPartitions:
allowed = False
sizeMap = part.subsetSizeMap()
if sizeMap[firstPart] + sizeMap[secondPart] > sizelimit:
allowed = False
partSet = {firstPart, secondPart}
for i in range(n-1):
if part[i] in partSet and part[i+2] in partSet and not part[i+1] in partSet:
allowed = False #otherwise, would create single embedded node
if allowed:
part.mergeSubsets(firstPart, secondPart)
if firstPart in chargedPartitions or secondPart in chargedPartitions:
chargedPartitions.add(part.subsetOf(heaviestEdge[0]))
part.compact()
return part
def exportToGephi(G, xcoords, ycoords, part):
"""
Export graph to Gephi, along with coordinates and partition
"""
client = gephi.streaming.GephiStreamingClient()
client.clearGraph()
client.exportGraph(G)
client.exportNodeValues(G, part, "partition")
client.exportNodeValues(G, xcoords, 'x')
client.exportNodeValues(G, [-elem for elem in ycoords], 'y')