-
Notifications
You must be signed in to change notification settings - Fork 0
/
NetworkxMap.py
147 lines (121 loc) · 4.13 KB
/
NetworkxMap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
import collections
import csv
G = nx.Graph()
bc = {}
cc = {}
dc = {}
visited = {}
def build():
df = pd.read_csv("filemap.csv")
f = open("developer_edges.txt", "w")
for idx, x in df['committers'].iteritems():
committers = x.split()
for committer in committers:
first_committer = committer
if not G.has_node(first_committer):
G.add_node(first_committer)
bc[first_committer] = 0
dc[first_committer] = 0
for other in committers:
if first_committer != other:
if not G.has_node(other):
G.add_node(other)
bc[other] = 0
dc[other] = 0
if not G.has_edge(first_committer, other):
G.add_edge(first_committer, other)
f.write(first_committer + ":" + other + "\n")
def bfs(start_node, end_node):
queue = collections.deque()
predecessor = {}
queue.append(start_node)
predecessor[start_node] = None
while queue and queue[0] != end_node:
n = queue.popleft()
for neighbor in list(G.neighbors(n)):
if neighbor not in predecessor:
predecessor[neighbor] = n
queue.append(neighbor)
if not queue:
return None
else:
path = list()
path.append(end_node)
pred = predecessor[end_node]
while pred is not None:
path.append(pred)
pred = predecessor[pred]
return path
def degree_centrality():
global dc
for edge in G.edges():
dc[edge[0]] += 1
dc[edge[1]] += 1
sum = G.number_of_nodes() - 1
for key, value in dc.items():
dc[key] = value/sum
def closeness_centrality():
global cc
for start_node in G.nodes():
for end_node in G.nodes():
if start_node != end_node and not visited.get((start_node,
end_node), None):
path = bfs(start_node, end_node)
visited[(start_node, end_node)] = True
visited[(end_node, start_node)] = True
cc[start_node] += len(path) - 1
cc[end_node] += len(path) - 1
sum = G.number_of_nodes() - 1
for key, value in dc.items():
cc[key] = sum / value
def display_network():
print()
print("total nodes in graph:", G.number_of_nodes())
print("total edges in graph:", G.number_of_edges())
nx.draw(G)
plt.savefig("path_graph.pdf")
with open('output.csv', 'w') as f:
w = csv.writer(f, delimiter=',')
w.writerow(["email", "degree centrality", "betweenness centrality",
"closeness centrality"])
for node in G.nodes():
w.writerow([node, dc[node], bc[node], cc[node]])
def betweenness_centrality():
global bc
path_counter = 0
for start in G.nodes():
for end in G.nodes():
if start != end and not visited.get((start, end), None):
visited[(start, end)] = True
visited[(end, start)] = True
for path in nx.all_shortest_paths(G, start, end):
if len(path) > 2:
for element in path[1:-1]:
bc[element] += 1
path_counter += 1
else:
path_counter += 1
for k, v in bc.items():
bc[k] = v / path_counter
sum1 = sum(bc.values())
for key, val in bc.items():
bc[key] = val / sum1
def main():
build()
print("network built")
betweenness_centrality()
print("betweenness centrality calculated")
degree_centrality()
print("degree centrality calculated")
closeness_centrality()
print("closeness centrality calculated")
display_network()
print()
print("check developer_edges.txt for graph edges")
print("check path_graph.pdf for node display")
print("check output.csv for centrality output")
if __name__ == '__main__':
main()