-
Notifications
You must be signed in to change notification settings - Fork 0
/
find_rejected_edges.py
executable file
·63 lines (53 loc) · 1.98 KB
/
find_rejected_edges.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# coding=utf-8
# Search for edges in an edge file
# that are not in the same community according to a partition
# format are .edge for edge input and g2rl for partition.
import sys
from collections import defaultdict
g2rfile = sys.argv[1]
edge_file = sys.argv[2]
THRESHOLD = int(sys.argv[3]) if len(sys.argv) > 3 else 1
# Fetching communityfrom G2R file
community = {}
with open(g2rfile) as f:
current_community = ""
for line in f:
line = line.rstrip("\n")
tag, element = line.split("\t")
if(tag == "R"):
community[element] = current_community
else:
current_community = element
# count the number of rejected edges for each node
rejected_node_count = defaultdict(int)
# Count the number of edges for each node
edge_count = defaultdict(int)
# going through the edge file, testing edges
with open(edge_file) as f:
current_source = ""
current_community = ""
for line in f:
line = line.rstrip("\n")
data = line.split("\t")
if(len(data) > 2):
read1 = data[0]
read2 = data[2]
# limit the number of check in community dict
if(read1 != current_source):
current_source = read1
current_community = community[read1]
tgt_community = community[read2]
# printing only line were edges are not in the same community.
# this mean our clustering decided to cut those edges.
if(current_community != tgt_community):
rejected_node_count[read1] += 1
rejected_node_count[read2] += 1
# Increase edge count for each node
edge_count[read1] += 1
edge_count[read2] += 1
# Printing results
for node in sorted(rejected_node_count.keys(), key=lambda x: rejected_node_count[x], reverse=True):
count = rejected_node_count[node]
if(count > THRESHOLD):
# print(node, rejected_node_count[node], edge_count[node], sep="\t")
print(node)