-
Notifications
You must be signed in to change notification settings - Fork 2
/
parse.py
74 lines (66 loc) · 2.32 KB
/
parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import json
import os
import pickle
import sys
def select_event_type(flag):
    """Return the event type selected by a command-line flag.

    "-m"/"--member" selects "MemberEvent" and "-w"/"--watch" selects
    "WatchEvent". Any other flag yields None.
    """
    if flag in ("-m", "--member"):
        return "MemberEvent"
    if flag in ("-w", "--watch"):
        return "WatchEvent"
    return None


def collect_files(in_path):
    """Return the list of input files designated by in_path.

    A directory expands to every entry in it whose extension is ".json"
    (non-recursive); any other path is returned as a single-element list.
    """
    if os.path.isdir(in_path):
        # os.listdir returns entries in arbitrary order. The first name seen
        # for an id wins and edges are appended in read order, so sort to
        # make the output reproducible from run to run.
        return [os.path.join(in_path, name)
                for name in sorted(os.listdir(in_path))
                if os.path.splitext(name)[-1] == ".json"]
    return [in_path]


def parse_events(lines, event_type, user_id2name, repo_id2name,
                 user_repo_edges):
    """Fold the events found in lines into the three accumulators.

    lines is an iterable of strings, each holding one JSON event object.
    Only events whose "type" equals event_type are used; for "MemberEvent"
    only those whose payload action is "added" are kept.

    For every kept event:
      * the actor is recorded in user_id2name (id as str -> login),
      * the repo is recorded in repo_id2name (id -> name),
      * for a MemberEvent, the added member is recorded in user_id2name and
        a (member_id, repo_id) edge is appended,
      * an (actor_id, repo_id) edge is appended.

    The first name seen for an id is kept; later names do not overwrite it.
    The accumulators are modified in place and nothing is returned.
    """
    is_member_event = event_type == "MemberEvent"
    for line in lines:
        # Tolerate blank lines (e.g. a trailing newline) instead of letting
        # json.loads raise on them.
        if not line.strip():
            continue
        data = json.loads(line)
        if data["type"] != event_type:
            continue
        if is_member_event and data["payload"]["action"] != "added":
            continue

        actor = data["actor"]
        repo = data["repo"]
        actor_id = str(actor["id"])
        # NOTE(review): repo ids are stored as found in the JSON while user
        # ids are stringified; kept that way so the saved data is unchanged.
        repo_id = repo["id"]
        user_id2name.setdefault(actor_id, actor["login"])
        repo_id2name.setdefault(repo_id, repo["name"])

        if is_member_event:
            member = data["payload"]["member"]
            member_id = str(member["id"])
            user_id2name.setdefault(member_id, member["login"])
            user_repo_edges.append((member_id, repo_id))
        user_repo_edges.append((actor_id, repo_id))


def main(argv):
    """Parse event files and pickle the user, repo and edge data.

    argv follows the sys.argv layout: argv[1] is the event-type flag,
    argv[2] an input JSON file or a directory of ".json" files, and argv[3]
    the basename for the "<basename>.user", "<basename>.repo" and
    "<basename>.edge" output files. Exits with status 1 when arguments are
    missing or the event-type flag is not recognised.
    """
    # Check arguments.
    if len(argv) < 4:
        print("Error: missing arguments.")
        print("Usage: parse.py {-m|--member|-w|--watch} {<input-json-directory>|<input-json-file>} <output-data-basename>")
        sys.exit(1)
    event_type = select_event_type(argv[1])
    if event_type is None:
        print("Error: invalid event type {}.".format(argv[1]))
        sys.exit(1)

    # Collect files.
    files = collect_files(argv[2])
    print("{} files.".format(len(files)))

    # Parsing.
    user_id2name = {}
    repo_id2name = {}
    user_repo_edges = []
    for filename in files:
        print(filename)
        # The context manager closes the file even if a line fails to parse.
        with open(filename, "r") as f:
            parse_events(f, event_type, user_id2name, repo_id2name,
                         user_repo_edges)
    print("Users: {}".format(len(user_id2name)))
    print("Repos: {}".format(len(repo_id2name)))
    print("Edges: {}".format(len(user_repo_edges)))

    # Save to files.
    print("Saving...")
    with open("{}.user".format(argv[3]), "wb") as f:
        pickle.dump(user_id2name, f)
    with open("{}.repo".format(argv[3]), "wb") as f:
        pickle.dump(repo_id2name, f)
    with open("{}.edge".format(argv[3]), "wb") as f:
        pickle.dump(user_repo_edges, f)


if __name__ == "__main__":
    main(sys.argv)