This repository has been archived by the owner on Aug 22, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
utils.py
58 lines (44 loc) · 1.79 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import json
from zipfile import ZipFile
from tqdm import tqdm
def read_users(filename):
    """Return the user-id -> user-name mapping stored in an export archive.

    The archive must contain a top-level ``users.json`` member holding a JSON
    list of objects that each have an ``id`` and a ``name`` key.
    NOTE(review): the layout looks like a Slack workspace export -- confirm.

    Args:
        filename: Path of the zip archive (anything ``ZipFile`` accepts).

    Returns:
        A dict mapping every entry's ``id`` to its ``name``.

    Raises:
        KeyError: If ``users.json`` is not in the archive, or an entry lacks
            ``id`` or ``name``.
        ValueError: If ``users.json`` does not contain valid JSON.
    """
    with ZipFile(filename, 'r') as zfile:
        with zfile.open('users.json') as users_file:
            users_text = users_file.read()

    # Archive members are read as bytes. Decode them explicitly, and use
    # 'utf-8-sig' so a leading byte-order mark (left by some editors) is
    # stripped instead of making json.loads() fail.
    if isinstance(users_text, bytes):
        users_text = users_text.decode('utf-8-sig')

    return {u['id']: u['name'] for u in json.loads(users_text)}
def message_generator(filename, users=None, show_progress=True):
    """Yield one tuple per message written by a known user in an archive.

    Only archive members of the form ``<channel>/<file>.json`` (exactly one
    ``/`` in the name) are read; each must hold a JSON list of message
    objects. Messages without a ``user`` key, or whose ``user`` is not a key
    of ``users``, are skipped.

    Args:
        filename: Path of the zip archive (anything ``ZipFile`` accepts).
        users: Mapping of user id -> user name. When ``None`` it is loaded
            from the archive with ``read_users(filename)``.
        show_progress: When true, iterate the member names through ``tqdm``
            so a progress bar is displayed.

    Yields:
        ``(msg_id, channel, user, text, ts, reactions)`` where

        * ``msg_id`` is a running counter starting at 1 that only counts
          yielded messages,
        * ``channel`` is the directory part of the member name,
        * ``user`` is the name looked up in ``users``,
        * ``text`` is the message's ``text`` or ``"NO_TEXT"`` when absent,
        * ``ts`` is the message's ``ts`` converted to ``float``,
        * ``reactions`` maps each reaction name to the list of names of the
          known users who reacted (empty dict when there are none).

    Raises:
        KeyError: If a yielded message has no ``ts`` key.
        ValueError: If a member is not valid JSON or ``ts`` is not numeric.
    """
    if users is None:
        users = read_users(filename)

    msg_id = 0
    with ZipFile(filename, 'r') as zfile:
        names = zfile.namelist()
        files = tqdm(names) if show_progress else names

        for name in files:
            # Keep only "<channel>/<file>.json"; this skips top-level files
            # such as users.json, directory entries and deeper nesting.
            if name.count('/') != 1 or not name.endswith('.json'):
                continue
            channel, _ = name.split('/')

            with zfile.open(name) as msgs_file:
                msgs_text = msgs_file.read()
            # Members are read as bytes; 'utf-8-sig' also strips an optional
            # byte-order mark that would otherwise break json.loads().
            if isinstance(msgs_text, bytes):
                msgs_text = msgs_text.decode('utf-8-sig')

            for m in json.loads(msgs_text):
                # Ignore messages that cannot be attributed to a known user.
                if 'user' not in m or m['user'] not in users:
                    continue
                user = users[m['user']]
                ts = float(m['ts'])
                msg_id += 1
                text = m.get('text', "NO_TEXT")

                # Translate reacting user ids to names, dropping unknown ids.
                reactions = {
                    r['name']: [users[u] for u in r['users'] if u in users]
                    for r in m.get('reactions', ())
                }

                yield msg_id, channel, user, text, ts, reactions