-
Notifications
You must be signed in to change notification settings - Fork 39
/
retrieve_msgs.py
244 lines (213 loc) · 7.15 KB
/
retrieve_msgs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
"""
@author: youyanggu
Tool to retrieve GroupMe messages using the GroupMe API and output them to a CSV file.
"""
import json
import requests
import datetime
import csv
import argparse
import os
# Command-line interface: one required access token plus optional selectors
# for what to download and where to write it.
parser = argparse.ArgumentParser(
    description='Tool to retrieve GroupMe messages and output them to a CSV file.'
)
parser.add_argument(
    'token',
    help='Access token used to authenticate yourself when making API requests.',
)
parser.add_argument(
    '-a', '--all',
    action="store_true",
    help='Retrieve all groups',
)
parser.add_argument(
    '-d', '--dm',
    action="store_true",
    help='Retrieve all direct messages',
)
parser.add_argument(
    '-g', '--group',
    help='Name of group to retrieve. Run without this flag to see list of groups.',
)
parser.add_argument(
    '-c', '--csv',
    default='temp.csv',
    help='Name of csv file to write to.',
)
parser.add_argument(
    '-o', '--overwrite',
    action="store_true",
    help='overwrite csv file',
)

# Base URL that every API helper below prefixes to its endpoint path.
URL = 'https://api.groupme.com/v3'
# Query-string fragment ("?token=...") appended to request URLs; it is filled
# in by the script entry point and stays None until then.
TOKEN = None
########## STAT FUNCTIONS ##########
def getNumFavorited(msg):
    """Return the number of entries in the message's 'favorited_by' list."""
    return len(msg['favorited_by'])
####################################
##########################
#
# GROUPME API Helper Functions
#
##########################
def get(response):
    """Decode the response body as JSON and return its 'response' entry."""
    payload = response.json()
    return payload['response']
def getDMs():
    """
    Fetch the direct-message chats and index them by the other user's name.

    Returns None when the API payload has no 'response' data. Otherwise
    returns a dictionary mapping the (UTF-8 encoded, stripped) name of the
    other user to {'id': <other user's id>, 'count': <messages_count>}.
    Chats whose message count is not positive are left out.
    Only a single page of up to 100 chats is requested.
    """
    chats = get(requests.get(URL + '/chats' + TOKEN, params={'per_page' : 100}))
    if chats is None:
        return None
    result = {}
    for chat in chats:
        other_user = chat['other_user']
        name = str(other_user['name'].encode('utf-8').strip())
        total = chat['messages_count']
        if total > 0:
            result[name] = {}
            result[name]['id'] = other_user['id']
            result[name]['count'] = total
    return result
def getGroups():
    """
    Fetch the groups and index them by group name.

    Returns None when the API payload has no 'response' data. Otherwise
    returns a dictionary mapping the (UTF-8 encoded, stripped) group name to
    {'id': <group_id>, 'count': <number of messages>}. Groups whose message
    count is not positive are left out.
    Only a single page of up to 100 groups is requested.
    """
    listing = get(requests.get(URL + '/groups' + TOKEN, params={'per_page' : 100}))
    if listing is None:
        return None
    result = {}
    for entry in listing:
        name = entry['name'].encode('utf-8').strip()
        total = entry['messages']['count']
        if total > 0:
            result[name] = {}
            result[name]['id'] = entry['group_id']
            result[name]['count'] = total
    return result
def getGroup(group_id, direct_msgs=False):
    """
    Fetch the API 'response' payload for one conversation.

    With direct_msgs set, group_id is sent as the 'other_user_id' of the
    /direct_messages endpoint; otherwise /groups/<group_id> is requested.
    """
    if not direct_msgs:
        return get(requests.get(URL + '/groups/' + group_id + TOKEN))
    query = {'other_user_id' : group_id}
    return get(requests.get(URL + '/direct_messages' + TOKEN, params=query))
def getGroupName(group_id, direct_msgs):
    """
    Return a display name for the conversation identified by group_id.

    Params:
    group_id: id of the group, or of the other user for direct messages
    direct_msgs: True when group_id refers to a direct-message chat

    For direct messages the id itself is returned. For groups the 'name'
    field of the group fetched from the API is returned as a str.
    """
    if direct_msgs:
        # The direct-message answer never depended on the API payload, so the
        # HTTP round trip the lookup would cost is skipped entirely.
        return group_id
    return str(getGroup(group_id, direct_msgs)['name'])
def getGroupCount(group_id, direct_msgs):
    """
    Return the total number of messages in the conversation as an int.

    Direct-message payloads carry the total under 'count'; group payloads
    carry it under 'messages' -> 'count'.
    """
    info = getGroup(group_id, direct_msgs)
    raw_total = info['count'] if direct_msgs else info['messages']['count']
    return int(raw_total)
def getGroupNames(groups):
    """Return the keys (the group names) of the groups dictionary."""
    names = groups.keys()
    return names
def sortByCount(groups):
    """
    Return a list of (name, count) tuples ordered from the highest message
    count to the lowest, built from each entry's 'count' value.
    """
    pairs = [(name, groups[name]['count']) for name in groups]
    pairs.sort(key=lambda pair: pair[1], reverse=True)
    return pairs
def getLastMsgId(group_id, direct_msgs):
    """
    Return the id used as the starting point when paging backwards.

    For groups this is the payload's 'messages' -> 'last_message_id'. For
    direct messages it is the id of the first entry of 'direct_messages'
    (presumably the most recent message -- confirm against the API).
    """
    info = getGroup(group_id, direct_msgs)
    if not direct_msgs:
        return info['messages']['last_message_id']
    return info['direct_messages'][0]['id']
def getMessages(group_id, direct_msgs, before_id=None, since_id=None):
    """
    Retrieve one page of messages for a group or a direct-message chat.
    Params:
    group_id: id of the group, or of the other user when direct_msgs is set
    direct_msgs: query the /direct_messages endpoint instead of the group's
    before_id: only return messages before this message_id
    since_id: only return messages after this message_id (maybe)
    Returns the API 'response' payload, or an empty list when fetching or
    decoding raises ValueError (e.g. the body is not valid JSON).
    No page size is sent, so the API default applies (20 according to the
    original notes).
    """
    query = {}
    for key, value in (('before_id', before_id), ('since_id', since_id)):
        if value is not None:
            query[key] = str(value)
    if direct_msgs:
        query['other_user_id'] = group_id
        endpoint = URL + '/direct_messages' + TOKEN
    else:
        endpoint = URL + '/groups/' + group_id + '/messages' + TOKEN
    try:
        return get(requests.get(endpoint, params=query))
    except ValueError:
        return []
def countMsgs(group_name, group_id, direct_msgs, csv_file=None, processTextFunc=None, sinceTs=None):
    """
    Function that calls GroupMe API and processes messages of a particular group.
    Messages are paged from the newest backwards until the reported total is
    reached, a page comes back empty, or a message older than sinceTs is seen.
    Params:
    group_name: name written to the first CSV column and shown in the progress output
    group_id: group_id of group (id of the other user when direct_msgs is set)
    direct_msgs: True to read a direct-message chat instead of a group
    csv_file: name of output csv file; rows are appended, nothing is written when falsy
    processTextFunc: a function that processes a msg and returns a value that is appended to user data
    sinceTs: only process messages after this timestamp (epoch seconds, or a
             datetime.datetime that is interpreted as local time)
    Returns:
    (count, users): number of processed messages and a dict mapping each
    sender name to the list of processTextFunc results for that sender
    (the lists stay empty when no processTextFunc is given).
    """
    import time  # function-scope import: only needed for the sinceTs conversion

    # Binary append mode is what the Python 2 csv module expects.
    f = open(csv_file, "ab") if csv_file else None
    try:
        wr = csv.writer(f, dialect="excel") if f is not None else None

        if isinstance(sinceTs, datetime.datetime):
            # strftime("%s") is a platform-specific extension; mktime yields
            # the same local-time epoch value on every platform.
            sinceTs = int(time.mktime(sinceTs.timetuple()))

        totalCount = getGroupCount(group_id, direct_msgs)
        print("Counting messages for {} (Total: {})".format(group_name, totalCount))

        curCount = 0
        users = {}
        # Start just past the newest message so that it is included as well.
        lastMsgId = str(int(getLastMsgId(group_id, direct_msgs)) + 1)
        msgs_key = 'direct_messages' if direct_msgs else 'messages'

        while curCount < totalCount:
            if curCount % 100 == 0:
                print(curCount)
            msgs = getMessages(group_id, direct_msgs, lastMsgId)
            if not msgs:
                break
            msgs = msgs[msgs_key]
            if not msgs:
                break
            for msg in msgs:
                # Explicit None check: comparing an int with None only worked
                # by accident of Python 2 ordering rules.
                if sinceTs is not None and msg['created_at'] < sinceTs:
                    # The finally clause below still closes the CSV file.
                    return curCount, users
                curCount += 1
                try:
                    created_at = datetime.datetime.fromtimestamp(
                        msg['created_at']).strftime('%Y-%m-%d %H:%M:%S')
                except (TypeError, ValueError, OverflowError, OSError):
                    print("Error parsing created_at")
                    created_at = ""
                user = msg['name']
                text = msg['text']
                likes = getNumFavorited(msg)
                if text is None:
                    text = ""
                if user is None:
                    user = ""
                if user not in users:
                    users[user] = []
                if wr is not None:
                    wr.writerow([group_name, created_at.encode('utf-8'), user.encode('utf-8'),
                                 text.encode('utf-8'), likes])
                if processTextFunc is not None:
                    data = processTextFunc(msg)
                    users[user].append(data)
            # Next page: everything before the oldest message of this page.
            lastMsgId = msgs[-1]['id']
        return curCount, users
    finally:
        if f is not None:
            f.close()
def main(retrieve_all, direct_msgs, group_name, csv_file, overwrite):
    """
    Entry point: download messages to CSV, or list the available groups.
    Params:
    retrieve_all: process every group (or every direct-message chat)
    direct_msgs: work on direct-message chats instead of groups
    group_name: name of the single group to process when retrieve_all is not set
    csv_file: output file; when falsy a name is derived from the group name
    overwrite: allow writing to an existing csv_file in single-group mode
    Raises:
    RuntimeError: the group list could not be retrieved
    IOError: csv_file already exists and overwrite is not set (single-group mode)
    With neither retrieve_all nor group_name, the groups and their message
    counts are printed, largest first.
    """
    groups = getDMs() if direct_msgs else getGroups()
    if groups is None:
        raise RuntimeError("Cannot retrieve groups. Is your token correct?")

    if retrieve_all:
        # Existing files are appended to in this mode; overwrite is not consulted.
        for name, info in groups.items():
            target = csv_file if csv_file else name.lower().replace(' ', '_') + '.csv'
            count, _ = countMsgs(name, info['id'], direct_msgs, csv_file=target)
            # Report the file that was actually written, not the raw argument.
            print("Processed {} messages. Wrote to {}.".format(count, target))
    elif group_name:
        if group_name not in groups:
            print("Group name not found. Here are the list of groups:")
            print(getGroupNames(groups))
            return
        if csv_file and os.path.isfile(csv_file) and not overwrite:
            raise IOError("File already exists. Try setting --overwrite.")
        if not csv_file:
            csv_file = group_name.lower().replace(' ', '_') + '.csv'
        if overwrite and os.path.isfile(csv_file):
            # countMsgs only ever appends, so empty the existing file first to
            # make the overwrite flag actually replace its contents.
            open(csv_file, "wb").close()
        count, _ = countMsgs(group_name, groups[group_name]['id'], direct_msgs, csv_file=csv_file)
        print("Processed {} messages. Wrote to {}.".format(count, csv_file))
    else:
        sorted_groups = sortByCount(groups)
        print("Here is all the groups and their message counts:")
        print(sorted_groups)
if __name__ == "__main__":
    # Script entry: read the command line, sanity-check the token length,
    # publish the token for the API helpers, then run.
    args = parser.parse_args()
    token_length = len(args.token)
    if token_length != 32 and token_length != 40:
        raise IOError("Invalid token. Please enter a 32-char or 40-char string.")
    # Stored pre-formatted as a query string because the helpers append it
    # directly to each request URL.
    TOKEN = "?token=" + args.token
    main(args.all, args.dm, args.group, args.csv, args.overwrite)