This repository has been archived by the owner on Jul 7, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
population_per_month.py
88 lines (77 loc) · 2.14 KB
/
population_per_month.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import pymongo
import time
import json
# Module-wide MongoDB handle. MongoClient() is called without arguments, so
# pymongo's default connection settings are used.
client = pymongo.MongoClient()
# The "users" collection of the "wikimedia_user_metrics" database; this is the
# collection that get_obj() aggregates over.
usersCollection = client["wikimedia_user_metrics"]["users"]
def get_months():
    """Return the month labels from "2001/1" through "2020/9" in order.

    Each label is "<year>/<month>" with no zero padding (for example
    "2001/1", "2019/12"). Every year from 2001 to 2019 contributes all
    twelve months; 2020 contributes January through September only.
    """
    labels = []
    for year in range(2001, 2021):
        # 2020 is the only year that is cut short (it stops after month 9).
        final_month = 9 if year == 2020 else 12
        labels.extend(
            "{}/{}".format(year, month) for month in range(1, final_month + 1)
        )
    return labels
def get_obj():
    """Return a dict mapping "<year>/<month>" to the number of non-bot users created in that month.

    The counting is done by a MongoDB aggregation over ``usersCollection``:

    1. keep only documents whose ``is_bot`` is False;
    2. build a ``creation_month`` string "<year>/<month>" from ``created``
       (assumes ``created`` is a date field on every matched document --
       TODO confirm);
    3. count documents per ``creation_month``;
    4. gather all (month, count) pairs into a single document;
    5. turn those pairs into one object keyed by month.

    Progress timestamps are printed before and after the query.

    Returns:
        The month -> count mapping, or an empty dict when the aggregation
        yields no document (no user matched the filter).
    """
    print('Starting get_obj', time.time())
    print('Getting data', time.time())

    only_humans = {'$match': {'is_bot': False}}
    label_month = {
        '$project': {
            'creation_month': {
                '$concat': [
                    {'$toString': {'$year': '$created'}},
                    '/',
                    {'$toString': {'$month': '$created'}},
                ]
            }
        }
    }
    count_per_month = {
        '$group': {
            '_id': '$creation_month',
            'count': {'$sum': 1},
        }
    }
    # Collapse every (month, count) pair into one document so that the final
    # stage can convert them into a single object.
    collect_pairs = {
        '$group': {
            '_id': None,
            'data': {'$addToSet': {'k': '$_id', 'v': '$count'}},
        }
    }
    pairs_to_object = {
        '$project': {
            '_id': False,
            'data': {'$arrayToObject': '$data'},
        }
    }

    parsedMonths = list(usersCollection.aggregate([
        only_humans,
        label_month,
        count_per_month,
        collect_pairs,
        pairs_to_object,
    ]))
    print('Got data ', time.time())

    # With no matching users the pipeline emits nothing at all; indexing [0]
    # would then fail with an unhelpful IndexError.
    if not parsedMonths:
        return {}
    return parsedMonths[0]['data']
def accumulate(obj):
    """Turn per-month counts into a cumulative running total, in place.

    Walks the labels from ``get_months()`` in chronological order and
    replaces each value present in ``obj`` with the sum of that month's
    count and all earlier months' counts.

    Months that are absent from ``obj`` are skipped without resetting the
    running total, so a gap no longer causes later months to restart from
    zero. No keys are added for absent months.

    Finally the '2020/10' and '2020/11' entries are removed if present
    (presumably incomplete months at the end of the data set -- confirm).
    Any other key that is not produced by ``get_months()`` is left untouched.

    Args:
        obj: dict mapping "<year>/<month>" labels to numeric counts. It is
            modified in place.

    Returns:
        The same ``obj`` instance, now holding cumulative totals.
    """
    running_total = 0
    for month in get_months():
        if month in obj:
            running_total += obj[month]
            obj[month] = running_total

    # pop() with a default instead of del: these keys may legitimately be
    # missing, and that must not raise KeyError.
    for dropped_month in ('2020/10', '2020/11'):
        obj.pop(dropped_month, None)
    return obj
# Script body: fetch the per-month counts, convert them to cumulative totals,
# and write the result as JSON to population_history.json.
obj = accumulate(get_obj())

# Serialize before opening the file so that a serialization error cannot leave
# a truncated output file behind.
text = json.dumps(obj)
with open("population_history.json", 'w') as outfile:
    outfile.write(text)