#!/usr/bin/env python3
import cherrypy
from flask import Flask, g, render_template, request
from flask_assets import Environment, Bundle
from rethinkdb import RethinkDB
import requests
import json
import yaml
from apscheduler.schedulers.background import BackgroundScheduler
import logging
from datetime import datetime
import os, sys, socket
import argparse

r = RethinkDB()

# Configuration and static variables
def open_yaml(f):
    return open(os.path.join(os.path.dirname(os.path.realpath(__file__)), f))

try:
    config = yaml.safe_load(open_yaml('config.yaml'))
except IOError:
    print("No configuration file found (see config.example.yaml for a sample configuration).")
    sys.exit()

try:
    users = yaml.safe_load(open_yaml('users.yaml'))
except IOError:
    print("No user projects specified (see users.example.yaml for a sample configuration).")
    users = {
        'github-users': []
    }

HEADERS = {'Authorization': 'token ' + config['oauth']}
URL = 'https://api.github.com/repos/' + config['repo']
MILESTONES = list(map(str, config['milestones']))  # list() since map() is lazy on Python 3
UPDATE_INTERVAL = config['update_interval']  # in minutes
STATS_TABLE = 'stats'
ISSUES_TABLE = 'issues'
METADATA_TABLE = 'metadata'
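
# For reference, config.yaml needs roughly the following keys -- an
# illustrative sketch based on how `config` is used in this file; see
# config.example.yaml for the authoritative sample. All values below are
# hypothetical:
#
#   oauth: <GitHub API token>
#   repo: owner/repository
#   milestones: [1.4, 1.5]
#   update_interval: 30            # minutes
#   deadline: <deadline passed through to the client>
#   logging: true
#   rethinkdb: {host: localhost, port: 28015, db: countdown}
#   server: {host: 0.0.0.0, port: 5000}
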
# Simple utility functions
def connect_to_db():
    try:
        return r.connect(host=config['rethinkdb']['host'], port=config['rethinkdb']['port'], db=config['rethinkdb']['db'])
    except socket.error as e:
        c = config['rethinkdb']
        print("Could not connect to RethinkDB on %s:%s (database: %s).\nError message: %s" % (c['host'], c['port'], c['db'], e.strerror))
        sys.exit()

def update_data(check_for_existing_data=False):
    conn = connect_to_db()
    issue_count = r.table(ISSUES_TABLE).count().run(conn)
    stats_count = r.table(STATS_TABLE).count().run(conn)
    # If the last recorded report is significantly older than the last report we fetched (or if we have no reports), update the data
    if stats_count > 0:
        last_date = datetime.strptime(r.table(STATS_TABLE).order_by(r.desc('datetime'))[0]['datetime'].run(conn), "%Y-%m-%dT%H:%M:%S.%fZ")
        if (datetime.utcnow() - last_date).total_seconds() / 60 >= UPDATE_INTERVAL:
            check_for_existing_data = False
    else:
        check_for_existing_data = False
    if not check_for_existing_data or (check_for_existing_data and issue_count == 0):
        pull_new_issues(conn)
    if not check_for_existing_data or (check_for_existing_data and stats_count == 0):
        generate_stats(conn)
    conn.close()

def pull_new_issues(rdb_conn):
    issues = []
    print("Pulling issues from GitHub repo %s:" % config['repo'])
    for state in ['open', 'closed']:
        page_num = 1  # GitHub's listings are 1-indexed; page=0 returns the same results as page=1
        while True:
            url = "%s/issues?page=%d&state=%s" % (URL, page_num, state)
            sys.stdout.write("Processing page %d of %s issues. \r" % (page_num, state))
            sys.stdout.flush()
            gh_issue_set = requests.get(url=url, headers=HEADERS).json()
            if 'message' in gh_issue_set and gh_issue_set['message'] == 'Not Found':
                print("No issues found for the %s repository." % config['repo'])
                return
            if gh_issue_set == []:
                break
            issues += gh_issue_set
            page_num += 1
    print("Pulled a total of %d issues (not necessarily unique)." % len(issues))
    sys.stdout.write("Deleting existing issues.\r")
    sys.stdout.flush()
    r.table(ISSUES_TABLE).delete().run(rdb_conn)
    sys.stdout.write("Inserting issues into RethinkDB.\r")
    sys.stdout.flush()
    r.table(ISSUES_TABLE).insert(issues).run(rdb_conn)
    num_inserted = r.table(ISSUES_TABLE).count().run(rdb_conn)
    print("Inserted %d unique issues into RethinkDB." % num_inserted)

def generate_stats(rdb_conn):
    issues = r.table(ISSUES_TABLE)
    # Note: `!= None` (rather than `is not None`) is deliberate here -- the
    # comparison must become part of the ReQL query, not a Python identity check
    issues_with_milestone = issues.filter(lambda issue: issue['milestone'] != None)
    milestones = issues_with_milestone.map(lambda issue: issue['milestone']['title']).distinct()

    # Generate user stats (how many issues assigned to this user have been opened and closed) for a particular set of issues
    def user_stats(issue_set):
        # Remove issues that don't have owners from the issue set
        issue_set = issue_set.filter(lambda issue: issue['assignee'] != None)
        # Get a list of users issues are assigned to
        owners = issue_set.map(lambda issue: issue['assignee']).distinct()
        # Count the issues with a given owner and state (shorthand since we reuse this)
        def count_issues(owner, state):
            return issue_set.filter(lambda issue: (issue['assignee']['login'] == owner['login']) & (issue['state'] == state)).count()
        # Return a list of documents with stats for each owner
        return owners.map(lambda owner: {
            'owner': owner['login'],
            'owner_avatar_url': owner['avatar_url'],
            'open_issues': count_issues(owner, 'open'),
            'closed_issues': count_issues(owner, 'closed'),
        })

    # Return owner stats for a particular milestone (filter issues to just include a milestone)
    def user_stats_by_milestone(m):
        return user_stats(issues_with_milestone.filter(lambda issue: issue['milestone']['title'] == m))

    # Return the number of issues with a particular state (and optionally a particular milestone)
    def num_issues(state, milestone=None):
        if milestone is None:
            issue_set = issues
        else:
            issue_set = issues_with_milestone.filter(lambda issue: issue['milestone']['title'] == milestone)
        return issue_set.filter(lambda issue: issue['state'] == state).count()

    # Two key things:
    # - we have to call coerce_to('array') since we get a sequence, and this will error otherwise
    # - we have to call list() on the stats to make sure we pull down all the data from a Cursor
    report = r.expr({
        'datetime': r.js('(new Date).toISOString()'),
        'by_milestone': r.expr([{
            'milestone': 'all',
            'open_issues': num_issues('open'),
            'closed_issues': num_issues('closed'),
            'user_stats': user_stats(issues).coerce_to('array')
        }]).union(milestones.map(lambda m: {
            'milestone': m,
            'open_issues': num_issues('open', m),
            'closed_issues': num_issues('closed', m),
            'user_stats': user_stats_by_milestone(m).coerce_to('array')
        }))
    })
    # Add the generated report to the database
    print("Generating and inserting new user stats at %s" % datetime.now().strftime("%Y-%m-%d %H:%M"))
    r.table(STATS_TABLE).insert(r.expr([report])).run(rdb_conn)
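
# For reference, the inserted stats document looks roughly like this (a sketch
# inferred from the query above; all values are hypothetical):
#
#   {
#     'datetime': '2013-04-01T12:00:00.000Z',
#     'by_milestone': [
#       {'milestone': 'all', 'open_issues': 12, 'closed_issues': 34,
#        'user_stats': [{'owner': 'someuser', 'owner_avatar_url': '...',
#                        'open_issues': 3, 'closed_issues': 7}, ...]},
#       ...
#     ]
#   }
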
# Build a chained boolean expression that tests if a given ReQL value is in an array
def is_in_array(reql_value, array):
    query = False
    for value in array:
        query = query | (reql_value == value)
    return query
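
# For example, is_in_array(x, [1, 2, 3]) builds the ReQL expression
# ((False | (x == 1)) | (x == 2)) | (x == 3); the `|` and `==` operators are
# overloaded by the driver, so the whole test is evaluated server-side
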
# Flask application
app = Flask(__name__)
assets = Environment(app)
bundle_less = Bundle('countdown.less', filters='less', output='gen/countdown.css')
assets.register('countdown_css', bundle_less)
bundle_coffee = Bundle('countdown.coffee', filters='coffeescript', output='gen/countdown.js')
assets.register('countdown_js', bundle_coffee)
bundle_js = Bundle('vendor/jquery-1.9.1.min.js',
                   'vendor/jquery.flot.js',
                   'vendor/jquery.flot.time.js',
                   'vendor/jquery.flot.resize.js',
                   'vendor/underscore-min.js',
                   'vendor/bootstrap.min.js',
                   'vendor/handlebars.js',
                   'vendor/swag.min.js',
                   filters='rjsmin', output='gen/vendor.js')
assets.register('vendor_js', bundle_js)

@app.before_request
def before_request():
    g.rdb_conn = connect_to_db()

@app.teardown_request
def teardown_request(exception):
    g.rdb_conn.close()

@app.route('/')
def index():
    return render_template('countdown.html')

# TODO
# Currently this sends a subset of the reports to the client, filtered for the
# milestones and with a bit of metadata (datetime of the report).
#
# Ultimately, we would want to do a reduction and create a whole new object
# (this should be done with a groupBy). But until groupBy is more flexible,
# we'll stick with this Gordian Knot.
@app.route('/get_data')
def get_data():
    selection = list(r.table(STATS_TABLE).order_by('datetime').map(lambda report:
        report['by_milestone'].filter(lambda report_by_m:
            is_in_array(report_by_m['milestone'], MILESTONES)
        ).map(lambda filtered_report:
            filtered_report.merge({'datetime': report['datetime']})
        )).run(g.rdb_conn))
    return json.dumps(selection)
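
# The JSON response is an array with one inner array per report, holding one
# object per tracked milestone -- a sketch with hypothetical values:
#   [[{"milestone": "1.4", "open_issues": 10, "closed_issues": 5,
#      "user_stats": [...], "datetime": "2013-04-01T12:00:00.000Z"}, ...], ...]
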
@app.route('/latest')
def latest():
    last_report = r.table(STATS_TABLE).order_by(r.desc('datetime'))[0]
    selection = last_report['by_milestone'].filter(lambda report_by_m:
        is_in_array(report_by_m['milestone'], MILESTONES)
    ).map(lambda filtered_report:
        filtered_report.merge({'datetime': last_report['datetime']})
    ).run(g.rdb_conn)
    return json.dumps(selection)

@app.route('/get_deadline')
def get_deadline():
    return json.dumps({
        'deadline': config['deadline'],
        'milestones': MILESTONES,
        'user_projects': users['github-users']
    })

@app.route('/test_status', methods=['GET', 'POST'])
def test_status():
    if request.method == 'POST':
        conn = connect_to_db()
        test_status_to_insert = {
            'id': 'test_status',  # fixed primary key, so the .get('test_status') read below finds this document
            'type': 'test_status',
            'num_passing': int(request.form['num_passing']),
            'num_failing': int(request.form['num_failing'])
        }
        # conflict='replace' is the current driver's replacement for the old upsert=True flag
        r.table(METADATA_TABLE).insert(test_status_to_insert, conflict='replace').run(conn)
        conn.close()
    conn = connect_to_db()
    test_status = r.table(METADATA_TABLE).get('test_status').run(conn)
    conn.close()
    return json.dumps(test_status)
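
# A hypothetical way to report test results to this endpoint (host and port
# come from the server section of config.yaml):
#   curl -X POST -d 'num_passing=120' -d 'num_failing=3' http://localhost:5000/test_status
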
# Parse command-line arguments
# TODO disabled until table_create works
"""
parser = argparse.ArgumentParser()
parser.add_argument("--firstrun", action="store_true", help="Create the database and tables required for Countdown.")
args = parser.parse_args()
if args.firstrun:
    conn = connect_to_db()
    r.db_create(config['rethinkdb']['db']).run(conn)
    r.table_create(STATS_TABLE).run(conn)
    r.table_create(ISSUES_TABLE).run(conn)
    r.table_create(METADATA_TABLE).run(conn)
    conn.close()
"""

# Set the log level based on the config file
if config['logging']:
    loglevel = logging.DEBUG
else:
    loglevel = logging.CRITICAL
logging.basicConfig(level=loglevel)

# We're using the scheduler to periodically poll for updates
sched = BackgroundScheduler()

@sched.scheduled_job('interval', minutes=UPDATE_INTERVAL)
def timed_job():
    update_data()

# Kick everything off
if __name__ == '__main__':
    sched.start()
    update_data(check_for_existing_data=True)
    # We use the CherryPy server because it's easy to deploy,
    # more robust than the Flask dev server, and doesn't have
    # problems with Python threads (which APScheduler depends on)
    cherrypy.tree.graft(app, '/')
    cherrypy.tree.mount(None, '/static', {'/': {
        'tools.staticdir.dir': app.static_folder,
        'tools.staticdir.on': True,
    }})
    cherrypy.config.update({
        'server.socket_host': config['server']['host'],
        'server.socket_port': config['server']['port'],
    })
    cherrypy.engine.start()
    cherrypy.engine.block()
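
# Usage sketch (assumes config.yaml and users.yaml sit alongside this file and
# the RethinkDB database/tables already exist):
#   $ ./countdown.py
# The dashboard is then served at http://<server.host>:<server.port>/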