-
Notifications
You must be signed in to change notification settings - Fork 31
/
load_contributors.py
88 lines (82 loc) · 3.16 KB
/
load_contributors.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
from app import db, Contributor, BaseLayer, Mixin, Condiment, Seasoning, MAPPER
import os
import pprint
import requests
# GitHub API token, read from the environment when the module is loaded;
# an unset GITHUB_TOKEN raises KeyError right here.
TOKEN = os.environ['GITHUB_TOKEN']
# Authorization header passed to every requests.get() call in this module.
headers = {'Authorization': 'token ' + TOKEN}
def get_commits_page(url):
    """Fetch one page of a paginated listing and locate the following page.

    Args:
        url: URL of the page to fetch.

    Returns:
        A ``(next_url, payload)`` tuple. ``payload`` is the decoded JSON
        body of the response. ``next_url`` is the URL tagged ``rel="next"``
        in the response's ``Link`` header, or ``None`` when the header is
        missing or carries no ``next`` entry (there is nothing left to
        fetch).
    """
    r = requests.get(url, headers=headers)
    # Response.links exposes the Link header keyed by relation name and is
    # an empty dict when the header is absent. Asking for "next" by name
    # replaces the old "exactly two entries" test, which raised KeyError
    # without a Link header, stopped early on pages advertising more than
    # two relations, and could follow a backwards link on the final page.
    next_url = r.links.get('next', {}).get('url')
    return next_url, r.json()
def get_all_commits(url, all_commits=None):
    """Yield the decoded JSON body of each page of a paginated listing.

    Starts at ``url`` and keeps following the ``rel="next"`` entry of each
    response's ``Link`` header until a response no longer advertises one.

    Args:
        url: URL of the first page.
        all_commits: Unused. Kept only so callers that pass it positionally
            or by keyword continue to work; the default is ``None`` rather
            than a shared mutable list.

    Yields:
        The decoded JSON body of every page, first page first.
    """
    while url:
        r = requests.get(url, headers=headers)
        yield r.json()
        # Response.links is empty when there is no Link header, so a
        # single-page result ends the loop instead of raising KeyError.
        url = r.links.get('next', {}).get('url')
def add_contributor(data, contributions):
    """Find or create a contributor and attach the ingredients they touched.

    Args:
        data: Keyword values for ``Contributor``. Must contain
            ``username``, which is used to look up an existing row before
            a new one is created.
        contributions: Iterable of slash-separated strings. The
            second-to-last segment of each one picks a model out of
            ``MAPPER`` and is also the name of the contributor attribute
            that collects those objects; the full string is passed to
            ``query(model).get()`` as the lookup key.

    Returns:
        The ``Contributor`` object, newly created or pre-existing.
    """
    contributor = (
        db.session.query(Contributor)
        .filter_by(username=data['username'])
        .first()
    )
    if not contributor:
        contributor = Contributor(**data)
        db.session.add(contributor)
        db.session.commit()

    for contribution in contributions:
        segments = contribution.split('/')
        if len(segments) < 2:
            # No parent segment, hence no ingredient type to look up.
            continue
        ing_type = segments[-2]
        if ing_type not in MAPPER:
            continue
        ingredient = db.session.query(MAPPER[ing_type]).get(contribution)
        if not ingredient:
            # Might be an opportunity here to make the missing ingredients
            continue

        stored = getattr(contributor, ing_type)
        if stored:
            if ingredient in stored:
                # Already linked (e.g. the same file touched by an earlier
                # commit); appending again would only duplicate the entry.
                continue
            # Append in place: ``stored`` is the very list held by the
            # attribute, so assigning it back would change nothing.
            stored.append(ingredient)
        else:
            setattr(contributor, ing_type, [ingredient])
        db.session.add(contributor)
        db.session.commit()
    return contributor
def load_all(commits_url, repo_name=None):
    """Walk every commit of a repository and record who touched which file.

    For each commit listed under ``commits_url`` the commit detail is
    fetched, contributor fields are extracted from it, and the changed
    files (minus a few housekeeping names) are handed to
    ``add_contributor`` as raw-file URLs. A one-line summary is printed at
    the end.

    Args:
        commits_url: URL of the repository's commit listing, of the form
            ``https://api.github.com/repos/<repo>/commits``.
        repo_name: The ``<repo>`` part used to build raw-file URLs. When
            omitted it is taken from ``commits_url``, so the function no
            longer depends on a module-level ``repo_name`` existing.

    Returns:
        None.
    """
    if repo_name is None:
        # Exact inverse of 'https://api.github.com/repos/%s/commits' % name.
        repo_name = commits_url.split('/repos/', 1)[-1].rsplit('/commits', 1)[0]
    raw_base = 'https://raw.github.com/%s/master/' % repo_name
    ignore_these = frozenset(
        ['LICENSE', '.gitignore', '.DS_Store', 'INDEX.md', 'README.md']
    )

    all_commits = []
    for page in get_all_commits(commits_url):
        all_commits.extend(page)

    usernames = set()
    all_files = []
    for commit in all_commits:
        commit_data = requests.get(commit['url'], headers=headers).json()
        git_author = commit_data['commit']['author']

        data = {'full_name': git_author['name']}
        account = commit_data.get('author')
        if account:
            data['username'] = account['login']
            data['gravatar'] = account['avatar_url']
        else:
            # No account attached to the commit: fall back to the name
            # recorded in the commit itself.
            data['username'] = git_author['name']

        files = [
            '%s%s' % (raw_base, f['filename'])
            for f in commit_data['files']
            if f['filename'].split('/')[-1] not in ignore_these
        ]
        all_files.extend(files)
        add_contributor(data, files)
        # Count people, not commits: one author usually has many commits.
        usernames.add(data['username'])

    # Single parenthesised argument: prints the same text on Python 2 and 3.
    print('Updates %s contributors and %s contributions'
          % (len(usernames), len(all_files)))
if __name__ == '__main__':
    # Script entry point: takes the repository path used in the GitHub API
    # URL (the part after /repos/) as its only command-line argument.
    import sys
    if len(sys.argv) < 2:
        # Exit with a usage hint instead of an IndexError traceback.
        sys.exit('usage: %s <owner>/<repo>' % sys.argv[0])
    # Assigned at module scope, so it is visible as a global to the
    # functions defined in this file.
    repo_name = sys.argv[1]
    commits_url = 'https://api.github.com/repos/%s/commits' % repo_name
    load_all(commits_url)