-
Notifications
You must be signed in to change notification settings - Fork 10
/
GitStatic.py
326 lines (292 loc) · 11.8 KB
/
GitStatic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
#!/usr/bin/env python
# coding=utf-8
import copy
import csv
import datetime
import json
import os
import threading
from urllib.parse import quote

import requests
"""统计的时间区间-开始日期"""
# Base URL of the GitLab server whose REST API is queried.
git_root_url = "https://git.example.com"

# Private access token appended to every API request.
git_token = "your token"

# Directory under which the CSV reports are written.
export_path = "/tmp"

# Reporting window as YYYY-MM-DD strings: start date, then end date.
t_from = "2018-07-23"
t_end = "2018-07-30"

# The same window bounds parsed into datetime objects.
date_from = datetime.datetime.strptime(t_from, '%Y-%m-%d')
date_end = datetime.datetime.strptime(t_end, '%Y-%m-%d')

# Re-entrant lock protecting state shared between the worker threads.
lock = threading.RLock()

# Author e-mails considered irregular; commits from them are only logged.
user_unknown = {}
# Maps an alias e-mail to the e-mail the statistics are reported under.
user_email_alias_mapping = {}
# Maps a (canonical) e-mail to the author name used in the reports.
user_email_name_mapping = {}
class GitlabApiCountTrueLeTrue:
    """Collect per-author commit statistics from a GitLab server.

    One worker thread is started per recently active project. Each worker
    walks the project's branches, fetches the commits inside the configured
    reporting window and sums total/added/deleted lines per author e-mail.
    The result is one CSV file per project plus one overall CSV file.
    """

    # Ids of all commits counted so far, used for de-duplication: the same
    # commit can be listed on several branches (for example after a merge).
    # Class-level, so it is shared by every instance and worker thread.
    total_commit_map = {}

    # Final data set: project id -> ProjectInfo carrying its ``commit_map``.
    # Class-level as well; worker threads write to it while holding ``lock``.
    totalMap = {}

    # GitLab caps ``per_page`` at 100, so a larger value silently truncates
    # the listing. Listings are therefore fetched page by page.
    _PAGE_SIZE = 100

    @staticmethod
    def _fetch_list(url):
        """Return the JSON array served at *url*, or ``[]`` for anything else.

        GitLab reports errors (bad token, unknown project or ref, ...) as a
        JSON object. Iterating such an object yields its string keys, and
        the callers would then fail on ``item['id']``; every non-list
        payload is therefore treated as "no data".

        :param url: complete request URL
        :return: list of decoded JSON items, possibly empty
        """
        response = requests.get(url)
        try:
            payload = response.json()
        except ValueError:
            # The body was not JSON at all (e.g. an HTML error page).
            return []
        return payload if isinstance(payload, list) else []

    @classmethod
    def _iter_pages(cls, base_url):
        """Yield ``(url, items)`` for every non-empty page of a listing.

        :param base_url: URL whose query string already contains
            ``per_page=_PAGE_SIZE``; only ``&page=N`` is appended here
        :return: generator of ``(page_url, list_of_items)`` tuples
        """
        page = 1
        while True:
            url = '%s&page=%d' % (base_url, page)
            items = cls._fetch_list(url)
            if not items:
                return
            yield url, items
            if len(items) < cls._PAGE_SIZE:
                # A short page is the last one; skip the extra request.
                return
            page += 1

    @staticmethod
    def _accumulate(summary, detail):
        """Add the line counts of *detail* to *summary*, keyed by e-mail.

        The first record of an author is stored as a shallow copy, so later
        additions never modify an object that still belongs to another map.

        :param summary: dict mapping author e-mail to an aggregated record
        :param detail: record with ``author_email``, ``total``,
            ``additions`` and ``deletions`` attributes
        :return: None
        """
        existing = summary.get(detail.author_email)
        if existing is None:
            summary[detail.author_email] = copy.copy(detail)
            return
        existing.total += detail.total
        existing.additions += detail.additions
        existing.deletions += detail.deletions

    def get_projects(self):
        """
        Fetch all repositories and generate the reports.

        Starts one thread per project that was active recently enough,
        waits for all of them and writes the overall CSV file.
        :return: None
        """
        threads = []
        # NOTE(review): this listing still uses the v3 endpoint while every
        # other call uses v4 - confirm the target server still serves v3.
        listing_url = '%s/api/v3/projects/all' \
                      '?private_token=%s&per_page=%d&order_by=last_activity_at' % (
                          git_root_url, git_token, self._PAGE_SIZE)
        # Walk every page of the project list; one thread per project.
        for _, projects in self._iter_pages(listing_url):
            for project in projects:
                if project['default_branch'] is None:
                    continue
                last_active = datetime.datetime.strptime(
                    project['last_activity_at'], '%Y-%m-%dT%H:%M:%S.%fZ')
                # Skip projects whose last activity lies more than one day
                # before the start of the reporting window.
                if (date_from - last_active).days > 1:
                    continue
                project_info = ProjectInfo()
                project_info.project_id = project['id']
                project_info.name = project['name']
                project_info.project_desc = project['description']
                project_info.project_url = project['web_url']
                project_info.path = project['path']
                threads.append(
                    threading.Thread(target=self.get_branches, args=(project['id'], project_info)))
        # Start every worker, then wait until all of them are done.
        for t in threads:
            t.start()
        for t in threads:
            t.join()
        # Merge the per-project results into one map keyed by author e-mail.
        final_commit_map = {}
        for info in self.totalMap.values():
            for detail in info.commit_map.values():
                self._accumulate(final_commit_map, detail)
        write_to_csv("%s/GitStatic_%s/%s_%s.csv" % (export_path, t_from, 'total', t_from), final_commit_map,
                     "extra")
        return

    def get_branches(self, project_id, project_info):
        """
        Fetch all branches of a repository and merge their commits into one map.

        The merged result is stored on ``project_info.commit_map``, registered
        in ``totalMap`` and written to the project's CSV file.
        :param project_id: id of the GitLab project
        :param project_info: ProjectInfo describing the project
        :return: None
        """
        print("进入线程:%d,项目id%d,%s" % (threading.get_ident(), project_id, project_info.project_url))
        url = '%s/api/v4/projects/%s/repository/branches?private_token=%s&per_page=%d' % (
            git_root_url, project_id, git_token, self._PAGE_SIZE)
        print("start get branch list %d,url=%s" % (project_id, url))
        branches = [branch for _, page in self._iter_pages(url) for branch in page]
        if not branches:
            return
        # Branch name -> {author e-mail -> aggregated record} of that branch.
        branch_map = {}
        # Always collect the master branch first.
        detail_map = self.get_commits(project_id, project_info.project_url, 'master')
        print("get commits finish project_id=%d branch master" % project_id)
        if detail_map:
            branch_map['master'] = detail_map
        for branch in branches:
            branch_name = branch.get('name')
            if branch_name is None:
                continue
            # Branches that were already merged are not processed again.
            if branch.get('merged'):
                continue
            detail_map = self.get_commits(project_id, project_info.project_url, branch_name)
            if not detail_map:
                continue
            branch_map[branch_name] = detail_map
            print("get commits finish project_id=%d branch %s" % (project_id, branch_name))
        print("all branch commits finish %d " % project_id)
        # Merge all branches into one map keyed by author e-mail.
        final_commit_map = {}
        for value_map in branch_map.values():
            for detail in value_map.values():
                self._accumulate(final_commit_map, detail)
        if not final_commit_map:
            return
        project_info.commit_map = final_commit_map
        # totalMap is written by every worker thread; the ``with`` block
        # releases the lock even if the assignment raises.
        with lock:
            self.totalMap[project_info.project_id] = project_info
        # Write the merged result to the per-project CSV file.
        write_to_csv(
            "%s/GitStatic_%s/project/%s_%d.csv" % (export_path, t_from, project_info.path, project_info.project_id),
            final_commit_map, project_info.project_url)

    def get_commits(self, project_id, project_url, branch_name):
        """
        Fetch the commits of one branch inside the reporting window and
        aggregate their line statistics per author e-mail.

        :param project_id: id of the GitLab project
        :param project_url: web URL of the project, used for log output only
        :param branch_name: name of the branch to inspect
        :return: dict mapping author e-mail to the aggregated record, or
            None when the server lists no commits for the branch
        """
        since_date = date_from.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
        until_date = date_end.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
        # The branch name is percent-encoded so that characters such as
        # '#', '&' or '+' cannot corrupt the query string.
        base_url = '%s/api/v4/projects/%s/repository/commits?per_page=%d&ref_name=%s&since=%s&until=%s&private_token=%s' % (
            git_root_url, project_id, self._PAGE_SIZE, quote(branch_name, safe=''),
            since_date, until_date, git_token)
        detail_map = None
        for url, commits in self._iter_pages(base_url):
            if detail_map is None:
                print('start get_commits,projectID=%d,branch=%s,url=%s' % (project_id, branch_name, url))
                detail_map = {}
            for commit in commits:
                commit_id = commit.get('id')
                if commit_id is None:
                    continue
                # De-duplicate commits. Check and insert happen under the
                # lock because the map is shared by all worker threads.
                with lock:
                    if commit_id in self.total_commit_map:
                        continue
                    self.total_commit_map[commit_id] = commit_id
                # Fetch the statistics of this single commit.
                detail = get_commit_detail(project_id, commit_id)
                if detail is None:
                    continue
                if detail.total > 5000:
                    # More than 5000 lines in one commit is probably
                    # generated code (scaffolding and the like); ignore it.
                    continue
                # Not part of the main flow: log commits whose author
                # account is listed as irregular.
                if detail.author_email in user_unknown:
                    print("email %s projectid= %d,branchname,%s,url=%s" % (
                        detail.author_email, project_id, branch_name, project_url))
                self._accumulate(detail_map, detail)
        return detail_map
def get_commit_detail(project_id, commit_id):
    """
    Fetch a single commit and return its line statistics.

    :param project_id: id of the GitLab project
    :param commit_id: id (SHA) of the commit
    :return: a CommitDetails object, or None when the commit is a merge
        commit, carries no statistics, or the server did not answer with a
        commit object (for example an error payload)
    """
    url = '%s/api/v4/projects/%s/repository/commits/%s?private_token=%s' \
          % (git_root_url, project_id, commit_id, git_token)
    response = requests.get(url)
    try:
        commit = response.json()
    except ValueError:
        # The body was not JSON (e.g. an HTML error page): nothing to count.
        return None
    # Error answers are JSON objects without the commit fields; indexing
    # them directly would raise KeyError and abort the calling worker.
    if not isinstance(commit, dict) or 'author_email' not in commit:
        return None
    stats = commit.get('stats')
    if stats is None:
        return None
    # Merge commits only repeat changes that are counted elsewhere.
    if 'Merge branch' in (commit.get('title') or ''):
        return None
    author_email = commit['author_email']
    author_name = commit.get('author_name')
    # Map an alias e-mail onto the e-mail the author is reported under ...
    canonical_email = user_email_alias_mapping.get(author_email)
    if canonical_email is not None:
        author_email = canonical_email
    # ... and, if configured, replace the name recorded in the commit.
    configured_name = user_email_name_mapping.get(author_email)
    if configured_name is not None:
        author_name = configured_name
    details = CommitDetails()
    details.additions = stats['additions']
    details.deletions = stats['deletions']
    details.total = stats['total']
    details.author_email = author_email
    details.author_name = author_name
    return details
def make_dir_safe(file_path):
    """
    Utility: create the directory a file is going to be written to.

    :param file_path: path of a file, or of a folder when it ends with "/"
    :return: None
    """
    # For "a/b/file.csv" as well as for "a/b/" this yields "a/b".
    folder_path = os.path.dirname(file_path)
    if not folder_path:
        # A bare file name lives in the current directory; there is nothing
        # to create, and os.makedirs('') would raise FileNotFoundError.
        return
    # exist_ok avoids the exists()/makedirs() race: several worker threads
    # create the same report folder at the same time.
    os.makedirs(folder_path, exist_ok=True)
def write_to_csv(file_path, final_commit_map, extra):
    """
    Utility: write the aggregated statistics to a CSV file.

    The header has six columns, the data rows five: the sixth header cell
    only carries ``extra`` (callers pass the project URL or a fixed label).

    :param file_path: path of the CSV file; missing folders are created
    :param final_commit_map: dict mapping author e-mail to a record with
        ``author_email``, ``author_name``, ``total``, ``additions`` and
        ``deletions`` attributes
    :param extra: text placed in the additional header column
    :return: None
    """
    make_dir_safe(file_path)
    # The header is not ASCII, so the encoding is fixed instead of being
    # left to the platform default. newline='' is what the csv module
    # expects; lineterminator keeps the plain "\n" line endings.
    with open(file_path, 'w', encoding='utf-8', newline='') as out:
        writer = csv.writer(out, lineterminator='\n')
        writer.writerow(
            ["提交人邮箱", "提交人姓名", "总行数", "增加行数", "删除行数", str(extra)])
        for value in final_commit_map.values():
            # csv.writer quotes fields containing commas or quotes (e.g. an
            # author name such as "Doe, John"), keeping the columns aligned.
            # str() keeps the previous "%s" rendering, including "None".
            writer.writerow([
                str(value.author_email), str(value.author_name),
                str(value.total), str(value.additions), str(value.deletions)])
class CommitDetails(json.JSONEncoder):
    """
    Record holding commit line statistics for one author.

    All fields are class-level defaults; code that fills a record assigns
    the attributes on the instance.
    """
    # Author identity; both stay None until assigned.
    author_name, author_email = None, None
    # Line counters, all starting at zero.
    additions = deletions = total = 0
class ProjectInfo(json.JSONEncoder):
    """
    Record describing one project.

    All fields are class-level defaults of None and are assigned on the
    instance by the code that fills the record.
    """
    # Identity and description of the project.
    project_id = project_desc = project_url = None
    # Path and name of the project, plus the map of aggregated commit
    # statistics that is attached once they have been collected.
    path = name = commit_map = None
if __name__ == '__main__':
    # Script entry point: build the worker and generate all reports.
    GitlabApiCountTrueLeTrue().get_projects()