forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
translate_annotations.py
executable file
·181 lines (150 loc) · 5.37 KB
/
translate_annotations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
#!/usr/bin/env python3
import argparse
import json
import re
import subprocess
from bisect import bisect_right
from collections import defaultdict
from typing import (Callable, DefaultDict, Generic, List, Optional, Pattern,
Sequence, TypeVar, cast)
from typing_extensions import TypedDict
class Hunk(TypedDict):
    # The four numbers of one unified-diff hunk header, which has the form
    # "@@ -old_start,old_count +new_start,new_count @@".
    # parse_diff stores a count of 1 when the header omits that count.

    # start value from the "-" (old file) side of the header
    old_start: int
    # count value from the "-" (old file) side of the header
    old_count: int
    # start value from the "+" (new file) side of the header
    new_start: int
    # count value from the "+" (new file) side of the header
    new_count: int
class Diff(TypedDict):
    # Result of parse_diff for one diff text.

    # path captured after "--- a/"; None when that line reads
    # "--- /dev/null" or when no such line was found at all
    old_filename: Optional[str]
    # hunk headers in the order they appear in the diff text
    hunks: List[Hunk]
# adapted from the similar regex in tools/clang_tidy.py
# @@ -start,count +start,count @@
hunk_pattern = r'^@@\s+-(\d+)(?:,(\d+))?\s+\+(\d+)(?:,(\d+))?\s+@@'


def parse_diff(diff: str) -> Diff:
    """Extract the old filename and the hunk headers from a diff text.

    Lines are scanned in order.  Until the first ``--- /dev/null`` or
    ``--- a/<path>`` line is seen, no hunk header may appear (this is
    asserted).  After that line, every line matching ``hunk_pattern`` is
    recorded as a hunk and all other lines are ignored.

    Returns a dict with ``old_filename`` (``None`` for ``/dev/null`` or if
    no ``---`` line was found) and ``hunks`` (in order of appearance).  A
    count omitted from a hunk header is recorded as 1.
    """
    old_filename = None
    header_seen = False
    collected: List[Hunk] = []
    for text in diff.splitlines():
        found = re.match(hunk_pattern, text)
        if not header_seen:
            # a hunk header is only meaningful once we know the file
            assert not found
            header = re.match(r'^--- (?:(?:/dev/null)|(?:a/(.*)))$', text)
            if header:
                header_seen = True
                old_filename = header.group(1)
            continue
        if not found:
            continue
        # the start groups always participate; a missing count means 1
        old_start, old_count, new_start, new_count = (
            1 if field is None else int(field) for field in found.groups()
        )
        collected.append({
            'old_start': old_start,
            'old_count': old_count,
            'new_start': new_start,
            'new_count': new_count,
        })
    return {
        'old_filename': old_filename,
        'hunks': collected,
    }
T = TypeVar('T')
U = TypeVar('U')


# bisect.bisect_right is what we want for finding the hunk closest to a
# given line number, but the bisect module has no key function before
# Python 3.10 (https://github.com/python/cpython/pull/20556), so instead
# we wrap the list of hunks in an O(1) adapter that makes it look like a
# plain list of line numbers
# https://gist.github.com/ericremoreynolds/2d80300dabc70eebc790
class KeyifyList(Generic[T, U]):
    """View over ``inner`` whose items read as ``key(item)``.

    Only ``len()`` and indexing are provided; nothing is copied, and
    ``key`` is applied each time an index is read.
    """

    def __init__(self, inner: List[T], key: Callable[[T], U]) -> None:
        self.inner, self.key = inner, key

    def __len__(self) -> int:
        return len(self.inner)

    def __getitem__(self, k: int) -> U:
        element = self.inner[k]
        return self.key(element)
def translate(diff: Diff, line_number: int) -> Optional[int]:
    """Map a line number in the new file to the matching old-file line.

    Returns ``None`` when ``line_number`` is below 1 or when it falls
    before the end of the nearest hunk's new-side range, so the line has
    no counterpart in the old file.  Returns ``line_number`` unchanged
    when the diff has no hunks or the line precedes every hunk.
    Otherwise the distance past the end of the nearest preceding hunk on
    the new side is re-applied past the end of that hunk on the old side.
    """
    if line_number < 1:
        return None
    all_hunks = diff['hunks']
    if not all_hunks:
        return line_number
    # Bisect key per hunk: new_start, or new_start + 1 when new_count is 0.
    first_lines = KeyifyList(
        all_hunks,
        lambda h: h['new_start'] + (1 if h['new_count'] <= 0 else 0)
    )
    preceding = bisect_right(cast(Sequence[int], first_lines), line_number)
    if preceding < 1:
        return line_number
    nearest = all_hunks[preceding - 1]
    # a zero count is treated as 1 when computing the end of each range
    new_end = nearest['new_start'] + (nearest['new_count'] or 1)
    offset = line_number - new_end
    if offset < 0:
        return None
    old_end = nearest['old_start'] + (nearest['old_count'] or 1)
    return old_end + offset
# we use camelCase here because this will be output as JSON and so the
# field names need to match the group names from here:
# https://github.com/pytorch/add-annotations-github-action/blob/3ab7d7345209f5299d53303f7aaca7d3bc09e250/action.yml#L23
class Annotation(TypedDict):
    # One parsed annotation.  parse_annotation fills each field from the
    # regex named group of the same name.

    # from the "filename" group; translate_all may replace it with the
    # diff's old filename
    filename: str
    # from the "lineNumber" group, converted to int; translate_all may
    # replace it with the translated line number
    lineNumber: int
    # from the "columnNumber" group, converted to int
    columnNumber: int
    # from the "errorCode" group, kept as matched
    errorCode: str
    # from the "errorDesc" group, kept as matched
    errorDesc: str
def parse_annotation(regex: Pattern[str], line: str) -> Optional[Annotation]:
    """Parse one line of output into an Annotation.

    ``regex`` is matched against the start of ``line`` and must define the
    named groups ``filename``, ``lineNumber``, ``columnNumber``,
    ``errorCode`` and ``errorDesc``.

    Returns ``None`` when the line does not match, or when the line or
    column number cannot be converted to ``int``.  That covers both a
    non-numeric capture and an optional group that did not participate in
    the match.
    """
    m = re.match(regex, line)
    if m is None:
        return None
    try:
        line_number = int(m.group('lineNumber'))
        column_number = int(m.group('columnNumber'))
    except (TypeError, ValueError):
        # ValueError: the captured text is not an integer.
        # TypeError: the group is optional and did not match, so group()
        # returned None.
        return None
    return {
        'filename': m.group('filename'),
        'lineNumber': line_number,
        'columnNumber': column_number,
        'errorCode': m.group('errorCode'),
        'errorDesc': m.group('errorDesc'),
    }
def translate_all(
    *,
    lines: List[str],
    regex: Pattern[str],
    commit: str
) -> List[Annotation]:
    """Parse annotations from ``lines`` and translate them onto ``commit``.

    Each line is parsed with ``parse_annotation``; lines that do not parse
    are dropped.  For every distinct filename, ``git diff-index
    --unified=0 <commit> -- <filename>`` is run once and its output is
    used to rewrite the filename and line number of that file's
    annotations.

    An annotation is omitted from the result when its file has a diff with
    no old filename, or when its (possibly translated) line number is
    ``None`` or 0.  When git prints no diff for a file, its annotations
    keep their filename and line number.

    The parsed annotation dicts are updated in place and returned.

    Raises ``subprocess.CalledProcessError`` if git exits with an error.
    """
    ann_dict: DefaultDict[str, List[Annotation]] = defaultdict(list)
    for line in lines:
        annotation = parse_annotation(regex, line)
        if annotation is not None:
            ann_dict[annotation['filename']].append(annotation)
    ann_list: List[Annotation] = []
    for filename, annotations in ann_dict.items():
        raw_diff = subprocess.check_output(
            # '--' marks everything after it as a path, so a filename that
            # starts with '-' cannot be parsed as a git option
            ['git', 'diff-index', '--unified=0', commit, '--', filename],
            encoding='utf-8',
        )
        diff = parse_diff(raw_diff) if raw_diff.strip() else None
        # if there is a diff but it doesn't list an old filename, that
        # means the file is absent in the commit we're targeting, so we
        # skip it
        if diff and not diff['old_filename']:
            continue
        for annotation in annotations:
            line_number: Optional[int] = annotation['lineNumber']
            if diff:
                annotation['filename'] = cast(str, diff['old_filename'])
                line_number = translate(diff, cast(int, line_number))
            if line_number:
                annotation['lineNumber'] = line_number
                ann_list.append(annotation)
    return ann_list
def main() -> None:
    """Command-line entry point.

    Reads the lines of ``--file``, translates the annotations found in
    them with ``translate_all`` and prints the result to stdout as JSON.
    All three options are required, because each one is used
    unconditionally downstream.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--file',
        required=True,
        help='path of the file whose lines are parsed as annotations',
    )
    parser.add_argument(
        '--regex',
        required=True,
        help='regex with named groups filename, lineNumber, columnNumber, '
             'errorCode and errorDesc, matched against each line',
    )
    parser.add_argument(
        '--commit',
        required=True,
        help='commit passed to git diff-index to translate line numbers',
    )
    args = parser.parse_args()
    with open(args.file, 'r') as f:
        lines = f.readlines()
    print(json.dumps(translate_all(
        lines=lines,
        regex=args.regex,
        commit=args.commit
    )))


if __name__ == '__main__':
    main()