-
Notifications
You must be signed in to change notification settings - Fork 2
/
qualified_fenced_code.py
320 lines (249 loc) · 11.2 KB
/
qualified_fenced_code.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
# -*- coding: utf-8 -*-
"""
Fenced Code Extension の改造版
=========================================
github でのコードブロック記法が使える。
>>> text = '''
... `````
... # コードをここに書く
... x = 10
... `````'''
>>> print markdown.markdown(text, extensions=['qualified_fenced_code'])
<pre><code># コードをここに書く
x = 10
</code></pre>
かつ、これらのコードに修飾ができる。
>>> text = '''
... ```
... x = [3, 2, 1]
... y = sorted(x)
... x.sort()
... ```
... * sorted[color ff0000]
... * sort[link http://example.com/]
... '''
>>> print markdown.markdown(text, extensions=['qualified_fenced_code'])
"""
import hashlib
import regex as re
from markdown.extensions.codehilite import CodeHilite
from markdown.extensions.codehilite import CodeHiliteExtension
from markdown.extensions import Extension
from markdown.preprocessors import Preprocessor
# HTML templates for the plain (non-CodeHilite) rendering of a code block:
# LANG_TAG fills the first slot of CODE_WRAP when a language was given.
LANG_TAG = ' class="%s"'
CODE_WRAP = '<pre><code%s>%s</code></pre>'

# One fenced block plus the optional qualifier lines that directly follow it.
QUALIFIED_FENCED_BLOCK_RE = re.compile(
    # Opening fence (three or more backticks), language name, rest of the line.
    r'(?P<fence>`{3,})[ ]*(?P<lang>[a-zA-Z0-9_+-]*)(?P<lang_meta>.*?)\n'
    # Code body, ending at the start of a line.
    r'(?P<code>.*?)(?<=\n)'
    # Closing fence: optional indentation followed by the same backtick run.
    r'(?P<indent>[ \t]*)(?P=fence)[ ]*\n'
    # Either a blank line comes next, or qualifier lines up to a blank line.
    r'(?:(?=\n)|(?P<qualifies>.*?\n(?=\s*\n)))',
    flags=re.DOTALL | re.MULTILINE,
)

# A single "[command args]" item; group 1 is the text between the brackets.
QUALIFY_COMMAND_RE = re.compile(r'\[(.*?)\]')

# Leading spaces/tabs at the start of every line.
INDENT_RE = re.compile(r'^[ \t]+', flags=re.MULTILINE)
class QualifiedFencedCodeExtension(Extension):
    """Markdown extension that installs the qualified fenced-code preprocessor."""

    def __init__(self, global_qualify_list):
        """Keep the qualifier text that is applied to every fenced block.

        ``global_qualify_list`` is handed unchanged to the preprocessor,
        which appends it to each block's own qualifier text.
        """
        self.global_qualify_list = global_qualify_list

    def extendMarkdown(self, md, md_globals):
        """Register this extension on ``md`` and add its preprocessor.

        The preprocessor is placed right after ``normalize_whitespace``.
        ``md_globals`` is accepted for the extension API but not used.
        """
        preprocessor = QualifiedFencedBlockPreprocessor(
            md, self.global_qualify_list)
        md.registerExtension(self)
        md.preprocessors.add(
            'qualified_fenced_code',
            preprocessor,
            ">normalize_whitespace",
        )
def _make_random_string():
"""アルファベットから成るランダムな文字列を作る"""
from random import randrange
import string
alphabets = string.ascii_letters
return ''.join(alphabets[randrange(len(alphabets))] for i in range(32))
def _escape(txt):
"""basic html escaping"""
txt = txt.replace('&', '&')
txt = txt.replace('<', '<')
txt = txt.replace('>', '>')
txt = txt.replace('"', '"')
return txt
class QualifyDictionary(object):
    """Table of qualifier commands and the HTML each one produces."""

    def __init__(self):
        """Build ``qualify_dic``, mapping a command name to its renderer.

        Every renderer is called as ``renderer(text, *args)`` and returns the
        text wrapped in markup: ``{0}`` in a template is the text and ``{1}``
        is the first command argument.
        """
        def _renderer(template):
            # Bind one HTML template to a callable taking positional values.
            def render(*xs):
                return template.format(*xs)
            return render

        self.qualify_dic = {
            'italic': _renderer('<i>{0}</i>'),
            'color': _renderer('<span style="color:#{1}">{0}</span>'),
            'link': _renderer('<a href="{1}">{0}</a>'),
        }
class Qualifier(object):
    """One parsed qualifier line: a target word plus its decoration commands."""

    def __init__(self, line, qdic):
        """Parse ``line``, written as ``* target[command args][command args]...``.

        ``qdic`` supplies ``qualify_dic``; its keys are the command names the
        line is allowed to use.

        Raises:
            ValueError: if ``line`` does not have the expected form.
        """
        command_res = [r'(\[{cmd}(\]|.*?\]))'.format(cmd=cmd) for cmd in qdic.qualify_dic]
        qualify_re_str = r'^[ \t]*\*[ \t]+(?P<target>.*?)(?P<commands>({commands})+)$'.format(
            commands='|'.join(command_res))
        qualify_re = re.compile(qualify_re_str)

        m = qualify_re.search(line)
        if not m:
            raise ValueError('Failed parse')

        self.target = m.group('target')
        # Collect the text inside every "[...]" item, in order.  findall() is
        # used rather than a sub() callback so that no replacement value has
        # to be returned and no exception needs to be suppressed.
        self.commands = QUALIFY_COMMAND_RE.findall(m.group('commands'))

        # Both are built lazily on first use.
        self._target_re = None
        self._target_re_text = None

    def get_target_re_text(self):
        """Return the regex source that matches the target as a whole word.

        The target must not be adjacent to an ASCII letter or underscore on
        either side; the start and end of the text also count as boundaries.
        """
        if self._target_re_text is None:
            target_re_text = '((?<=[^a-zA-Z_])|(?:^)){target}((?=[^a-zA-Z_])|(?:$))'.format(target=re.escape(self.target))
            self._target_re_text = '(?:{})'.format(target_re_text)
        return self._target_re_text

    def _get_target_re(self):
        """Return the compiled form of ``get_target_re_text()`` (cached)."""
        if self._target_re is None:
            self._target_re = re.compile(self.get_target_re_text())
        return self._target_re

    def find_match(self, code):
        """Return True if the target occurs in ``code``."""
        return self._get_target_re().search(code) is not None
class QualifierList(object):
    """The qualifiers that apply to one fenced code block.

    Use in two phases: ``mark()`` replaces every target word in the raw code
    with a unique marker, and once the code has been converted to HTML,
    ``qualify()`` replaces each marker with the decorated target.
    """

    def __init__(self, lines):
        """Parse ``lines`` into qualifiers, dropping duplicates and bad lines."""
        self._qdic = QualifyDictionary()
        # Start in the "nothing marked" state so that qualify() is a harmless
        # no-op even if it is called before mark().
        self._code_re = re.compile("")
        self._match_qualifier = {}
        self._qs = self._parse_unique(lines)

    def _parse_unique(self, lines):
        """Return a Qualifier for each distinct line that parses, in order."""
        seen = set()
        results = []
        for line in lines:
            if line in seen:
                continue
            seen.add(line)
            try:
                results.append(Qualifier(line, self._qdic))
            except Exception:
                # Deliberately best effort: a line that cannot be turned into
                # a Qualifier is ignored instead of failing the conversion.
                continue
        return results

    def mark(self, code):
        """Replace every target word in ``code`` with a unique marker.

        If the target is ``sort``, each occurrence of ``sort`` becomes its
        own random string.  The markers and the qualifier each one stands
        for are remembered for ``qualify()``.  Returns the marked code.
        """
        # Forget whatever a previous call recorded.
        self._code_re = re.compile("")
        self._match_qualifier = {}

        if not self._qs:
            return code

        target_patterns = [q.get_target_re_text() for q in self._qs if q.find_match(code)]
        if not target_patterns:
            return code

        # marker_patterns: one regex per marker, used later to find it again.
        # match_qualifier: marker -> the qualifier to apply at that position.
        marker_patterns = []
        match_qualifier = {}

        def mark_command(match):
            # A fresh marker for every single replacement.
            match_name = _make_random_string()
            # Find the qualifier this word belongs to (the first one wins).
            text = match.group(0)
            match_qualifier[match_name] = next(q for q in self._qs if q.target == text)
            # Named group so that the marker identifies itself when matched.
            marker_patterns.append('(?P<{match_name}>{match_name})'.format(
                match_name=match_name
            ))
            return match_name

        # Replace all target words in one pass.
        code = re.sub('|'.join(target_patterns), mark_command, code)

        self._code_re = re.compile('|'.join(marker_patterns))
        self._match_qualifier = match_qualifier
        return code

    def qualify(self, html):
        """Replace the markers left by ``mark()`` in ``html`` with decorated text."""
        # No qualifiers were given.
        if not self._qs:
            return html
        # Qualifiers were given, but none of their targets occurred.
        if not self._code_re.pattern:
            return html

        def convert(match):
            # Every alternative of _code_re is a single named group, so the
            # last matched group's name is the marker itself.
            q = self._match_qualifier[match.lastgroup]
            text = _escape(q.target)
            for command in q.commands:
                xs = command.split(' ')
                # xs[0] is the command name, the rest are its arguments.
                text = self._qdic.qualify_dic[xs[0]](text, *xs[1:])
            return text

        return self._code_re.sub(convert, html)
def _removeIndent(code, indent):
    """Strip the width of ``indent`` from the leading whitespace of each line.

    Tabs are expanded to 4 columns before measuring and cutting.  ``code`` is
    returned unchanged when ``indent`` is empty.
    """
    if len(indent) > 0:
        width = len(indent.expandtabs(4))

        def cut_prefix(match):
            # Drop the first ``width`` columns of this line's indentation.
            return match.group().expandtabs(4)[width:]

        return INDENT_RE.sub(cut_prefix, code)
    return code
class QualifiedFencedBlockPreprocessor(Preprocessor):
    """Turn fenced code blocks into stashed HTML, applying their qualifiers."""

    def __init__(self, md, global_qualify_list):
        """Attach to ``md`` and reset its list of recorded example codes."""
        Preprocessor.__init__(self, md)
        md._example_codes = []
        self.global_qualify_list = global_qualify_list
        self.codehilite_conf = {}
        self.checked_for_codehilite = False

    def run(self, lines):
        """Replace every fenced block in ``lines`` with an htmlStash placeholder.

        Returns the resulting document as a list of lines.
        """
        # Look for the CodeHilite extension once, on the first run.
        if not self.checked_for_codehilite:
            hilite_ext = next(
                (ext for ext in self.markdown.registeredExtensions
                 if isinstance(ext, CodeHiliteExtension)),
                None)
            if hilite_ext is not None:
                self.codehilite_conf = hilite_ext.config
            self.checked_for_codehilite = True

        text = "\n".join(lines)
        example_counter = 0
        while True:
            m = QUALIFIED_FENCED_BLOCK_RE.search(text)
            if not m:
                break

            lang = m.group('lang')
            lang_meta = m.group('lang_meta')
            # A fence written like "```cpp example" marks a sample program.
            is_example = lang_meta and ('example' in lang_meta.strip().split())

            # The block's own qualifier lines followed by the global ones.
            qualify_text = (m.group('qualifies') or '') + self.global_qualify_list
            qualify_lines = [line for line in qualify_text.split('\n') if line]

            code = _removeIndent(m.group('code'), m.group('indent'))

            # For a sample program, record its code and an ID on the Markdown
            # instance.
            if is_example:
                id_source = str(example_counter) + code
                example_id = hashlib.sha1(id_source.encode('utf-8')).hexdigest()
                self.markdown._example_codes.append({"id": example_id, "code": code})
                example_counter += 1

            qualifier_list = QualifierList(qualify_lines)
            code = qualifier_list.mark(code)

            # A non-empty config means the CodeHilite extension is enabled,
            # so it is used to highlight blocks that name a language.
            if self.codehilite_conf and lang:
                conf = self.codehilite_conf
                highliter = CodeHilite(
                    code,
                    linenums=conf['linenums'][0],
                    guess_lang=conf['guess_lang'][0],
                    css_class=conf['css_class'][0],
                    style=conf['pygments_style'][0],
                    lang=(lang or None),
                    noclasses=conf['noclasses'][0])
                code = highliter.hilite()
                # A sample program is wrapped in <div id="..." class="yata">.
                if is_example:
                    code = '<div id="%s" class="yata">%s</div>' % (example_id, code)
            else:
                lang_attr = LANG_TAG % lang if lang else ''
                code = CODE_WRAP % (lang_attr, _escape(code))

            code = qualifier_list.qualify(code)

            placeholder = self.markdown.htmlStash.store(code)
            text = '%s\n%s\n%s' % (text[:m.start()], placeholder, text[m.end():])

        return text.split("\n")
def makeExtension(**kwargs):
    """Create the extension instance; ``kwargs`` go to its constructor."""
    extension = QualifiedFencedCodeExtension(**kwargs)
    return extension