forked from thisismypassport/shrinko8
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pico_minify.py
407 lines (334 loc) · 17.1 KB
/
pico_minify.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
from utils import *
from pico_defs import fixnum_is_negative
from pico_tokenize import TokenType
from pico_tokenize import StopTraverse, k_skip_children
from pico_parse import Node, NodeType, VarKind
from pico_parse import k_unary_ops_prec, get_precedence, is_right_assoc, can_replace_with_unary
from pico_parse import is_vararg_expr, is_short_block_stmt, is_global_or_builtin_local
from pico_output import format_fixnum, format_string_literal
from pico_output import output_min_wspace, output_original_wspace
class Focus(Bitmask):
    # Flags describing what the minifier should optimize for; elsewhere in this file they
    # are read as focus.chars, focus.compressed and focus.tokens.
    # NOTE(review): Bitmask comes from utils (star import); presumably it gives each member
    # declared as `...` its own bit value - confirm against utils.
    chars = compressed = tokens = ...
    # explicit "no focus" value
    none = 0
def minify_string_literal(ctxt, token, focus, value=None):
    # formats the string held by 'token' (or the explicit 'value', when one is given)
    # as a string literal, according to the requested focus
    text = token.string_value if value is None else value

    if not focus.chars:
        # haven't found a good balanced heuristic for 'long' yet
        # (so a literal stays long exactly when the original token started with '[')
        was_long = token.value.startswith('[')
        return format_string_literal(text, long=was_long)

    # focusing on chars: complex long strings are only allowed from version 40 onwards
    return format_string_literal(text, use_complex_long=ctxt.version >= 40)
def minify_needs_comments(minify):
    # tells the caller whether minify_code makes use of the tokens' comments:
    # that is the case exactly when the "wspace" option (on by default) is falsy
    wspace_enabled = minify.get("wspace", True)
    return not wspace_enabled
def get_node_bodies(node):
    # yields every body belonging to a statement: for an if/elseif chain that is each
    # 'then' body in order, followed by the body of the closing non-elseif node (if any);
    # for any other node it is just node.body
    current = node
    while current.type in (NodeType.if_, NodeType.elseif):
        yield current.then
        current = current.else_
        if not current:
            # the chain ended without a trailing node
            return
    yield current.body
def analyze_code_for_minify(root, focus):
    # walks the tree under 'root' and decides, per statement type (if/while), whether
    # statements should be converted to shorthand, to long form, or left alone.
    # returns a Dynamic with:
    #   new_shorts - maps NodeType.if_/NodeType.while_ to True (shorten), False (lengthen) or None (leave alone)
    #   shortenables - the set of long-form nodes that could be converted to shorthand
    short_counts = CounterDictionary()
    long_counts = CounterDictionary()
    shortenables = set()

    def count_statement(stmt):
        if stmt.type not in (NodeType.if_, NodeType.while_):
            return

        use_short = stmt.short

        # an 'if' weighs one more for every elseif chained to it
        weight = 1
        if stmt.type == NodeType.if_:
            branch = stmt.else_
            while branch and branch.type == NodeType.elseif:
                weight += 1
                branch = branch.else_

        # can the statement be converted to shorthand? (never when it has an elseif)
        if not use_short and weight == 1:
            has_shorthand = has_empties = starts_with_do = False

            def check_shorthand(other):
                nonlocal has_shorthand
                # ideally, could allow last node in an 'if' to be a print...
                if other.short or other in shortenables:
                    has_shorthand = True

            # the parents are checked first...
            stmt.traverse_parents(check_shorthand)

            # ...and then the children
            for index, body in enumerate(get_node_bodies(stmt)):
                body.traverse_nodes(post=check_shorthand)
                if not body.children:
                    has_empties = True
                if index == 0:
                    # beware of do block ambiguity
                    starts_with_do = body.first_token().value == "do"

            # empty bodies require extra ';'s to shorten, which worsens compression
            blocked = has_shorthand or (has_empties and not focus.chars) or starts_with_do
            use_short = not blocked
            if use_short:
                shortenables.add(stmt)

        tally = short_counts if use_short else long_counts
        tally[stmt.type] += weight

    root.traverse_nodes(post=count_statement)

    def pick_form(node_type):
        # if everything can be made short, that's always best.
        # else, consistency is better for compression while more shorts are better for chars
        if focus.chars or not long_counts[node_type] or \
                (not focus.compressed and long_counts[node_type] * 1.5 <= short_counts[node_type]):
            return True
        if focus.compressed:
            return False
        return None # leave alone

    new_shorts = {node_type: pick_form(node_type) for node_type in (NodeType.if_, NodeType.while_)}
    return Dynamic(new_shorts=new_shorts, shortenables=shortenables)
def minify_change_shorthand(node, new_short):
    # converts an if/while statement node, in place, between its long form and its shorthand form.
    #   node - the statement node to convert (its tokens and 'short' flag are modified)
    #   new_short - True to convert to shorthand, False to convert back to the long form
    if new_short:
        node.short = True
        # drop the 'then'/'do' keyword, which sits at child index 2
        node.remove_token(2, ("then", "do"))
        # drop the closing 'end': when an 'if' has an else node, the 'end' is that node's last child
        if node.type == NodeType.if_ and node.else_:
            node.else_.short = True
            node.else_.remove_token(-1, "end")
        else:
            node.remove_token(-1, "end")

        # we can assume node.cond is not wrapped in parens, since we're in a post-visit
        # wrap it in parens ourselves (TODO: eww...)
        node.cond.replace_with(Node(NodeType.group, [], child=node.cond.move()))
        # from here on node.cond is used as the new group node; the moved condition becomes its child
        node.cond.children.append(node.cond.child)
        node.cond.insert_token(0, TokenType.punct, "(", near_next=True)
        node.cond.append_token(TokenType.punct, ")")

        # fixup empty bodies
        for body in get_node_bodies(node):
            if not body.children:
                body.append_token(TokenType.punct, ";")

        # remove line breaks originally in the source
        # (every token of the statement gets the vline of the statement's first token)
        vline = node.first_token().vline
        def fix_vlines(token):
            token.vline = vline
        node.traverse_tokens(fix_vlines)
    else:
        node.short = False
        # put back the keyword at child index 2: 'then' for an if, 'do' otherwise
        node.insert_token(2, TokenType.keyword, "then" if node.type == NodeType.if_ else "do")
        # put back the closing 'end', at the end of the else node when an 'if' has one
        if node.type == NodeType.if_ and node.else_:
            node.else_.short = False
            node.else_.append_token(TokenType.keyword, "end", near_next=True)
        else:
            node.append_token(TokenType.keyword, "end", near_next=True)
def node_contains_vars(root, vars):
    # returns whether any var node visited while traversing 'root' refers to one of 'vars'
    def stop_on_match(node):
        if node.type != NodeType.var:
            return
        if node.var in vars:
            # abort the traversal, the answer is already known
            raise StopTraverse()

    try:
        root.traverse_nodes(stop_on_match, extra=True)
    except StopTraverse:
        return True
    else:
        return False
def expr_is_trivial(root, ctxt, safe_only, allow_member=True, allow_index=True, allow_call=True):
    # returns whether the expression 'root' passes the checks below on every visited node.
    #   safe_only - when set, reject calls and anything else that may call user-defined code
    #   allow_member / allow_index / allow_call - when unset, reject member accesses / indexing / calls
    #   ctxt - supplies callback_builtins, the function names that are never accepted as calls

    # (since new tables have no metatable)
    inert_types = (NodeType.const, NodeType.varargs, NodeType.group,
                   NodeType.table, NodeType.table_member, NodeType.table_index)

    def visitor(expr):
        kind = expr.type

        # nodes that cannot call user-defined code in any case
        if kind in inert_types:
            return None
        if kind == NodeType.var and expr.kind != VarKind.global_:
            return None
        if kind == NodeType.unary_op and expr.op == "not":
            return None
        if kind == NodeType.binary_op and expr.op in ("and", "or"):
            return None
        if kind == NodeType.function:
            assert not expr.target # we only traverse expressions!
            return k_skip_children

        # nodes that may call user-defined code
        if kind == NodeType.call:
            if safe_only or not allow_call:
                raise StopTraverse()
            func = expr.func
            if not (func.type == NodeType.var and is_global_or_builtin_local(func) and not func.var.reassigned and func.name not in ctxt.callback_builtins):
                raise StopTraverse()
            return None
        if kind == NodeType.member and not allow_member:
            raise StopTraverse()
        if kind == NodeType.index and not allow_index:
            raise StopTraverse()

        # nodes that may call user-defined code via metatables (E.g. member access, operators)
        if safe_only:
            raise StopTraverse()
        return None

    try:
        root.traverse_nodes(visitor)
    except StopTraverse:
        return False
    else:
        return True
def minify_merge_assignments(prev, next, ctxt, safe_only):
    # tries to merge the assignment/declaration 'next' into the one before it, 'prev':
    # on success, next's targets and sources are spliced into prev and next is erased.
    # returns without changing anything whenever one of the checks below fails.
    #   prev, next - statement nodes with 'targets' and 'sources' lists
    #   ctxt, safe_only - passed on to expr_is_trivial

    # prev has more sources than targets - don't merge
    if len(prev.targets) < len(prev.sources):
        return
    # prev has targets without a source: merging shifts which source feeds which target,
    # so bail out if either statement ends in a vararg expression or next has surplus sources
    if len(prev.targets) > len(prev.sources) and \
            ((prev.sources and is_vararg_expr(prev.sources[-1])) or (next.sources and is_vararg_expr(next.sources[-1])) or len(next.targets) < len(next.sources)):
        return

    # next's first token may carry a 'merge_prev' attribute; an explicit False forbids the merge
    merge_prev = getattr(next.first_token(), "merge_prev", None)
    if merge_prev is False:
        return

    # check if prev's targets are used in next's sources or targets

    require_trivial = False # True when prev.targets may be accessed indirectly from functions that may be called by next.sources
    allow_index = allow_member = True
    target_vars = []
    for target in prev.targets:
        if target.type == NodeType.var:
            target_vars.append(target.var)
            # is it possible for 'next' to access 'target' without referring to it directly? (via function call)
            if target.kind == VarKind.global_ or (prev.type == NodeType.assign and target.var.captured):
                require_trivial = True
        elif target.type == NodeType.member:
            target_vars.append(target.key.var)
            require_trivial = True
            allow_index = False # TODO: could rely on rename's preserve logic
        elif target.type == NodeType.index:
            require_trivial = True
            allow_member = False # TODO: could rely on rename's preserve logic
            allow_index = False
        else: # just in case...
            return

    for node in next.sources:
        if target_vars and node_contains_vars(node, target_vars):
            return
        if require_trivial and not expr_is_trivial(node, ctxt, safe_only, allow_member, allow_index):
            return

    # same checks for next's targets, except that calls are never allowed there
    for node in next.targets:
        if target_vars and node_contains_vars(node, target_vars):
            return
        if require_trivial and not expr_is_trivial(node, ctxt, safe_only, allow_member, allow_index, allow_call=False):
            return

    # when reordering local declarations, ensure we don't change which local wins out among identically-named locals
    # (this relies on rename being done already!)
    if len(prev.targets) > len(prev.sources) and prev.type == NodeType.local:
        for target in prev.targets[len(prev.sources):]:
            for next_target in next.targets:
                if target.name == next_target.name:
                    return

    # do the merge: (TODO: eww...)

    def insert_array_items(dst_node, dst_arr, dst_arr_i, src_arr, src_arr_i, count):
        # moves 'count' items of src_arr (starting at src_arr_i) into dst_arr at index dst_arr_i,
        # inserting the items and the needed ',' (or '=') tokens into dst_node's children as well
        # NOTE(review): 'default' comes from utils; presumably it returns its second argument when the first is None - confirm
        count = default(count, len(src_arr) - src_arr_i)
        if not count:
            return

        need_end_comma = False
        if dst_arr_i < len(dst_arr):
            # inserting before an existing item: a ',' is needed after the last inserted item
            dst_i = dst_node.children.index(dst_arr[dst_arr_i])
            need_end_comma = True
        elif len(dst_arr):
            # appending after the last existing item: a ',' is needed before the first inserted item
            dst_i = dst_node.children.index(dst_arr[dst_arr_i - 1]) + 1
            dst_node.insert_token(dst_i, TokenType.punct, ",")
            dst_i += 1
        else:
            # the destination list is empty, which is only expected for sources: start it with '='
            assert dst_arr is dst_node.sources
            dst_node.append_token(TokenType.punct, "=")
            dst_i = len(dst_node.children)

        for i in range(count):
            src_elem = src_arr[src_arr_i + i]
            dst_arr.insert(dst_arr_i + i, src_elem)
            dst_node.insert_existing(dst_i, src_elem)
            dst_i += 1
            if i < count - 1 or need_end_comma:
                dst_node.insert_token(dst_i, TokenType.punct, ",")
                dst_i += 1

    # both lists are extended at index len(prev.sources), so next's targets line up with next's sources
    # and any source-less targets of prev end up after them; the targets call must come first,
    # since the sources call changes len(prev.sources)
    insert_array_items(prev, prev.targets, len(prev.sources), next.targets, 0, None)
    insert_array_items(prev, prev.sources, len(prev.sources), next.sources, 0, None)
    next.erase()
def minify_code(ctxt, root, minify_opts):
    # minifies the parsed code under 'root' in place and returns the result of outputting it.
    #   ctxt - supplies 'version', and is passed on to the string/assignment helpers
    #   root - root node of the parsed code
    #   minify_opts - dict of options, each optional:
    #     "safe-reorder" (default False), "lines", "wspace", "tokens", "comments", "reorder" (all default True),
    #     and "focus" (passed to Focus)
    # returns output_min_wspace(root, lines) when "wspace" is on, else output_original_wspace(root, comments)

    safe_reorder = minify_opts.get("safe-reorder", False)
    minify_lines = minify_opts.get("lines", True)
    minify_wspace = minify_opts.get("wspace", True)
    minify_tokens = minify_opts.get("tokens", True)
    minify_comments = minify_opts.get("comments", True)
    minify_reorder = minify_opts.get("reorder", True)
    focus = Focus(minify_opts.get("focus"))

    if not focus.tokens:
        safe_reorder = True # nothing gained with False here, so set it to True just in case.

    analysis = analyze_code_for_minify(root, focus)

    def fixup_nodes_pre(node):
        if minify_tokens:
            # remove shorthands
            # (new_shorts values are True, False or None; only an explicit False means "convert to long form")

            if node.type in (NodeType.if_, NodeType.while_) and node.short and (analysis.new_shorts[node.type] is False):
                minify_change_shorthand(node, False)

            # remove unneeded groups

            while node.type == NodeType.group:
                inner, outer = node.child, node.parent
                inner_prec, outer_prec = get_precedence(inner), get_precedence(outer)
                needed = True
                # NOTE(review): 'e' comes from utils; presumably it tests that a precedence exists (is not None) - confirm
                if e(inner_prec) and e(outer_prec) and (inner_prec > outer_prec or (inner_prec == outer_prec and
                        (outer_prec == k_unary_ops_prec or is_right_assoc(outer) == (outer.right == node)))):
                    needed = False
                elif e(outer_prec) and inner.type in (NodeType.var, NodeType.index, NodeType.member, NodeType.call, NodeType.varargs):
                    needed = False
                elif e(outer_prec) and inner.type == NodeType.const and (focus.tokens or can_replace_with_unary(node) or
                        not (inner.token.type == TokenType.number and fixnum_is_negative(inner.token.fixnum_value))):
                    needed = False
                elif outer.type in (NodeType.group, NodeType.table_member, NodeType.table_index, NodeType.op_assign):
                    needed = False
                # in the list contexts below, parens around a trailing vararg expression are kept
                elif outer.type == NodeType.call and (node in outer.args[:-1] or
                        (outer.args and node == outer.args[-1] and not is_vararg_expr(inner))):
                    needed = False
                elif outer.type in (NodeType.assign, NodeType.local) and (node in outer.sources[:-1] or
                        (outer.sources and node == outer.sources[-1] and (not is_vararg_expr(inner) or len(outer.targets) <= len(outer.sources)))):
                    needed = False
                elif outer.type in (NodeType.return_, NodeType.table) and (node in outer.items[:-1] or
                        (outer.items and node == outer.items[-1] and not is_vararg_expr(inner))):
                    needed = False
                elif outer.type in (NodeType.if_, NodeType.elseif, NodeType.while_, NodeType.until) and not outer.short:
                    needed = False

                if needed:
                    break
                else:
                    node.replace_with(node.child.move())
                    # node may now be another group, so loop

    def fixup_nodes_post(node):
        if minify_tokens:
            # create shorthands
            # (only an explicit True means "convert to shorthand", and only for nodes found shortenable)

            if node.type in (NodeType.if_, NodeType.while_) and not node.short and \
                    (analysis.new_shorts[node.type] is True) and node in analysis.shortenables:
                minify_change_shorthand(node, True)

        if minify_reorder:
            # merge assignments

            if node.type == NodeType.local or (focus.tokens and node.type == NodeType.assign):
                prev = node.prev_sibling()
                while prev and prev.type is None: # skip erased
                    prev = prev.prev_sibling()
                if prev and prev.type == node.type:
                    minify_merge_assignments(prev, node, ctxt, safe_reorder)

    def fixup_tokens(token):

        # minify sublangs

        sublang = getattr(token, "sublang", None)
        if sublang and sublang.minify:
            token.modify(minify_string_literal(ctxt, token, focus, value=sublang.minify()))

        if minify_tokens:

            # remove unneeded tokens

            if token.value == ";" and token.parent.type == NodeType.block and token.next_token().value != "(":
                gparent = token.parent.parent
                # keep the ';' when it is all that's inside the block of a short block statement
                if not (gparent and is_short_block_stmt(gparent) and not token.parent.stmts):
                    token.erase()
                    return

            # trailing separator before a table's closing brace
            if token.value in (",", ";") and token.parent.type == NodeType.table and token.next_sibling().value == "}":
                token.erase()
                return

            # parens around a call's single table or string argument
            if token.value == "(" and token.parent.type == NodeType.call and len(token.parent.args) == 1:
                arg = token.parent.args[0]
                if arg.type == NodeType.table or (arg.type == NodeType.const and arg.token.type == TokenType.string):
                    token.erase("(")
                    token.parent.erase_token(-1, ")")
                    return

            # replace tokens for higher consistency

            if token.value == ";" and token.parent.type == NodeType.table:
                token.modify(",")

            if token.value == "!=":
                token.modify("~=")

            if token.value == "^^" and ctxt.version >= 37:
                token.modify("~")

            if token.type == TokenType.string:
                token.modify(minify_string_literal(ctxt, token, focus))

            if token.type == TokenType.number:
                allow_unary = can_replace_with_unary(token.parent)
                token.modify(format_fixnum(token.fixnum_value, sign=None if allow_unary else ''))

        # (this check is done regardless of the "tokens" option)
        if token.type == TokenType.number:
            if token.value.startswith(("-", "~")): # either due to format_fixnum above, or due to ConstToken.value
                # insert synthetic unary token, so that output_tokens's tokenize and root.get_tokens() won't get confused
                token.parent.insert_token(0, TokenType.punct, token.value[0], near_next=True)
                token.modify(token.value[1:])

    root.traverse_nodes(fixup_nodes_pre, fixup_nodes_post, tokens=fixup_tokens)

    if minify_wspace:
        return output_min_wspace(root, minify_lines)
    else:
        return output_original_wspace(root, minify_comments)