-
Notifications
You must be signed in to change notification settings - Fork 20
/
quirks.py
364 lines (304 loc) · 13.8 KB
/
quirks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
import os
import re
import random
import logging
import itertools
import ostools
from mispeller import mispeller
from parsetools import parseRegexpFunctions, lexMessage, smiledict
# Data directory as reported by ostools.
_datadir = ostools.getDataDir()
# Shared application logger.
PchumLog = logging.getLogger("pchumLogger")
# Links: http/https/ftp URLs or magnet: URIs that start the string or follow
# whitespace, running up to the next whitespace (case-insensitive).
_urlre = re.compile(r"(?i)(?:^|(?<=\s))(?:(?:https?|ftp)://|magnet:)[^\s]+")
# _url2re = re.compile(r"(?i)(?<!//)\bwww\.[^\s]+?\.")
# A backslash followed by digits, e.g. \1 (group reference syntax).
_groupre = re.compile(r"\\([0-9]+)")
# Function-call syntax upper(...), lower(...), scramble(...), reverse(...);
# the argument may contain word characters, '<', '>' and backslashes.
_upperre = re.compile(r"upper\(([\w<>\\]+)\)")
_lowerre = re.compile(r"lower\(([\w<>\\]+)\)")
_scramblere = re.compile(r"scramble\(([\w<>\\]+)\)")
_reversere = re.compile(r"reverse\(([\w<>\\]+)\)")
# Color tags: <c...> and </c...>, matched case-insensitively and captured.
_ctagre = re.compile("(</?c=?.*?>)", re.I)
# Alternation of every smiledict key. The keys are joined without re.escape,
# so they are interpreted as regex syntax.
# NOTE(review): presumably no key contains regex metacharacters - confirm.
_smilere = re.compile("|".join(list(smiledict.keys())))
# "#name" / "@name" tokens at the start of the string or after whitespace;
# the leading whitespace (if any) is part of the match.
_memore = re.compile(r"(\s|^)(#[A-Za-z0-9_]+)")
_handlere = re.compile(r"(\s|^)(@[A-Za-z0-9_]+)")
# An <alt>...</alt> span (shortest match).
_alternian = re.compile(r"<alt>.*?</alt>")
# traditional quirks
def PesterQuirkFactory(quirk: dict):
    """Build the PesterQuirk subclass instance matching ``quirk["type"]``.

    This is a "factory" because a lot of old code depends on calling the old
    class which was all quirks rolled into one.

    Args:
        quirk: quirk dictionary; must contain a "type" key.

    Returns:
        An instance of the quirk class for that type, or None when the type
        is not one of the known ones (a warning is logged in that case).

    Raises:
        KeyError: if the dictionary has no "type" key.
    """
    # Switch case is much nicer here but python 3.8 doesn't support it :"3
    # TODO: change back to a switch statement when windows 7 support is dropped
    kind = quirk["type"]
    if kind == "prefix":
        return PrefixPesterQuirk(quirk)
    if kind == "suffix":
        return SuffixPesterQuirk(quirk)
    if kind == "replace":
        return ReplacePesterQuirk(quirk)
    if kind == "regexp":
        return RegexpPesterQuirk(quirk)
    if kind == "random":
        return RandomPesterQuirk(quirk)
    if kind == "spelling":
        return MispellerPesterQuirk(quirk)
    # Unknown type: keep returning None as before, but say so instead of
    # silently falling off the end of the function.
    logging.getLogger("pchumLogger").warning(
        "Unknown quirk type %r; no quirk object created", kind
    )
    return None
class PesterQuirk:
    """Base class for a single quirk built from a quirk dictionary.

    Subclasses override ``_apply`` (the actual text transformation) and
    ``__str__`` (a human-readable description).
    """

    def __init__(self, quirk: dict):
        """Store the quirk dictionary and read its common settings.

        "type" is required; "on", "group" and "checkstate" fall back to
        True, "Miscellaneous" and 0 respectively.
        """
        self.quirk = quirk
        self.type = quirk["type"]
        self.on = quirk.get("on", True)
        self.group = quirk.get("group", "Miscellaneous")
        # Seems to be something related to the Qt checkbox?
        # (QtCore.Qt.CheckState)
        self.checkstate = quirk.get("checkstate", 0)

    def apply(self, string: str, first: bool = False, last: bool = False):
        """Run the quirk on ``string`` unless the quirk is switched off.

        string: string to operate quirk on.
        first: is the given substring at the very start of the superstring?
        last: is the given substring at the very end of the superstring?
        """
        if not self.on:
            # Disabled quirks hand the text back untouched.
            return string
        return self._apply(string, first, last)

    def _apply(self, string: str, first: bool, last: bool):
        """Transform ``string``; subclasses must override this."""
        raise NotImplementedError()

    def __str__(self):
        """Description of the quirk; subclasses override this."""
        return "UNKNOWN QUIRK"
class PrefixPesterQuirk(PesterQuirk):
    """Quirk that puts the configured "value" in front of the text."""

    def __init__(self, quirk: dict):
        assert quirk["type"] == "prefix"
        super().__init__(quirk)

    def _apply(self, string: str, first: bool, last: bool):
        """Return ``string`` with the prefix prepended (first/last unused)."""
        prefix = self.quirk["value"]
        return prefix + string

    def __str__(self):
        prefix = self.quirk["value"]
        return "BEGIN WITH: %s" % (prefix)
class SuffixPesterQuirk(PesterQuirk):
    """Quirk that appends the configured "value" to the text."""

    def __init__(self, quirk: dict):
        assert quirk["type"] == "suffix"
        super().__init__(quirk)

    def _apply(self, string: str, first: bool, last: bool):
        """Return ``string`` with the suffix appended (first/last unused)."""
        suffix = self.quirk["value"]
        return string + suffix

    def __str__(self):
        suffix = self.quirk["value"]
        return "END WITH: %s" % (suffix)
class ReplacePesterQuirk(PesterQuirk):
    """Quirk that replaces every literal occurrence of "from" with "to"."""

    def __init__(self, quirk: dict):
        assert quirk["type"] == "replace"
        super().__init__(quirk)

    def _apply(self, string: str, first: bool, last: bool):
        """Plain (non-regex) substring replacement; first/last are unused."""
        needle = self.quirk["from"]
        substitute = self.quirk["to"]
        return string.replace(needle, substitute)

    def __str__(self):
        needle = self.quirk["from"]
        substitute = self.quirk["to"]
        return "REPLACE {} WITH {}".format(needle, substitute)
class RegexpPesterQuirk(PesterQuirk):
    """Quirk that substitutes regex matches of "from" using the "to" template."""

    def __init__(self, quirk: dict):
        assert quirk["type"] == "regexp"
        super().__init__(quirk)

    def _apply(self, string: str, first: bool, last: bool):
        """Apply the regex replacement to ``string``.

        A pattern starting with "^" is only applied to the first substring of
        the superstring, and one ending with "$" only to the last substring;
        in every other position the text is returned unchanged.
        """
        pattern = self.quirk["from"]
        # NOTE(review): a pattern ending in an escaped "\$" also ends with
        # "$" and is therefore treated as end-anchored here.
        wants_start = pattern.startswith("^")
        wants_end = pattern.endswith("$")
        if (wants_start and not first) or (wants_end and not last):
            return string
        # The replacement template goes through parseRegexpFunctions
        # (presumably what handles functions like rainbow()); the resulting
        # object's expand method is called for each match.
        replacement = parseRegexpFunctions(self.quirk["to"])
        return re.sub(pattern, replacement.expand, string)

    def __str__(self):
        pattern = self.quirk["from"]
        template = self.quirk["to"]
        return "REGEXP: {} REPLACED WITH {}".format(pattern, template)
class RandomPesterQuirk(PesterQuirk):
    """Quirk that replaces regex matches with a randomly picked template."""

    def __init__(self, quirk: dict):
        assert quirk["type"] == "random"
        super().__init__(quirk)

    def _apply(self, string: str, first: bool, last: bool):
        """Replace each match of "from" with a random entry of "randomlist".

        Returns ``string`` unchanged when there is nothing to pick from, or
        when a "^"/"$" anchored pattern is used on a substring that is not
        the first/last one of the superstring.
        """
        options = self.quirk.get("randomlist", [])
        if len(options) == 0:
            # Quirk is not set up right: no random strings to replace with.
            return string
        pattern = self.quirk["from"]
        # Same anchoring rule as the regexp quirk.
        wants_start = pattern.startswith("^")
        wants_end = pattern.endswith("$")
        if (wants_start and not first) or (wants_end and not last):
            return string

        def pick_replacement(match):
            # re.sub calls this once per match, so every match gets its
            # own random choice.
            picked = random.choice(self.quirk["randomlist"])
            return parseRegexpFunctions(picked).expand(match)

        return re.sub(pattern, pick_replacement, string)

    def __str__(self):
        pattern = self.quirk["from"]
        options = self.quirk["randomlist"]
        return "REGEXP: {} RANDOMLY REPLACED WITH {}".format(pattern, options)
class MispellerPesterQuirk(PesterQuirk):
    """Quirk that passes a random share of the words through mispeller()."""

    def __init__(self, quirk: dict):
        assert quirk["type"] == "spelling"
        super().__init__(quirk)

    def _apply(self, string: str, first: bool, last: bool):
        """Garble words of ``string`` with probability "percentage" / 100.

        The text is split on single spaces. Each word gets its own random
        roll, and color tags inside a selected word are left intact.
        """
        chance = self.quirk["percentage"] / 100.0
        # Matches (and, when splitting, keeps) <c...> / </c...> color tags.
        tag_re = re.compile("(</?c=?.*?>)", re.I)
        garbled = []
        for word in string.split(" "):
            roll = random.random()  # 0.0 <= roll < 1.0
            if roll >= chance:
                # Not selected this time: keep the word as it is.
                garbled.append(word)
            elif not tag_re.search(word):
                # Selected and free of color tags: garble the whole word.
                garbled.append(mispeller(word))
            else:
                # Selected but contains color tags: garble only the
                # non-empty pieces between the tags.
                pieces = [
                    mispeller(piece)
                    if piece and not tag_re.search(piece)
                    else piece
                    for piece in tag_re.split(word)
                ]
                garbled.append("".join(pieces))
        # Turn back into a normal sentence.
        return " ".join(garbled)

    def __str__(self):
        return "MISPELLER: %d%%" % (self.quirk["percentage"])
# TODO: clean this up. It's huge and really hard to read.
class PesterQuirkCollection:
    """Ordered list of quirks that is applied as a whole to a lexed message."""

    def __init__(self, quirklist):
        """quirklist: iterable of quirk dicts and/or PesterQuirk objects."""
        self.quirklist = []
        for quirk in quirklist:
            self.addQuirk(quirk)

    def plainList(self):
        """Return a list of all the quirk dictionaries."""
        return [quirk.quirk for quirk in self.quirklist]

    def addQuirk(self, quirk):
        """Append a quirk to the collection.

        quirk: dict (converted with PesterQuirkFactory) or a PesterQuirk.
        Anything else is ignored.
        """
        if isinstance(quirk, dict):
            self.quirklist.append(PesterQuirkFactory(quirk))
        elif isinstance(quirk, PesterQuirk):
            self.quirklist.append(quirk)

    @staticmethod
    def _drop_overlapping(excludes):
        """Return the start-sorted matches with overlapping ones removed.

        A match is dropped when it ends after the next match starts, so for
        "http://:3:" the smiley is kept rather than the link around it. The
        old in-place loop popped items while indexing a range computed
        beforehand and raised IndexError whenever an overlap occurred before
        the last pair. This version does not mutate its input and gives the
        same result in every case the old loop handled without crashing.
        """
        last_idx = len(excludes) - 1
        return [
            match
            for idx, match in enumerate(excludes)
            if idx == last_idx or match.end() <= excludes[idx + 1].start()
        ]

    @staticmethod
    def _find_excludes(string):
        """Find the spans of ``string`` that an excluding quirk must skip.

        Collects matches for links, smilies, handles, memos and <alt> spans,
        sorted by start offset and with overlapping matches removed.
        """
        found = sorted(
            itertools.chain(
                re.finditer(_urlre, string),
                re.finditer(_smilere, string),
                re.finditer(_handlere, string),
                re.finditer(_memore, string),
                re.finditer(_alternian, string),
            ),
            key=lambda match: match.start(),
        )
        return PesterQuirkCollection._drop_overlapping(found)

    @staticmethod
    def _apply_whole(quirk, string, is_first, is_last):
        """Apply one quirk to an entire string and return the result.

        Prefix quirks only act on the first string of the message and suffix
        quirks only on the last one. Regexp and random quirks are told
        whether this string is first/last.
        """
        kind = quirk.type
        if kind in ("regexp", "random"):
            return quirk.apply(string, first=is_first, last=is_last)
        if kind == "prefix":
            return quirk.apply(string) if is_first else string
        if kind == "suffix":
            return quirk.apply(string) if is_last else string
        return quirk.apply(string)

    @staticmethod
    def _apply_around_excludes(quirk, string, excludes, is_first, is_last):
        """Apply one quirk to the text between ``excludes`` only.

        ``excludes`` must be non-empty, sorted and non-overlapping. The
        excluded spans are copied to the result verbatim.
        """
        # Cut out the parts to be quirked: before the first exclude, between
        # consecutive excludes, and after the last one.
        sendparts = []
        cursor = 0
        for exclude in excludes:
            sendparts.append(string[cursor : exclude.start()])
            cursor = exclude.end()
        sendparts.append(string[cursor:])

        # NOTE(review): as in the previous implementation, prefix and suffix
        # quirks are applied to every part here, regardless of position.
        pass_position = quirk.type in ("regexp", "random")
        recvparts = [
            quirk.apply(part, first=is_first, last=is_last)
            if pass_position
            else quirk.apply(part)
            for part in sendparts
        ]

        # Reconstruct: quirked part, excluded text, quirked part, ...
        rebuilt = []
        for quirked, exclude in zip(recvparts, excludes):
            rebuilt.append(quirked)
            rebuilt.append(exclude.group())
        rebuilt.append(recvparts[-1])
        return "".join(rebuilt)

    def apply(self, lexed, first=False, last=False):
        """Apply every quirk in order to the string items of ``lexed``.

        Args:
            lexed: sequence of lexed message items. ``str`` items are
                quirked; all other items are passed through unchanged.
            first: accepted for interface compatibility; not read here.
            last: accepted for interface compatibility; not read here.

        Returns:
            A new list in which every resulting string has been run through
            lexMessage, with the non-string items kept in their positions.
        """
        prefixes = [
            quirk for quirk in self.quirklist if isinstance(quirk, PrefixPesterQuirk)
        ]
        last_idx = len(lexed) - 1
        newlist = []
        for idx, original in enumerate(lexed):
            is_first_string = idx == 0
            if not isinstance(original, str):
                if is_first_string:
                    # The message starts with a non-string item, so the
                    # prefix quirks get a separate string in front of it.
                    # NOTE(review): "+=" repeats the text built so far on
                    # every prefix; kept as is to preserve existing output.
                    string = " "
                    for prefix_quirk in prefixes:
                        string += prefix_quirk.apply(string)
                    newlist.append(string)
                newlist.append(original)
                continue
            is_last_string = idx == last_idx
            string = original
            for quirk in self.quirklist:
                try:
                    checkstate = int(quirk.checkstate)
                except (TypeError, ValueError, OverflowError):
                    # Unusable checkstate value: treat as "not checked".
                    checkstate = 0
                # checkstate 2 means the exclude option is checked: links,
                # smilies, handles, memos and <alt> spans stay untouched.
                excludes = self._find_excludes(string) if checkstate == 2 else []
                if excludes:
                    string = self._apply_around_excludes(
                        quirk, string, excludes, is_first_string, is_last_string
                    )
                else:
                    # No split, apply like normal.
                    string = self._apply_whole(
                        quirk, string, is_first_string, is_last_string
                    )
            newlist.append(string)
        final = []
        for item in newlist:
            if isinstance(item, str):
                final.extend(lexMessage(item))
            else:
                final.append(item)
        return final

    def __iter__(self):
        """Iterate over the quirk objects in order."""
        yield from self.quirklist