-- spellchecker.lua
--[[
Spell checker for MUSHclient, written by Nick Gammon.
Written: 9th October 2006
Updated: 11th October 2006
Updated: 6th March 2007 to make progress bar optional
Updated: 13th April 2007 to add IGNORE_MIXED_CASE, IGNORE_IMBEDDED_NUMBERS
Updated: 15th February 2009 to convert to using SQLite database instead of Lua table
Updated: 21st February 2009 to fix problem where words with 2 metaphones were only stored once.
--]]
-- ---------------------------------------------------------------------------
-- User-configurable options
-- ---------------------------------------------------------------------------
local SHOW_PROGRESS_BAR = true -- show a progress dialog while the dictionaries are loaded? true or false
local METAPHONE_LENGTH = 4 -- length argument passed to utils.metaphone
local EDIT_DISTANCE = 4 -- a dictionary word is suggested only if its edit distance to the word is less than this
local CASE_SENSITIVE = false -- compare case? true or false (when false, words are upper-cased before comparing)
local IGNORE_CAPITALIZED = false -- ignore words starting with a capital (A-Z)? true or false
local IGNORE_MIXED_CASE = false -- ignore words in MixedCase (like that one)? true or false
local IGNORE_IMBEDDED_NUMBERS = false -- ignore words with digits in them? true or false
-- This is the Lua pattern (not a regular expression) used to find "words" in
-- the text to be spell-checked: one or more letters, an optional single
-- quote, then one or more letters or digits. Note that it therefore only
-- matches words of at least two characters.
local pattern = "%a+'?[%a%d]+" -- a word with a possible single imbedded quote
-- path to the spell check dictionaries: "spell\" appended to the application
-- directory (assumes app_directory already ends with a path separator -- TODO confirm)
local directory = utils.info ().app_directory .. "spell\\"
-- file name of the user dictionary, in the above path
local userdict = "userdict.txt"
-- ---------------------------------------------------------------------------
-- Stuff below is used internally
-- ---------------------------------------------------------------------------
local make_upper -- this becomes the upper-case conversion function; it is assigned in init
local db -- SQLite3 dictionary database, opened in init
local cancelmessage = "spell check cancelled" -- error text raised when the dialog is cancelled; spellcheck searches for it
local previousword --> not used right now
local change, ignore -- tables of change-all, ignore-all words (reset on every call to spellcheck)
-- dictionaries - add new entries along similar lines to add more dictionary files
-- (init reads these, in this order, when the words table is empty)
local files = {
-- lower-case words
"english-words.10",
"english-words.20",
"english-words.35",
"english-words.40",
-- upper case words
"english-upper.10",
"english-upper.35",
"english-upper.40",
-- American words
"american-words.10",
"american-words.20",
-- contractions (eg. aren't, doesn't)
"english-contractions.10",
"english-contractions.35",
-- user dictionary
userdict,
}
-- Remove leading and trailing whitespace from a string.
--   s: the string to trim
-- Returns the trimmed string (and nothing else).
local function trim (s)
  -- string.gsub also returns the number of substitutions; assigning to a
  -- single local keeps only the resulting string
  local stripped = string.gsub (s, "^%s*(.-)%s*$", "%1")
  return stripped
end -- trim
-- Insert a word into the "words" table of the dictionary database.
-- Called when reading the dictionary files, and also when a word is added
-- during a spell check.
--   word: the word to store; an empty string is silently ignored
--   user: integer written to the "user" column (callers in this file pass 0
--         for words read from dictionary files and 1 for user-added words)
-- One row is written per metaphone returned by utils.metaphone, so a word
-- that has an alternate metaphone is stored twice.
-- Raises an error if the database reports that an insert failed.
local function insert_word (word, user)
  if word == "" then
    return
  end -- empty word

  -- Write one row. db:execute returns a numeric result code, and every
  -- number counts as true for assert(), so the code has to be compared with
  -- sqlite3.OK explicitly for a failure to be noticed.
  local function insert_row (name, metaphone)
    local sql = string.format ("INSERT INTO words VALUES (NULL, '%s', '%s', %i)",
                               name, metaphone, user)
    local rc = db:execute (sql)
    if not rc or (type (rc) == "number" and rc ~= sqlite3.OK) then
      error (string.format ("cannot insert word '%s' (result code %s): %s",
                            word, tostring (rc), tostring (db:errmsg ())))
    end -- insert failed
  end -- insert_row

  -- get both metaphones
  local m1, m2 = utils.metaphone (word, METAPHONE_LENGTH)

  -- convert ' to '' so the word can sit inside a quoted SQL literal
  -- (assigning to one local drops gsub's substitution count)
  local fixed_word = string.gsub (word, "'", "''")

  insert_row (fixed_word, m1)

  -- do 2nd metaphone, if any
  if m2 then
    insert_row (fixed_word, m2)
  end -- having alternative
end -- insert_word
-- Build a comparison function for table.sort that puts suggestions into
-- edit-distance order: the suggestion closest to `word` comes first, and
-- suggestions at the same distance are ordered by their make_upper form.
--   word: the word the suggestions are measured against; it is used as
--         given (the caller in this file passes it already run through
--         make_upper)
local function suggestions_compare (word)

  -- edit distance between one suggestion and the reference word
  local function distance_to_word (candidate)
    return utils.edit_distance (make_upper (candidate), word)
  end -- distance_to_word

  return function (a, b)
    local dist_a = distance_to_word (a)
    local dist_b = distance_to_word (b)
    if dist_a ~= dist_b then
      return dist_a < dist_b
    end -- different distances
    -- same distance: fall back to comparing the converted words
    return make_upper (a) < make_upper (b)
  end -- comparison function
end -- function suggestions_compare
-- Check one word, asking the user what to do if it is not in the dictionary.
-- spellcheck passes this function to string.gsub as the replacement
-- callback, so the first return value is the text that ends up in the line.
--   word: the word to check
-- Returns the word to use (the original or its replacement) followed by a
-- status string: "ok" (found, or added to the dictionary), "ignore" or
-- "change". The "change all" path returns the replacement word only.
-- Raises error (cancelmessage) if the dialog is cancelled, and an error if
-- the dialog returns an action this function does not know.
local function checkword_and_suggest (word)

  -- optional filters: matching words are passed through without being checked
  if IGNORE_CAPITALIZED and string.find (word, "^[A-Z]") then
    return word, "ignore" -- starts with a capital
  end -- if IGNORE_CAPITALIZED

  if IGNORE_MIXED_CASE and
     string.find (word, "[A-Z]") and
     string.find (word, "[a-z]") then
    return word, "ignore" -- has both upper and lower case letters
  end -- if IGNORE_MIXED_CASE

  if IGNORE_IMBEDDED_NUMBERS and string.find (word, "%d") then
    return word, "ignore" -- has a digit in it
  end -- if IGNORE_IMBEDDED_NUMBERS

  -- convert to upper-case if wanted; declared local so that it is not
  -- created as a global variable (the nested function below sees it as an
  -- upvalue, and a recursive re-check gets its own copy)
  local uc_word = make_upper (word)

  -- if we already did "ignore all" on this particular word, ignore it again
  if ignore [word] then
    return word, "ignore"
  end -- this round, ignore this word

  -- if we said change A to B, change it again
  if change [word] then
    return change [word], "change"
  end -- change to this word

  -- table of suggestions, based on the metaphone (keyed by word so that a
  -- word reached through both metaphones is only listed once)
  local keyed_suggestions = {}

  -- get both metaphones
  local m1, m2 = utils.metaphone (word, METAPHONE_LENGTH)

  -- Scan the dictionary words stored under metaphone m. Returns true as soon
  -- as one of them equals our word; otherwise collects the close ones as
  -- suggestions and returns false.
  local function lookup_metaphone (m)
    local found = false
    for row in db:rows (string.format ("SELECT name FROM words WHERE metaphone = '%s'", m)) do
      local candidate = row [1]
      if make_upper (candidate) == uc_word then
        found = true -- found exact match
        break
      end -- found
      if utils.edit_distance (make_upper (candidate), uc_word) < EDIT_DISTANCE then
        keyed_suggestions [candidate] = true
      end -- close enough
    end -- for each dictionary word with this metaphone
    return found
  end -- lookup_metaphone

  -- look up first metaphone
  if lookup_metaphone (m1) then
    return word, "ok"
  end -- word found

  -- try 2nd metaphone
  if m2 and lookup_metaphone (m2) then
    return word, "ok"
  end -- have alternate metaphone, and word found

  -- pull into indexed table
  local suggestions = {}
  for k in pairs (keyed_suggestions) do
    table.insert (suggestions, k)
  end -- for

  table.sort (suggestions, suggestions_compare (uc_word))

  -- not found? do spell check dialog
  local action, replacement = utils.spellcheckdialog (word, suggestions)

  -- they cancelled?
  if not action then
    error (cancelmessage) --> forces us out of gsub loop
  end -- cancelled

  -- ignore this only - just return
  if action == "ignore" then
    return word, "ignore" -- use current word
  end -- ignore word

  -- ignore all of this word? add to list
  if action == "ignoreall" then
    ignore [word] = true
    return word, "ignore" -- use current word
  end -- ignore word

  -- add to user dictionary? the word goes into the database flagged as a
  -- user word
  if action == "add" then
    insert_word (word, 1)
    return word, "ok"
  end -- adding

  -- change word once? return replacement
  if action == "change" then
    return checkword_and_suggest (replacement) -- however, re-check it
  end -- changing

  -- change all occurrences? add to table, return replacement
  if action == "changeall" then
    local newword, newaction = checkword_and_suggest (replacement) -- re-check it
    -- only remember the change if the replacement itself checked out
    if newaction == "ok" then
      change [word] = newword
    end -- if approved
    return newword -- return the new word
  end -- changing

  error "unexpected result from dialog"
end -- checkword_and_suggest
-- Exported function to do the spellcheck: checks every word of a line,
-- prompting through checkword_and_suggest for words that are not found.
--   line: the text to spell check
-- Returns the line with any replacements applied, or nil if the spell check
-- dialog was cancelled. Any other error is raised again.
function spellcheck (line)
  change = {} -- words to change
  ignore = {} -- words to ignore

  -- checkword_and_suggest raises an error if they cancel the dialog, so run
  -- the substitution in protected mode. ok and result are local so that
  -- they are not created as global variables.
  local ok, result = xpcall (function ()
      return string.gsub (line, pattern, checkword_and_suggest)
    end, debug.traceback)

  if ok then
    return result -- the substituted line (gsub's count is dropped)
  end -- not cancelled spell check

  -- whoops! some other error? (anything that does not carry the cancel
  -- message, including an error value that is not a string)
  if type (result) ~= "string" or
     not string.find (result, cancelmessage, 1, true) then
    error (result)
  end -- some other error

  return nil --> shows they cancelled
end -- spellcheck
local notfound -- table of not-found words, for spellcheck_string

-- Check one word without any user interaction; called (through string.gsub)
-- by spellcheck_string.
--   word: the word to check
-- A word that is not in the dictionary is appended to the notfound table.
-- Nothing is returned, so gsub leaves the text as it was.
local function checkword (word)
  -- convert to upper-case if wanted; local, so it is not created as a
  -- global variable
  local uc_word = make_upper (word)

  -- only the first metaphone is used for this lookup
  local m = utils.metaphone (word, METAPHONE_LENGTH)

  for row in db:rows (string.format ("SELECT name FROM words WHERE metaphone = '%s'", m)) do
    if make_upper (row [1]) == uc_word then
      return -- found exact match: do nothing
    end -- found
  end -- for each dictionary word with this metaphone

  -- otherwise insert our word
  table.insert (notfound, word)
end -- function checkword
-- Exported function to spellcheck a string without prompting.
--   text: the text to check
-- Returns a new table listing, in order of appearance, the words that
-- checkword added to it (the ones it did not find).
function spellcheck_string (text)
  local misses = {}
  notfound = misses -- checkword appends to this table
  string.gsub (text, pattern, checkword)
  return misses
end -- spellcheck_string
-- Exported function to add a word to the user dictionary.
--   word:        the word to add
--   action:      must be "i" (the only action supported right now)
--   replacement: accepted but not used by this function
-- Raises an error if action is anything other than "i".
function spellcheck_add_word (word, action, replacement)
  if action ~= "i" then
    -- level 0: raise the message exactly as written, with no position added
    error ("Can only use action 'i' in user dictionary", 0)
  end -- unsupported action
  insert_word (word, 1) -- 1 flags the word as a user word
end -- spellcheck_add_word
-- Read one of the dictionary files, inserting each of its lines as a word.
--   dlg:  the progress dialog; only touched when SHOW_PROGRESS_BAR is set
--   name: file name of the dictionary, inside the dictionaries directory
-- Raises "Dictionary loading cancelled" if the progress dialog reports a
-- cancel; errors from io.lines or insert_word pass through to the caller.
local function read_dict (dlg, name)
  local path = directory .. name

  if SHOW_PROGRESS_BAR then
    -- advance the bar and show which file is being loaded
    dlg:step ()
    dlg:status (path)
    if dlg:checkcancel () then
      error "Dictionary loading cancelled"
    end -- if cancelled
  end -- if SHOW_PROGRESS_BAR

  -- every line of the file is one word; 0 = not a user word
  for entry in io.lines (path) do
    insert_word (entry, 0)
  end -- for each line
end -- read_dict
-- One-time initialization:
--   * chooses the make_upper function according to CASE_SENSITIVE
--   * creates an empty user dictionary file if there is none
--   * opens the SQLite database "spell.sqlite" in the dictionaries directory
--   * (re)creates the words table unless one with a word_id column exists
--   * if the words table is empty, loads every file listed in `files` into
--     it inside a single transaction
-- Raises an error if a file or the database cannot be opened, if an SQL
-- statement fails, or if loading a dictionary fails or is cancelled.
local function init ()

  -- Run SQL that returns no rows. db:execute returns a numeric result code,
  -- and every number counts as true for assert(), so the code has to be
  -- compared with sqlite3.OK explicitly for a failure to be noticed.
  local function run_sql (sql)
    local rc = db:execute (sql)
    if not rc or (type (rc) == "number" and rc ~= sqlite3.OK) then
      error (string.format ("SQL failed (result code %s): %s\n%s",
                            tostring (rc), tostring (db:errmsg ()), sql))
    end -- statement failed
  end -- run_sql

  -- make a suitable function depending on whether they want case-sensitive or not
  if CASE_SENSITIVE then
    make_upper = function (s) return s end -- return original
  else
    make_upper = function (s) return s:upper () end -- make upper case
  end -- case-sensitivity test

  -- if no user dictionary, create it (empty). io.open is used for this
  -- rather than io.output: io.output would make the new file the default
  -- output file, and closing it would leave later io.write calls without a
  -- usable default output.
  local userdict_path = directory .. userdict
  local f = io.open (userdict_path, "r")
  if not f then
    f = assert (io.open (userdict_path, "w"))
  end -- checking for user dictionary
  f:close ()

  -- open database on disk
  db = assert (sqlite3.open (directory .. "spell.sqlite"))

  local words_table = false
  local count = 0

  -- if database just created, there won't be a words table; a table
  -- without a word_id column is treated the same way and gets rebuilt
  for row in db:nrows ("SELECT * FROM sqlite_master WHERE type = 'table' AND name = 'words'") do
    if string.match (row.sql, "word_id") then -- better be newer version
      words_table = true
    end -- if
  end -- for

  -- enable WAL (Write-Ahead Logging)
  run_sql "PRAGMA journal_mode=WAL;"

  -- if no words table, make one
  if not words_table then
    -- create a table to hold the words
    run_sql [[
      DROP TABLE IF EXISTS words;
      CREATE TABLE words(
        word_id INTEGER NOT NULL PRIMARY KEY autoincrement,
        name VARCHAR(10) NOT NULL,
        metaphone VARCHAR(10) NOT NULL,
        user INT(1)
      );
      CREATE INDEX metaphone_index ON words (metaphone);
      CREATE INDEX name_index ON words (name);
    ]]
  end -- if

  -- check if table empty
  for row in db:rows ('SELECT COUNT(*) FROM words') do
    count = row [1]
  end -- for

  -- if empty, populate it
  if count == 0 then
    local dlg
    if SHOW_PROGRESS_BAR then
      dlg = progress.new ("Loading dictionaries into SQLite database ...")
      dlg:range (0, #files)
      dlg:setstep (1)
    end -- if SHOW_PROGRESS_BAR

    run_sql "BEGIN TRANSACTION"

    -- load in protected mode so that the progress dialog is always closed;
    -- ok and result are local so they are not created as global variables
    local ok, result = pcall (function ()
      for _, name in ipairs (files) do
        read_dict (dlg, name)
      end -- reading each file
    end)

    if SHOW_PROGRESS_BAR then
      dlg:close ()
    end -- if SHOW_PROGRESS_BAR

    if not ok then
      -- don't leave the transaction open with a partly loaded dictionary;
      -- best effort only, the load error is what gets reported
      db:execute "ROLLBACK"
      error (result)
    end -- not ok

    run_sql "COMMIT"
  end -- if nothing in database
end -- init
-- when script is loaded, do initialization stuff: pick the case-conversion
-- function, open the dictionary database and, if it is empty, load the
-- dictionary files into it
init ()