Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Levenshtein distance functions to string lib #3648

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions common/stringFunctions.lua
Original file line number Diff line number Diff line change
Expand Up @@ -182,3 +182,66 @@ if not string.formatSI then
return str .. siPrefix
end
end

if not string.Levenshtein then
Beherith marked this conversation as resolved.
Show resolved Hide resolved
-- Reusable scratch rows: kept as upvalues so repeated calls allocate nothing.
-- Entries past index #b + 1 may hold stale values from earlier calls; they are never read.
local Levenshtein0 = {}
local Levenshtein1 = {}
local min = math.min
local sbyte = string.byte
local slen = string.len

--- Calculates the Levenshtein (edit) distance between two strings: the minimum
--- number of single-byte insertions, deletions and substitutions turning a into b.
--- Comparison is byte-wise, so multi-byte UTF-8 characters count once per byte.
--- Runs in O(#a * #b) time; the two scratch rows grow to #b + 1 entries and are reused.
---@param a string
---@param b string
---@return number distance
function string.Levenshtein(a, b)
	local lena = slen(a)
	local lenb = slen(b)
	-- prev holds the finished row (r - 1), curr is the row being filled (r).
	-- Column c of a row is stored at index c + 1 because column 0 exists.
	local prev, curr = Levenshtein0, Levenshtein1

	for c = 0, lenb do -- row 0: turning "" into the first c bytes of b costs c insertions
		prev[c + 1] = c
	end

	for r = 1, lena do
		local abyte = sbyte(a, r) -- invariant for the whole row
		curr[1] = r -- column 0: deleting the first r bytes of a
		for c = 1, lenb do
			local cost = (abyte == sbyte(b, c)) and 0 or 1
			curr[c + 1] = min(
				prev[c + 1] + 1, -- deletion
				curr[c] + 1,     -- insertion
				prev[c] + cost   -- match / substitution
			)
		end
		prev, curr = curr, prev -- the row just filled becomes the previous row
	end

	-- After the final swap (or with no rows at all when a is empty) the answer
	-- is the last column of prev.
	return prev[lenb + 1]
end

--- Finds the candidate string in t with the smallest edit distance to a.
--- For each entry the value is the candidate if it is a string, otherwise the key
--- if it is a string; entries with neither are ignored.
--- A candidate is only accepted when its distance is strictly smaller than the best
--- so far, which starts at #a. When nothing qualifies, a itself is returned with
--- a score of #a. On ties the first candidate visited wins, and pairs() order is
--- unspecified.
---@param a string
---@param t table values (or, failing that, keys) are the candidate strings
---@return string bestresult closest candidate, or a when no candidate qualifies
---@return number bestscore edit distance of bestresult (#a for the fallback)
function string.FindClosest(a, t)
	local slen = string.len
	local abs = math.abs
	local levenshtein = string.Levenshtein -- looked up at call time

	local lena = slen(a)
	local bestscore = lena
	local bestresult = a
	for k, v in pairs(t) do
		local b -- whichever of value / key is a string, value preferred
		if type(v) == 'string' then
			b = v
		elseif type(k) == 'string' then
			b = k
		end
		-- The length difference is a lower bound on the edit distance, so candidates
		-- that cannot beat the current best are skipped without running the DP.
		if b and abs(slen(b) - lena) < bestscore then
			local distance = levenshtein(a, b)
			if distance < bestscore then
				bestscore = distance
				bestresult = b
				if distance == 0 then
					break -- exact match, nothing can be closer
				end
			end
		end
	end
	return bestresult, bestscore
end

-- print(string.Levenshtein(string.rep("asdfasdfasdfasdf", 1000), string.rep("asdfasdfasdfasda", 1000))) -- 5 seconds
-- print(string.FindClosest("apple", {"pear", "popple","bear","ple"}))
Beherith marked this conversation as resolved.
Show resolved Hide resolved
end