From c4019669153831380c8939ba6fd9a9eaff27e6c6 Mon Sep 17 00:00:00 2001
From: tris203
Date: Sat, 1 Jun 2024 00:09:04 +0100
Subject: [PATCH] feat: ffi for char classing

---
 lua/precognition/ffi.lua   | 34 ++++++++++++++++++++++++++++++++++
 lua/precognition/utils.lua | 22 ++++++++++++++--------
 2 files changed, 48 insertions(+), 8 deletions(-)
 create mode 100644 lua/precognition/ffi.lua

diff --git a/lua/precognition/ffi.lua b/lua/precognition/ffi.lua
new file mode 100644
index 0000000..06528ef
--- /dev/null
+++ b/lua/precognition/ffi.lua
@@ -0,0 +1,34 @@
+local M = {}
+
+---Declare the C prototype this plugin needs and return the FFI C namespace.
+---The namespace is cached in `_G.precog_C`, so `ffi.cdef` is only called
+---while that global is unset.
+---@return ffi.namespace*
+function M.load()
+    if not _G.precog_C then
+        local ffi = require("ffi")
+        local ok, err = pcall(
+            ffi.cdef,
+            [[
+            int utf_class(const int c);
+        ]]
+        )
+        ---@diagnostic disable-next-line: need-check-nil
+        if not ok then
+            error(err)
+        end
+        _G.precog_C = ffi.C
+    end
+    return _G.precog_C
+end
+
+-- Reads and writes of keys that are not set on M itself are forwarded to the
+-- FFI C namespace, e.g. `require("precognition.ffi").utf_class(c)`.
+return setmetatable(M, {
+    __index = function(_, key)
+        return M.load()[key]
+    end,
+    __newindex = function(_, k, v)
+        M.load()[k] = v
+    end,
+})
diff --git a/lua/precognition/utils.lua b/lua/precognition/utils.lua
index 8e1ad53..c01d5fc 100644
--- a/lua/precognition/utils.lua
+++ b/lua/precognition/utils.lua
@@ -5,6 +5,7 @@ M.char_classes = {
     whitespace = 0,
     other = 1,
     word = 2,
+    emoji = 3,
 }
 
 ---@param char string
@@ -14,18 +15,23 @@ function M.char_class(char, big_word)
     assert(type(big_word) == "boolean", "big_word must be a boolean")
     local cc = M.char_classes
     local byte = string.byte(char)
+    -- string.byte() yields nil for an empty string: nothing to classify.
+    if byte == nil then
+        return cc.other
+    end
+    if char == " " or char == "\t" or char == "\0" then
+        return cc.whitespace
+    end
 
-    if byte and byte < 0x100 then
-        if char == " " or char == "\t" or char == "\0" then
-            return cc.whitespace
-        end
-        if char == "_" or char:match("%w") then
-            return big_word and cc.other or cc.word
-        end
+    -- utf_class() takes a character number, so pass the decoded code point
+    -- rather than the first byte: a lone byte is always < 0x100 and can never
+    -- reach the classes used for wider characters (e.g. emoji).
+    local c_class = require("precognition.ffi").utf_class(vim.fn.char2nr(char))
+    if big_word and c_class ~= 0 then
         return cc.other
     end
 
-    return cc.other -- scary unicode edge cases go here
+    return c_class
 end
 
 ---@param bufnr? integer