From 8f8134dbf1397fa9d48495d3faef63f1d4e86a03 Mon Sep 17 00:00:00 2001
From: Anatolii Smolianinov
Date: Tue, 10 Dec 2024 13:45:41 +0100
Subject: [PATCH] use phf for lang map

---
Review revision: LANGUAGES is a plain `static` (phf_map! is evaluated at
compile time, so the lazy_static! wrapper was redundant), and the looked-up
Language is copied out of the map instead of calling `.to_owned()`.
The post-image blob id on the stemmer.rs `index` line predates this revision.

 rust/Cargo.toml                  |  3 +-
 rust/src/tantivy_util/stemmer.rs | 57 ++++++++++++++++++++------------
 2 files changed, 37 insertions(+), 23 deletions(-)

diff --git a/rust/Cargo.toml b/rust/Cargo.toml
index 23e2502..9db1562 100644
--- a/rust/Cargo.toml
+++ b/rust/Cargo.toml
@@ -21,7 +21,8 @@ unicode-segmentation = "1.11.0"
 logcall = "0.1"
 lazy_static = "1.5.0"
 tantivy-jieba = { version = "0.11.0", optional = true }
+phf = { version = "0.11.2", features = ["macros"] }
 
 [features]
 default = ["jieba"]
-jieba = ["dep:tantivy-jieba"]
\ No newline at end of file
+jieba = ["dep:tantivy-jieba"]
diff --git a/rust/src/tantivy_util/stemmer.rs b/rust/src/tantivy_util/stemmer.rs
index cea0f0d..0ef6841 100644
--- a/rust/src/tantivy_util/stemmer.rs
+++ b/rust/src/tantivy_util/stemmer.rs
@@ -1,28 +1,41 @@
-use tantivy::tokenizer::{Language, Stemmer};
 use crate::tantivy_util::TantivyGoError;
+use phf::phf_map;
+use tantivy::tokenizer::{Language, Stemmer};
+
+/// Compile-time map from two-letter language codes to the stemmer [`Language`].
+///
+/// `phf_map!` can initialise a `static` directly, so no lazy initialisation is needed.
+pub static LANGUAGES: phf::Map<&'static str, Language> = phf_map! {
+    "ar" => Language::Arabic,
+    "da" => Language::Danish,
+    "nl" => Language::Dutch,
+    "en" => Language::English,
+    "fi" => Language::Finnish,
+    "fr" => Language::French,
+    "de" => Language::German,
+    "el" => Language::Greek,
+    "hu" => Language::Hungarian,
+    "it" => Language::Italian,
+    "no" => Language::Norwegian,
+    "pt" => Language::Portuguese,
+    "ro" => Language::Romanian,
+    "ru" => Language::Russian,
+    "es" => Language::Spanish,
+    "sv" => Language::Swedish,
+    "ta" => Language::Tamil,
+    "tr" => Language::Turkish,
+};
 
+/// Builds a [`Stemmer`] for the language code `lang`.
+///
+/// # Errors
+///
+/// Returns a [`TantivyGoError`] when `lang` is not a key of [`LANGUAGES`].
 pub fn create_stemmer(lang: &str) -> Result<Stemmer, TantivyGoError> {
-    let stemmer_language = match lang {
-        "ar" => Ok(Language::Arabic),
-        "da" => Ok(Language::Danish),
-        "nl" => Ok(Language::Dutch),
-        "en" => Ok(Language::English),
-        "fi" => Ok(Language::Finnish),
-        "fr" => Ok(Language::French),
-        "de" => Ok(Language::German),
-        "el" => Ok(Language::Greek),
-        "hu" => Ok(Language::Hungarian),
-        "it" => Ok(Language::Italian),
-        "no" => Ok(Language::Norwegian),
-        "pt" => Ok(Language::Portuguese),
-        "ro" => Ok(Language::Romanian),
-        "ru" => Ok(Language::Russian),
-        "es" => Ok(Language::Spanish),
-        "sv" => Ok(Language::Swedish),
-        "ta" => Ok(Language::Tamil),
-        "tr" => Ok(Language::Turkish),
-        _ => Err(TantivyGoError(format!("{} is an unsupported language", lang))),
-    }?;
+    let stemmer_language = LANGUAGES
+        .get(lang)
+        .copied()
+        .ok_or_else(|| TantivyGoError(format!("{lang} is an unsupported language")))?;
 
     Ok(Stemmer::new(stemmer_language))
 }