From 03288e51706652df5bf57dd018f0eefc03c54ecd Mon Sep 17 00:00:00 2001 From: Li Yazhou Date: Thu, 12 Dec 2024 21:21:36 -0600 Subject: [PATCH] add impl about scalar function soundex with test --- COMPAT.md | 3 +- core/function.rs | 3 + core/translate/expr.rs | 1 + core/vdbe/mod.rs | 145 +++++++++++++++++++++++++++++++++- testing/scalar-functions.test | 4 + 5 files changed, 152 insertions(+), 4 deletions(-) diff --git a/COMPAT.md b/COMPAT.md index 913b0366..d25e67a0 100644 --- a/COMPAT.md +++ b/COMPAT.md @@ -5,6 +5,7 @@ This document describes the SQLite compatibility status of Limbo: - [SQLite Compatibility](#sqlite-compatibility) - [Limitations](#limitations) - [SQL statements](#sql-statements) + - [SELECT Expressions](#select-expressions) - [SQL functions](#sql-functions) - [Scalar functions](#scalar-functions) - [Aggregate functions](#aggregate-functions) @@ -138,7 +139,7 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html). | rtrim(X) | Yes | | | rtrim(X,Y) | Yes | | | sign(X) | Yes | | -| soundex(X) | No | | +| soundex(X) | Yes | | | sqlite_compileoption_get(N) | No | | | sqlite_compileoption_used(X) | No | | | sqlite_offset(X) | No | | diff --git a/core/function.rs b/core/function.rs index 9ece1299..0659da73 100644 --- a/core/function.rs +++ b/core/function.rs @@ -76,6 +76,7 @@ pub enum ScalarFunc { Sign, Substr, Substring, + Soundex, Date, Time, Typeof, @@ -119,6 +120,7 @@ impl Display for ScalarFunc { ScalarFunc::Sign => "sign".to_string(), ScalarFunc::Substr => "substr".to_string(), ScalarFunc::Substring => "substring".to_string(), + ScalarFunc::Soundex => "soundex".to_string(), ScalarFunc::Date => "date".to_string(), ScalarFunc::Time => "time".to_string(), ScalarFunc::Typeof => "typeof".to_string(), @@ -210,6 +212,7 @@ impl Func { "hex" => Ok(Func::Scalar(ScalarFunc::Hex)), "unhex" => Ok(Func::Scalar(ScalarFunc::Unhex)), "zeroblob" => Ok(Func::Scalar(ScalarFunc::ZeroBlob)), + "soundex" => Ok(Func::Scalar(ScalarFunc::Soundex)), 
_ => Err(()), } } diff --git a/core/translate/expr.rs b/core/translate/expr.rs index 8a13e134..d15103a9 100644 --- a/core/translate/expr.rs +++ b/core/translate/expr.rs @@ -1169,6 +1169,7 @@ pub fn translate_expr( | ScalarFunc::Quote | ScalarFunc::RandomBlob | ScalarFunc::Sign + | ScalarFunc::Soundex | ScalarFunc::ZeroBlob => { let args = if let Some(args) = args { if args.len() != 1 { diff --git a/core/vdbe/mod.rs b/core/vdbe/mod.rs index ebd25360..b3b40379 100644 --- a/core/vdbe/mod.rs +++ b/core/vdbe/mod.rs @@ -2149,6 +2149,7 @@ impl Program { | ScalarFunc::Quote | ScalarFunc::RandomBlob | ScalarFunc::Sign + | ScalarFunc::Soundex | ScalarFunc::ZeroBlob => { let reg_value = state.registers[*start_reg].borrow_mut(); let result = match scalar_func { @@ -2163,6 +2164,7 @@ impl Program { ScalarFunc::Quote => Some(exec_quote(reg_value)), ScalarFunc::RandomBlob => Some(exec_randomblob(reg_value)), ScalarFunc::ZeroBlob => Some(exec_zeroblob(reg_value)), + ScalarFunc::Soundex => Some(exec_soundex(reg_value)), _ => unreachable!(), }; state.registers[*dest] = result.unwrap_or(OwnedValue::Null); @@ -2666,6 +2668,96 @@ fn exec_sign(reg: &OwnedValue) -> Option<OwnedValue> { Some(OwnedValue::Integer(sign)) } +/// Generates the Soundex code for a given word +pub fn exec_soundex(reg: &OwnedValue) -> OwnedValue { + let s = match reg { + OwnedValue::Null => return OwnedValue::Text(Rc::new("?000".to_string())), + OwnedValue::Text(s) => { + // return ?000 if non ASCII alphabet character is found + if !s.chars().all(|c| c.is_ascii_alphabetic()) { + return OwnedValue::Text(Rc::new("?000".to_string())); + } + s.clone() + } + _ => return OwnedValue::Text(Rc::new("?000".to_string())), // For unsupported types, return ?000 + }; + + // Remove numbers and spaces + let word: String = s + .chars() + .filter(|c| !c.is_digit(10)) + .collect::<String>() + .replace(" ", ""); + if word.is_empty() { + return OwnedValue::Text(Rc::new("0000".to_string())); + } + + let soundex_code = |c| match c { + 'b' | 'f' | 'p' | 'v' 
=> Some('1'), + 'c' | 'g' | 'j' | 'k' | 'q' | 's' | 'x' | 'z' => Some('2'), + 'd' | 't' => Some('3'), + 'l' => Some('4'), + 'm' | 'n' => Some('5'), + 'r' => Some('6'), + _ => None, + }; + + // Convert the word to lowercase for consistent lookups + let word = word.to_lowercase(); + let first_letter = word.chars().next().unwrap(); + + // Remove all occurrences of 'h' and 'w' except the first letter + let code: String = word + .chars() + .skip(1) + .filter(|&ch| ch != 'h' && ch != 'w') + .fold(first_letter.to_string(), |mut acc, ch| { + acc.push(ch); + acc + }); + + // Replace consonants with digits based on Soundex mapping + let tmp: String = code + .chars() + .map(|ch| match soundex_code(ch) { + Some(code) => code.to_string(), + None => ch.to_string(), + }) + .collect(); + + // Remove adjacent same digits + let tmp = tmp.chars().fold(String::new(), |mut acc, ch| { + if acc.chars().last() != Some(ch) { + acc.push(ch); + } + acc + }); + + // Remove all occurrences of a, e, i, o, u, y except the first letter + let mut result = tmp + .chars() + .enumerate() + .filter(|(i, ch)| *i == 0 || !matches!(ch, 'a' | 'e' | 'i' | 'o' | 'u' | 'y')) + .map(|(_, ch)| ch) + .collect::<String>(); + + // If the first symbol is a digit, replace it with the saved first letter + if let Some(first_digit) = result.chars().next() { + if first_digit.is_digit(10) { + result.replace_range(0..1, &first_letter.to_string()); + } + } + + // Append zeros if the result contains less than 4 characters + while result.len() < 4 { + result.push('0'); + } + + // Retain the first 4 characters and convert to uppercase + result.truncate(4); + OwnedValue::Text(Rc::new(result.to_uppercase())) +} + fn exec_abs(reg: &OwnedValue) -> Option<OwnedValue> { match reg { OwnedValue::Integer(x) => { @@ -3255,9 +3347,9 @@ mod tests { use super::{ exec_abs, exec_char, exec_hex, exec_if, exec_instr, exec_length, exec_like, exec_lower, exec_ltrim, exec_max, exec_min, exec_nullif, exec_quote, exec_random, exec_randomblob, - exec_round, 
exec_rtrim, exec_sign, exec_substring, exec_trim, exec_typeof, exec_unhex, - exec_unicode, exec_upper, exec_zeroblob, execute_sqlite_version, get_new_rowid, AggContext, - Cursor, CursorResult, LimboError, OwnedRecord, OwnedValue, Result, + exec_round, exec_rtrim, exec_sign, exec_soundex, exec_substring, exec_trim, exec_typeof, + exec_unhex, exec_unicode, exec_upper, exec_zeroblob, execute_sqlite_version, get_new_rowid, + AggContext, Cursor, CursorResult, LimboError, OwnedRecord, OwnedValue, Result, }; use mockall::{mock, predicate}; use rand::{rngs::mock::StepRng, thread_rng}; @@ -3587,6 +3679,53 @@ mod tests { assert_eq!(exec_rtrim(&input_str, Some(pattern_str)), expected_str); } + #[test] + fn test_soundex() { + let input_str = OwnedValue::Text(Rc::new(String::from("Pfister"))); + let expected_str = OwnedValue::Text(Rc::new(String::from("P236"))); + assert_eq!(exec_soundex(&input_str), expected_str); + + let input_str = OwnedValue::Text(Rc::new(String::from("husobee"))); + let expected_str = OwnedValue::Text(Rc::new(String::from("H210"))); + assert_eq!(exec_soundex(&input_str), expected_str); + + let input_str = OwnedValue::Text(Rc::new(String::from("Tymczak"))); + let expected_str = OwnedValue::Text(Rc::new(String::from("T522"))); + assert_eq!(exec_soundex(&input_str), expected_str); + + let input_str = OwnedValue::Text(Rc::new(String::from("Ashcraft"))); + let expected_str = OwnedValue::Text(Rc::new(String::from("A261"))); + assert_eq!(exec_soundex(&input_str), expected_str); + + let input_str = OwnedValue::Text(Rc::new(String::from("Robert"))); + let expected_str = OwnedValue::Text(Rc::new(String::from("R163"))); + assert_eq!(exec_soundex(&input_str), expected_str); + + let input_str = OwnedValue::Text(Rc::new(String::from("Rupert"))); + let expected_str = OwnedValue::Text(Rc::new(String::from("R163"))); + assert_eq!(exec_soundex(&input_str), expected_str); + + let input_str = OwnedValue::Text(Rc::new(String::from("Rubin"))); + let expected_str = 
OwnedValue::Text(Rc::new(String::from("R150"))); + assert_eq!(exec_soundex(&input_str), expected_str); + + let input_str = OwnedValue::Text(Rc::new(String::from("Kant"))); + let expected_str = OwnedValue::Text(Rc::new(String::from("K530"))); + assert_eq!(exec_soundex(&input_str), expected_str); + + let input_str = OwnedValue::Text(Rc::new(String::from("Knuth"))); + let expected_str = OwnedValue::Text(Rc::new(String::from("K530"))); + assert_eq!(exec_soundex(&input_str), expected_str); + + let input_str = OwnedValue::Text(Rc::new(String::from("x"))); + let expected_str = OwnedValue::Text(Rc::new(String::from("X000"))); + assert_eq!(exec_soundex(&input_str), expected_str); + + let input_str = OwnedValue::Text(Rc::new(String::from("闪电五连鞭"))); + let expected_str = OwnedValue::Text(Rc::new(String::from("?000"))); + assert_eq!(exec_soundex(&input_str), expected_str); + } + #[test] fn test_upper_case() { let input_str = OwnedValue::Text(Rc::new(String::from("Limbo"))); diff --git a/testing/scalar-functions.test b/testing/scalar-functions.test index ae96ddad..80878cba 100755 --- a/testing/scalar-functions.test +++ b/testing/scalar-functions.test @@ -767,3 +767,7 @@ do_execsql_test cast-in-where { select age from users where age = cast('45' as integer) limit 1; } {45} +# TODO: SQLite does not seem to enable soundex() by default unless it is built with SQLITE_SOUNDEX enabled. +# do_execsql_test soundex-text { +# select soundex('Pfister'), soundex('husobee'), soundex('Tymczak'), soundex('Ashcraft'), soundex('Robert'), soundex('Rupert'), soundex('Rubin'), soundex('Kant'), soundex('Knuth'), soundex('x'), soundex(''); +# } {P236|H210|T522|A261|R163|R163|R150|K530|K530|X000|0000} \ No newline at end of file