Skip to content

Commit

Permalink
Merge 'Add support for soundex() function' from flaneur
Browse files Browse the repository at this point in the history
Add the [soundex](https://www.sqlite.org/lang_corefunc.html#soundex) scalar
function.
SQLite does not enable the `soundex()` function by default; it is only
available when SQLite is built with `SQLITE_SOUNDEX`, and the SQLite used in
the CI workflow is not built that way. This PR therefore temporarily skips
the `soundex()` test in the `scalar-functions.test` file.

Closes #453
  • Loading branch information
penberg committed Dec 12, 2024
2 parents 91764b8 + 03288e5 commit 5796b41
Show file tree
Hide file tree
Showing 5 changed files with 152 additions and 4 deletions.
3 changes: 2 additions & 1 deletion COMPAT.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ This document describes the SQLite compatibility status of Limbo:
- [SQLite Compatibility](#sqlite-compatibility)
- [Limitations](#limitations)
- [SQL statements](#sql-statements)
- [SELECT Expressions](#select-expressions)
- [SQL functions](#sql-functions)
- [Scalar functions](#scalar-functions)
- [Aggregate functions](#aggregate-functions)
Expand Down Expand Up @@ -138,7 +139,7 @@ Feature support of [sqlite expr syntax](https://www.sqlite.org/lang_expr.html).
| rtrim(X) | Yes | |
| rtrim(X,Y) | Yes | |
| sign(X) | Yes | |
| soundex(X) | No | |
| soundex(X) | Yes | |
| sqlite_compileoption_get(N) | No | |
| sqlite_compileoption_used(X) | No | |
| sqlite_offset(X) | No | |
Expand Down
3 changes: 3 additions & 0 deletions core/function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ pub enum ScalarFunc {
Sign,
Substr,
Substring,
Soundex,
Date,
Time,
Typeof,
Expand Down Expand Up @@ -119,6 +120,7 @@ impl Display for ScalarFunc {
ScalarFunc::Sign => "sign".to_string(),
ScalarFunc::Substr => "substr".to_string(),
ScalarFunc::Substring => "substring".to_string(),
ScalarFunc::Soundex => "soundex".to_string(),
ScalarFunc::Date => "date".to_string(),
ScalarFunc::Time => "time".to_string(),
ScalarFunc::Typeof => "typeof".to_string(),
Expand Down Expand Up @@ -210,6 +212,7 @@ impl Func {
"hex" => Ok(Func::Scalar(ScalarFunc::Hex)),
"unhex" => Ok(Func::Scalar(ScalarFunc::Unhex)),
"zeroblob" => Ok(Func::Scalar(ScalarFunc::ZeroBlob)),
"soundex" => Ok(Func::Scalar(ScalarFunc::Soundex)),
_ => Err(()),
}
}
Expand Down
1 change: 1 addition & 0 deletions core/translate/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1183,6 +1183,7 @@ pub fn translate_expr(
| ScalarFunc::Quote
| ScalarFunc::RandomBlob
| ScalarFunc::Sign
| ScalarFunc::Soundex
| ScalarFunc::ZeroBlob => {
let args = if let Some(args) = args {
if args.len() != 1 {
Expand Down
145 changes: 142 additions & 3 deletions core/vdbe/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2356,6 +2356,7 @@ impl Program {
| ScalarFunc::Quote
| ScalarFunc::RandomBlob
| ScalarFunc::Sign
| ScalarFunc::Soundex
| ScalarFunc::ZeroBlob => {
let reg_value = state.registers[*start_reg].borrow_mut();
let result = match scalar_func {
Expand All @@ -2370,6 +2371,7 @@ impl Program {
ScalarFunc::Quote => Some(exec_quote(reg_value)),
ScalarFunc::RandomBlob => Some(exec_randomblob(reg_value)),
ScalarFunc::ZeroBlob => Some(exec_zeroblob(reg_value)),
ScalarFunc::Soundex => Some(exec_soundex(reg_value)),
_ => unreachable!(),
};
state.registers[*dest] = result.unwrap_or(OwnedValue::Null);
Expand Down Expand Up @@ -2873,6 +2875,96 @@ fn exec_sign(reg: &OwnedValue) -> Option<OwnedValue> {
Some(OwnedValue::Integer(sign))
}

/// Computes the four-character Soundex code of a value for the SQL
/// `soundex(X)` scalar function.
///
/// The code is the upper-cased first letter of the word followed by three
/// digits derived from the letters after it:
///
/// * `b f p v` -> 1, `c g j k q s x z` -> 2, `d t` -> 3, `l` -> 4,
///   `m n` -> 5, `r` -> 6;
/// * an `h` or `w` after the first letter is ignored entirely, so equal
///   digits on either side of it still collapse into one;
/// * vowels and `y` produce no digit, but they do separate equal digits;
/// * a run of letters sharing one digit (including the first letter's own
///   digit) contributes that digit only once;
/// * the code is right-padded with `0` to exactly four characters.
///
/// Returns the text `"?000"` when `reg` is NULL, is not a text value, is an
/// empty string, or contains any character that is not an ASCII letter.
pub fn exec_soundex(reg: &OwnedValue) -> OwnedValue {
    // Maps a lowercase consonant to its Soundex digit; every other character
    // (vowels, 'y', 'h', 'w') has no digit.
    fn digit_for(c: char) -> Option<char> {
        match c {
            'b' | 'f' | 'p' | 'v' => Some('1'),
            'c' | 'g' | 'j' | 'k' | 'q' | 's' | 'x' | 'z' => Some('2'),
            'd' | 't' => Some('3'),
            'l' => Some('4'),
            'm' | 'n' => Some('5'),
            'r' => Some('6'),
            _ => None,
        }
    }

    // Result for every input that has no usable Soundex code.
    let unknown = || OwnedValue::Text(Rc::new("?000".to_string()));

    let text = match reg {
        OwnedValue::Text(s) if s.chars().all(|c| c.is_ascii_alphabetic()) => s,
        // NULL, non-text values and text with non-ASCII-letter characters.
        _ => return unknown(),
    };

    // Lowercase lazily so the lookups are case-insensitive without building
    // an intermediate string.
    let mut letters = text.chars().map(|c| c.to_ascii_lowercase());
    let first = match letters.next() {
        Some(c) => c,
        // An empty string contains no ASCII letters either.
        None => return unknown(),
    };

    let mut code = String::with_capacity(4);
    code.push(first.to_ascii_uppercase());

    // `prev` is the last symbol seen (a digit, or the letter itself when it
    // has no digit). Seeding it from the first letter suppresses a following
    // letter that shares the first letter's digit, e.g. the 'f' in "Pfister".
    let mut prev = digit_for(first).unwrap_or(first);
    for c in letters {
        if code.len() == 4 {
            break;
        }
        // 'h' and 'w' are transparent: they neither emit nor separate.
        if c == 'h' || c == 'w' {
            continue;
        }
        let symbol = digit_for(c).unwrap_or(c);
        if symbol == prev {
            continue;
        }
        prev = symbol;
        // Vowels and 'y' only act as separators; digits are emitted.
        if symbol.is_ascii_digit() {
            code.push(symbol);
        }
    }

    // Pad short codes with zeros up to four characters.
    while code.len() < 4 {
        code.push('0');
    }

    OwnedValue::Text(Rc::new(code))
}

fn exec_abs(reg: &OwnedValue) -> Option<OwnedValue> {
match reg {
OwnedValue::Integer(x) => {
Expand Down Expand Up @@ -3462,9 +3554,9 @@ mod tests {
use super::{
exec_abs, exec_char, exec_hex, exec_if, exec_instr, exec_length, exec_like, exec_lower,
exec_ltrim, exec_max, exec_min, exec_nullif, exec_quote, exec_random, exec_randomblob,
exec_round, exec_rtrim, exec_sign, exec_substring, exec_trim, exec_typeof, exec_unhex,
exec_unicode, exec_upper, exec_zeroblob, execute_sqlite_version, get_new_rowid, AggContext,
Cursor, CursorResult, LimboError, OwnedRecord, OwnedValue, Result,
exec_round, exec_rtrim, exec_sign, exec_soundex, exec_substring, exec_trim, exec_typeof,
exec_unhex, exec_unicode, exec_upper, exec_zeroblob, execute_sqlite_version, get_new_rowid,
AggContext, Cursor, CursorResult, LimboError, OwnedRecord, OwnedValue, Result,
};
use mockall::{mock, predicate};
use rand::{rngs::mock::StepRng, thread_rng};
Expand Down Expand Up @@ -3794,6 +3886,53 @@ mod tests {
assert_eq!(exec_rtrim(&input_str, Some(pattern_str)), expected_str);
}

/// Checks `exec_soundex` against a table of known (word, code) pairs,
/// including a single-letter word and a word with no ASCII letters.
#[test]
fn test_soundex() {
    // (input word, expected Soundex code)
    let cases = [
        ("Pfister", "P236"),
        ("husobee", "H210"),
        ("Tymczak", "T522"),
        ("Ashcraft", "A261"),
        ("Robert", "R163"),
        ("Rupert", "R163"),
        ("Rubin", "R150"),
        ("Kant", "K530"),
        ("Knuth", "K530"),
        ("x", "X000"),
        ("闪电五连鞭", "?000"),
    ];

    for &(word, code) in cases.iter() {
        let argument = OwnedValue::Text(Rc::new(String::from(word)));
        let wanted = OwnedValue::Text(Rc::new(String::from(code)));
        assert_eq!(exec_soundex(&argument), wanted);
    }
}

#[test]
fn test_upper_case() {
let input_str = OwnedValue::Text(Rc::new(String::from("Limbo")));
Expand Down
4 changes: 4 additions & 0 deletions testing/scalar-functions.test
Original file line number Diff line number Diff line change
Expand Up @@ -767,3 +767,7 @@ do_execsql_test cast-in-where {
select age from users where age = cast('45' as integer) limit 1;
} {45}

# TODO: SQLite does not seem to enable soundex() by default; it has to be built with SQLITE_SOUNDEX defined.
# do_execsql_test soundex-text {
# select soundex('Pfister'), soundex('husobee'), soundex('Tymczak'), soundex('Ashcraft'), soundex('Robert'), soundex('Rupert'), soundex('Rubin'), soundex('Kant'), soundex('Knuth'), soundex('x'), soundex('');
# } {P236|H210|T522|A261|R163|R163|R150|K530|K530|X000|0000}

0 comments on commit 5796b41

Please sign in to comment.