From ae38390a19d0224d8e1d128c137b9b22319c4c52 Mon Sep 17 00:00:00 2001 From: Mohammad Amin Mirzaei Date: Wed, 17 Jan 2024 02:44:09 +0330 Subject: [PATCH] add words-to-number module --- Cargo.toml | 14 +- Makefile | 5 +- src/lib.rs | 4 + src/words_to_number/constants.rs | 71 +++++++++ src/words_to_number/errors.rs | 9 ++ src/words_to_number/mod.rs | 250 +++++++++++++++++++++++++++++++ 6 files changed, 346 insertions(+), 7 deletions(-) create mode 100644 src/words_to_number/constants.rs create mode 100644 src/words_to_number/errors.rs create mode 100644 src/words_to_number/mod.rs diff --git a/Cargo.toml b/Cargo.toml index 5e7d5d7..98717ee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,8 +27,6 @@ chrono = { version = "0.4.31", optional = true } # Edit `Makefile` and `src/lib.src` after making changes in this section: [features] -# For now, by default we enable all features: -default = ["full"] full = [ "add-ordinal-suffix", "commas", @@ -47,8 +45,12 @@ full = [ "time-ago", "get-place-by-iran-national-id", "half-space", - "legal-id" -] + "legal-id", + "words-to-number", +] # For now, by default we enable all features: + + +default = ["full"] add-ordinal-suffix = [] commas = [] digits = [] @@ -67,8 +69,8 @@ extract-card-number = [] time-ago = ["dep:thiserror", "dep:chrono"] get-place-by-iran-national-id = ["dep:thiserror"] half-space = [] -legal-id= ["dep:thiserror"] - +legal-id = ["dep:thiserror"] +words-to-number = ["dep:thiserror", "commas", "digits", "remove-ordinal-suffix"] [package.metadata.docs.rs] all-features = true diff --git a/Makefile b/Makefile index ca65c42..cc6b3ab 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ fmt: cargo fmt -build: full default add-ordinal-suffix commas digits find-capital-by-province persian-chars national-id remove-ordinal-suffix url-fix verity-card-number time-ago phone-number bill number-to-words get-bank-name-by-card-number extract-card-number get-place-by-iran-national-id half-space legal-id +build: full default add-ordinal-suffix commas digits find-capital-by-province persian-chars national-id remove-ordinal-suffix url-fix verity-card-number time-ago phone-number bill number-to-words get-bank-name-by-card-number extract-card-number get-place-by-iran-national-id half-space legal-id words-to-number check: clippy lint @@ -126,4 +126,7 @@ half-space: legal-id: @ echo "" cargo build --no-default-features --features=legal-id +words-to-number: + @ echo "" + cargo build --no-default-features --features=words-to-number @ ls -sh target/debug/*.rlib \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 7a0fef7..4755872 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,6 +17,7 @@ feature = "get-place-by-iran-national-id", feature = "half-space", feature = "legal-id", + feature = "words-to-number", )))] compile_error!("No available Cargo feature is included"); @@ -73,3 +74,6 @@ pub mod half_space; #[cfg(feature = "legal-id")] pub mod legal_id; + +#[cfg(feature = "words-to-number")] +pub mod words_to_number; diff --git a/src/words_to_number/constants.rs b/src/words_to_number/constants.rs new file mode 100644 index 0000000..49cc5ba --- /dev/null +++ b/src/words_to_number/constants.rs @@ -0,0 +1,71 @@ +pub(super) const NEGATIVE_PREFIX: &str = "منفی"; + +pub(super) static UNITS: &[(&str, i64)] = &[ + ("صفر", 0), + ("یک", 1), + ("دو", 2), + ("سه", 3), + ("چهار", 4), + ("پنج", 5), + ("شش", 6), + ("شیش", 6), + ("هفت", 7), + ("هشت", 8), + ("نه", 9), + ("ده", 10), + ("یازده", 11), + ("دوازده", 12), + ("سیزده", 13), + ("چهارده", 14), + ("پانزده", 15), + ("شانزده", 16), + ("هفده", 17), + ("هجده", 18), + ("نوزده", 19), + ("بیست", 20), + ("سی", 30), + ("چهل", 40), + ("پنجاه", 50), + ("شصت", 60), + ("هفتاد", 70), + ("هشتاد", 80), + ("نود", 90), + ("صد", 100), + ("یکصد", 100), + ("دویست", 200), + ("سیصد", 300), + ("چهارصد", 400), + ("پانصد", 500), + ("ششصد", 600), + ("هفتصد", 700), + ("هشتصد", 800), + ("نهصد", 900), +]; + +pub(super) static MAGNITUDE: &[(&str, i64)] = &[ + ("هزار", 1000), + ("میلیون", 1000000), + ("بیلیون", 1000000000), + ("میلیارد", 1000000000), + ("تریلیون", 1000000000000), +]; + +// pub(super) static ALL_WORDS: &[(&str, i64)] = UNITS.iter().chain(MAGNITUDE.iter()); + +pub(super) fn get_unit_number(unit: &str) -> Option<&i64> { + let result = UNITS + .iter() + .find(|(key, _)| key == &unit) + .map(|(_, details)| details); + + result +} + +pub(super) fn get_magnitute_number(unit: &str) -> Option<&i64> { + let result = MAGNITUDE + .iter() + .find(|(key, _)| key == &unit) + .map(|(_, details)| details); + + result +} diff --git a/src/words_to_number/errors.rs b/src/words_to_number/errors.rs new file mode 100644 index 0000000..d23483e --- /dev/null +++ b/src/words_to_number/errors.rs @@ -0,0 +1,9 @@ +use thiserror::Error; + +#[derive(Error, Debug, PartialEq, Eq)] +pub enum WordsToNumberError { + #[error("There is a invalid unit in the input")] + InvalidUnit, + #[error("The input cannot be a empty string")] + EmptyInput, +} diff --git a/src/words_to_number/mod.rs b/src/words_to_number/mod.rs new file mode 100644 index 0000000..ac8ff5b --- /dev/null +++ b/src/words_to_number/mod.rs @@ -0,0 +1,250 @@ +mod constants; +mod errors; + +use crate::{ + commas::add_commas::add_commas_mut, + digits::{DigitsAr2En, DigitsEn2ArMut, DigitsEn2FaMut, DigitsFa2En}, + remove_ordinal_suffix::remove_ordinal_suffix, +}; + +use self::{ + constants::{get_magnitute_number, get_unit_number, NEGATIVE_PREFIX}, + errors::WordsToNumberError, +}; + +#[derive(Debug, PartialEq)] +pub enum Language { + Arabic, + Persian, + English, +} + +pub struct Options { + pub digits: Language, + pub add_commas: bool, +} + +impl Default for Options { + fn default() -> Self { + Options { + digits: Language::English, + add_commas: false, + } + } +} + +fn calculate(tokens: Vec) -> Result { + let mut sum = 0; + let mut is_negetive = false; + + for token in tokens { + if token == NEGATIVE_PREFIX { + // check negetive token + is_negetive = true; + } else if let Some(value) = get_unit_number(&token) { + // if token is a valid number + sum += value; + } else if let Some(value) = get_magnitute_number(&token) { + // if token is a magnitute valid number + if sum == 0 { + sum = *value; + } else { + sum *= value; + } + } else if let Ok(value) = token.digits_fa_to_en().digits_ar_to_en().parse::() { + sum += value; + } else { + return Err(WordsToNumberError::InvalidUnit); + } + } + + if is_negetive { + Ok(-sum) + } else { + Ok(sum) + } +} + +/// returns a i64 number if the givin input is a standard persian number text otherwise it would return a error +/// +/// ordinal suffix is supported for example "منفی سی اُم" +/// +/// if you need to change numbers format for example add commas or change numbers to arabic or persian as result you may use [words_to_number_str] +/// +/// # Examples +/// +/// ``` +/// use rust_persian_tools::words_to_number::words_to_number; +/// +/// assert_eq!(words_to_number("منفی سه هزار").unwrap(), -3000); +/// assert!(words_to_number("سلام چطوری").is_err()); +/// ``` +pub fn words_to_number(words: impl AsRef) -> Result { + let words = words.as_ref().trim(); + + if words.is_empty() { + return Err(WordsToNumberError::EmptyInput); + } + + // remove ordinal suffix from each word + let tokens = words + .replace("شیش صد", "ششصد") + .replace("شش صد", "ششصد") + .replace("هفت صد", "هفتصد") + .replace("هشت صد", "هشتصد") + .replace("نه صد", "نهصد") + .split_ascii_whitespace() + .filter(|word| *word != "و" && *word != "ام" && *word != "اُم") + .map(remove_ordinal_suffix) + .collect::>(); + + calculate(tokens) +} + +/// returns a number as [String] if the givin input is a standard persian number text otherwise it would return a error +/// +/// first you need to create a [Options] struct , you may also use ```Options::defualt()``` +/// +/// ordinal suffix is supported for example "منفی سی اُم" +/// +/// if you just need the number as i64 you may use [words_to_number] +/// +/// # Examples +/// +/// ``` +/// use rust_persian_tools::words_to_number::{Options,Language,words_to_number_str}; +/// +/// let option = Options { +/// digits: Language::Arabic, +/// add_commas: true, +/// }; +/// assert_eq!( +/// words_to_number_str("دوازده هزار بیست دو", &option).unwrap(), +/// "١٢,٠٢٢" +/// ); +/// ``` +pub fn words_to_number_str( + words: impl AsRef, + option: &Options, +) -> Result { + let number = words_to_number(words)?; + let mut result = format!("{number}"); + + if option.add_commas { + add_commas_mut(&mut result) + } + + if option.digits == Language::Arabic { + result.digits_en_to_ar_mut(); + } else if option.digits == Language::Persian { + result.digits_en_to_fa_mut(); + } + + Ok(result) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn words_to_number_test() { + assert_eq!(words_to_number("منفی سه هزار").unwrap(), -3000); + assert_eq!(words_to_number("دوازده هزار بیست دو").unwrap(), 12022); + assert_eq!(words_to_number("دوازده هزار و بیست و دو").unwrap(), 12022); + } + + #[test] + fn words_to_number_with_comma_test() { + let option = Options { + digits: Language::English, + add_commas: true, + }; + + assert_eq!( + words_to_number_str("دوازده هزار بیست دو", &option).unwrap(), + "12,022" + ); + assert_eq!( + words_to_number_str("دوازده هزار و بیست و دو", &option).unwrap(), + "12,022" + ); + } + + #[test] + fn words_to_number_arabic() { + let option = Options { + digits: Language::Arabic, + add_commas: false, + }; + + assert_eq!( + words_to_number_str("منفی سه هزار", &option).unwrap(), + "-٣٠٠٠" + ); + assert_eq!( + words_to_number_str("سه هزار دویست و دوازده", &option).unwrap(), + "٣٢١٢" + ); + assert_eq!( + words_to_number_str("دوازده هزار بیست دو", &option).unwrap(), + "١٢٠٢٢" + ); + assert_eq!( + words_to_number_str("چهارصد پنجاه هزار", &option).unwrap(), + "٤٥٠٠٠٠" + ); + } + + #[test] + fn words_to_number_with_comma_arabic() { + let option = Options { + digits: Language::Arabic, + add_commas: true, + }; + + assert_eq!( + words_to_number_str("دوازده هزار بیست دو", &option).unwrap(), + "١٢,٠٢٢" + ); + assert_eq!( + words_to_number_str("دوازده هزار و بیست و دو", &option).unwrap(), + "١٢,٠٢٢" + ); + assert_eq!( + words_to_number_str("چهارصد پنجاه هزار", &option).unwrap(), + "٤٥٠,٠٠٠" + ); + } + + #[test] + fn words_to_number_with_ordinal_words() { + let option_comma_fa = Options { + digits: Language::Persian, + add_commas: true, + }; + + assert_eq!( + words_to_number_str("منفی ۳ هزار", &option_comma_fa).unwrap(), + "-۳,۰۰۰" + ); + assert_eq!( + words_to_number_str("منفی 3 هزار و 200", &option_comma_fa).unwrap(), + "-۳,۲۰۰" + ); + assert_eq!( + words_to_number_str("منفی سه هزارمین", &option_comma_fa).unwrap(), + "-۳,۰۰۰" + ); + + assert_eq!(words_to_number("منفی سه هزارم").unwrap(), -3000); + assert_eq!(words_to_number("سی و سوم").unwrap(), 33); + assert_eq!(words_to_number("منفی سی اُم").unwrap(), -30); + } + + #[test] + fn words_to_number_fail_cases() { + assert!(words_to_number("سلام چطوری").is_err()); + assert!(words_to_number("").is_err()); + } +}