From 5be8370453c0dfe2054e632afbfb4badd1719e74 Mon Sep 17 00:00:00 2001 From: Ali Ghahremani Date: Tue, 12 Mar 2024 21:07:45 +0330 Subject: [PATCH 1/4] ref: remove_ordinal_suffix feat: add _mut feat: traits --- src/remove_ordinal_suffix/mod.rs | 73 ++++++++++++++++++++++++++++---- 1 file changed, 65 insertions(+), 8 deletions(-) diff --git a/src/remove_ordinal_suffix/mod.rs b/src/remove_ordinal_suffix/mod.rs index 11cf106..75ffabd 100644 --- a/src/remove_ordinal_suffix/mod.rs +++ b/src/remove_ordinal_suffix/mod.rs @@ -1,28 +1,78 @@ /// Remove Ordinal suffix to numbers
/// # Example: /// --- input: چهل و سوم +/// --- output: چهل و سه
/// ``` /// use rust_persian_tools::remove_ordinal_suffix::remove_ordinal_suffix; /// assert_eq!(remove_ordinal_suffix("چهل و سوم"), "چهل و سه"); /// ``` -/// --- output: چهل و سه
pub fn remove_ordinal_suffix(word: impl AsRef) -> String { let mut word: String = word.as_ref().to_string(); //allocate + remove_ordinal_suffix_mut(&mut word); + word +} +pub trait RemoveOrdinalSuffix { + fn remove_ordinal_suffix(&self) -> String; +} + +impl RemoveOrdinalSuffix for String { + fn remove_ordinal_suffix(&self) -> String { + remove_ordinal_suffix(self) + } +} +impl RemoveOrdinalSuffix for str { + fn remove_ordinal_suffix(&self) -> String { + remove_ordinal_suffix(self) + } +} +use std::borrow::Cow; +impl RemoveOrdinalSuffix for Cow<'_, str> { + fn remove_ordinal_suffix(&self) -> String { + remove_ordinal_suffix(self) + } +} + +/// Removes the ordinal suffix of a number in place, overwriting the given `String` +/// (when a suffix is found the shortened text is rebuilt with `to_string()`, so this can still allocate)
+/// # Example: +/// --- input: چهل و سوم +/// --- edited input: چهل و سه
+/// ``` +/// use rust_persian_tools::remove_ordinal_suffix::remove_ordinal_suffix_mut; +/// let mut word = String::from("چهل و سوم"); +/// remove_ordinal_suffix_mut(&mut word); +/// assert_eq!(word, "چهل و سه"); +/// ``` +pub fn remove_ordinal_suffix_mut(word: &mut String) { if word.ends_with("مین") { - word = word[0..word.len() - ("مین".len())].to_string() + *word = word[0..word.len() - ("مین".len())].to_string() } else if word.ends_with("اُم") { - word = word[0..word.len() - ("اُم".len())].trim().to_string() + *word = word[0..word.len() - ("اُم".len())].trim().to_string() } else if word.ends_with("ام") { - word = word[0..word.len() - ("ام".len())].trim().to_string() + *word = word[0..word.len() - ("ام".len())].trim().to_string() } else if word.ends_with("سوم") { - word = word[0..word.len() - ("سوم".len())].to_string(); - word += "سه"; + *word = word[0..word.len() - ("سوم".len())].to_string(); + *word += "سه"; } else if word.ends_with('م') { - word = word[0..word.len() - ("م".len())].to_string() + *word = word[0..word.len() - ("م".len())].to_string() } +} - word +pub trait RemoveOrdinalSuffixMut { + fn remove_ordinal_suffix_mut(&mut self); +} + +impl RemoveOrdinalSuffixMut for String { + fn remove_ordinal_suffix_mut(&mut self) { + remove_ordinal_suffix_mut(self) + } +} + +impl RemoveOrdinalSuffixMut for Cow<'_, str> { + fn remove_ordinal_suffix_mut(&mut self) { + remove_ordinal_suffix_mut(self.to_mut()) + } } #[cfg(test)] @@ -35,4 +85,11 @@ mod remove_ordinal_suffix_tests { assert_eq!(remove_ordinal_suffix("چهل و پنجم"), "چهل و پنج"); assert_eq!(remove_ordinal_suffix("سی اُم"), "سی"); } + + #[test] + fn remove_mut_test() { + let mut word = String::from("چهل و سوم"); + remove_ordinal_suffix_mut(&mut word); + assert_eq!(word, "چهل و سه"); + } } From e91f1529676d2ba45fda8655c0b4c0ec723db393 Mon Sep 17 00:00:00 2001 From: Ali Ghahremani Date: Tue, 12 Mar 2024 22:56:31 +0330 Subject: [PATCH 2/4] ref: performance improvement on words_to_number by using a match expression instead 
of array linear search --- src/words_to_number/constants.rs | 116 ++++++++++++++----------------- src/words_to_number/mod.rs | 2 +- 2 files changed, 53 insertions(+), 65 deletions(-) diff --git a/src/words_to_number/constants.rs b/src/words_to_number/constants.rs index 8360968..d89c81f 100644 --- a/src/words_to_number/constants.rs +++ b/src/words_to_number/constants.rs @@ -1,69 +1,57 @@ pub(super) const NEGATIVE_PREFIX: &str = "منفی"; -pub(super) static UNITS: &[(&str, i64)] = &[ - ("صفر", 0), - ("یک", 1), - ("دو", 2), - ("سه", 3), - ("چهار", 4), - ("پنج", 5), - ("شش", 6), - ("شیش", 6), - ("هفت", 7), - ("هشت", 8), - ("نه", 9), - ("ده", 10), - ("یازده", 11), - ("دوازده", 12), - ("سیزده", 13), - ("چهارده", 14), - ("پانزده", 15), - ("شانزده", 16), - ("هفده", 17), - ("هجده", 18), - ("نوزده", 19), - ("بیست", 20), - ("سی", 30), - ("چهل", 40), - ("پنجاه", 50), - ("شصت", 60), - ("هفتاد", 70), - ("هشتاد", 80), - ("نود", 90), - ("صد", 100), - ("یکصد", 100), - ("دویست", 200), - ("سیصد", 300), - ("چهارصد", 400), - ("پانصد", 500), - ("ششصد", 600), - ("هفتصد", 700), - ("هشتصد", 800), - ("نهصد", 900), -]; - -pub(super) static MAGNITUDE: &[(&str, i64)] = &[ - ("هزار", 1000), - ("میلیون", 1000000), - ("بیلیون", 1000000000), - ("میلیارد", 1000000000), - ("تریلیون", 1000000000000), -]; - -pub(super) fn get_unit_number(unit: &str) -> Option<&i64> { - let result = UNITS - .iter() - .find(|(key, _)| key == &unit) - .map(|(_, details)| details); - - result +pub(super) fn get_unit_number(unit: &str) -> Option { + Some(match unit { + "صفر" => 0, + "یک" => 1, + "دو" => 2, + "سه" => 3, + "چهار" => 4, + "پنج" => 5, + "شش" => 6, + "شیش" => 6, + "هفت" => 7, + "هشت" => 8, + "نه" => 9, + "ده" => 10, + "یازده" => 11, + "دوازده" => 12, + "سیزده" => 13, + "چهارده" => 14, + "پانزده" => 15, + "شانزده" => 16, + "هفده" => 17, + "هجده" => 18, + "نوزده" => 19, + "بیست" => 20, + "سی" => 30, + "چهل" => 40, + "پنجاه" => 50, + "شصت" => 60, + "هفتاد" => 70, + "هشتاد" => 80, + "نود" => 90, + "صد" => 100, + 
"یکصد" => 100, + "دویست" => 200, + "سیصد" => 300, + "چهارصد" => 400, + "پانصد" => 500, + "ششصد" => 600, + "هفتصد" => 700, + "هشتصد" => 800, + "نهصد" => 900, + _ => return None, + }) } -pub(super) fn get_magnitute_number(unit: &str) -> Option<&i64> { - let result = MAGNITUDE - .iter() - .find(|(key, _)| key == &unit) - .map(|(_, details)| details); - - result +pub(super) fn get_magnitute_number(unit: &str) -> Option { + Some(match unit { + "هزار" => 1000, + "میلیون" => 1000000, + "بیلیون" => 1000000000, + "میلیارد" => 1000000000, + "تریلیون" => 1000000000000, + _ => return None, + }) } diff --git a/src/words_to_number/mod.rs b/src/words_to_number/mod.rs index 20967e8..419e3cc 100644 --- a/src/words_to_number/mod.rs +++ b/src/words_to_number/mod.rs @@ -48,7 +48,7 @@ fn calculate(tokens: Vec) -> Result { } else if let Some(value) = get_magnitute_number(&token) { // if token is a magnitute valid number if sum == 0 { - sum = *value; + sum = value; } else { sum *= value; } From b92d833eabdd08de56968790380d395b02c45217 Mon Sep 17 00:00:00 2001 From: Ali Ghahremani Date: Tue, 12 Mar 2024 23:12:18 +0330 Subject: [PATCH 3/4] ref: on commas add _mut traits and support for Cow --- src/commas/add_commas.rs | 22 ++++++++++++++++++++++ src/commas/remove_commas.rs | 22 ++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/src/commas/add_commas.rs b/src/commas/add_commas.rs index 8000e5c..8b076d1 100644 --- a/src/commas/add_commas.rs +++ b/src/commas/add_commas.rs @@ -85,6 +85,13 @@ impl AddCommas for str { add_commas(self) } } +use std::borrow::Cow; + +impl AddCommas for Cow<'_, str> { + fn add_commas(&self) -> String { + add_commas(self) + } +} impl AddCommas for String { fn add_commas(&self) -> String { @@ -92,6 +99,21 @@ impl AddCommas for String { } } +pub trait AddCommasMut { + fn add_commas_mut(&mut self); +} + +impl AddCommasMut for String { + fn add_commas_mut(&mut self) { + add_commas_mut(self) + } +} +impl AddCommasMut for Cow<'_, String> { + fn 
add_commas_mut(&mut self) { + add_commas_mut(self.to_mut()) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/commas/remove_commas.rs b/src/commas/remove_commas.rs index 7edfa84..c0336e5 100644 --- a/src/commas/remove_commas.rs +++ b/src/commas/remove_commas.rs @@ -29,6 +29,7 @@ pub fn remove_commas_mut(str: &mut String) { str.retain(|c| c != ',') } +use std::borrow::Cow; pub trait RemoveCommas { fn remove_commas(&self) -> String; } @@ -45,6 +46,27 @@ impl RemoveCommas for str { } } +impl RemoveCommas for Cow<'_, str> { + fn remove_commas(&self) -> String { + remove_commas(self) + } +} + +pub trait RemoveCommasMut { + fn remove_commas_mut(&mut self); +} + +impl RemoveCommasMut for String { + fn remove_commas_mut(&mut self) { + remove_commas_mut(self) + } +} +impl RemoveCommasMut for Cow<'_, str> { + fn remove_commas_mut(&mut self) { + remove_commas_mut(self.to_mut()) + } +} + #[cfg(test)] mod tests { use super::*; From e3cc99869fe392f6abb25a86c776695481436644 Mon Sep 17 00:00:00 2001 From: Ali ghahremani Date: Wed, 13 Mar 2024 12:34:40 +0330 Subject: [PATCH 4/4] ref: half_space add traits add documentation --- README.md | 2 +- src/half_space/mod.rs | 60 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 59 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 29e1b75..b1c4438 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ Note: These tools are totally offline (no api calls) | half_space | [link](https://docs.rs/rust-persian-tools/1.0.0/rust_persian_tools/half_space/index.html) | نیم فاصله هارو اوکی میکنه | | legal_id | [link](https://docs.rs/rust-persian-tools/1.0.0/rust_persian_tools/legal_id/index.html) | شناسه حقوقی رو اعتبار سنجی میکنه | | national_id | [link](https://docs.rs/rust-persian-tools/1.0.0/rust_persian_tools/national_id/index.html) | کد ملی رو اعتبار سنجی میکنه | -| number_plate | [link](https://docs.rs/rust-persian-tools/1.0.0/rust_persian_tools/number_plate/index.html) | پلاک ماشین | +| 
number_plate | [link](https://docs.rs/rust-persian-tools/1.0.0/rust_persian_tools/number_plate/index.html) | پلاک ماشین و موتور | | number_to_words | [link](https://docs.rs/rust-persian-tools/1.0.0/rust_persian_tools/number_to_words/index.html) | عدد رو به حروف تبدیل میکنه | | persian_chars | [link](https://docs.rs/rust-persian-tools/1.0.0/rust_persian_tools/persian_chars/index.html) | فارسی بودن یک متن رو چک میکنه و میتونه بعضی حروف عربی رو به فارسی تبدیل کنه | | phone_number | [link](https://docs.rs/rust-persian-tools/1.0.0/rust_persian_tools/phone_number/index.html) | شماره تلفن رو اعتبار سنجی میکنه و اپراتور رو شناسایی میکنه | diff --git a/src/half_space/mod.rs b/src/half_space/mod.rs index 5bbb91d..8d4d761 100644 --- a/src/half_space/mod.rs +++ b/src/half_space/mod.rs @@ -1,4 +1,12 @@ /// removes half space & soft hyphon from text +/// Example: +/// ``` +/// use rust_persian_tools::half_space::remove_half_space; +/// assert_eq!( +/// remove_half_space("نمی‌خواهی درخت‌ها را ببینیم؟"), +/// "نمی خواهی درخت ها را ببینیم؟".to_string() +/// ); +/// ``` pub fn remove_half_space(input: impl AsRef) -> String { let input = input.as_ref(); @@ -8,9 +16,38 @@ pub fn remove_half_space(input: impl AsRef) -> String { .map(|ch| if ch == '\u{200C}' { ' ' } else { ch }) .collect() } +pub trait RemoveHalfSpace { + fn remove_half_space(&self) -> String; +} -// add half space to input based on most useful -pub fn add_half_space(input: &str) -> String { +impl RemoveHalfSpace for String { + fn remove_half_space(&self) -> String { + remove_half_space(self) + } +} +impl RemoveHalfSpace for str { + fn remove_half_space(&self) -> String { + remove_half_space(self) + } +} +use std::borrow::Cow; +impl RemoveHalfSpace for Cow<'_, str> { + fn remove_half_space(&self) -> String { + remove_half_space(self) + } +} + +/// add half space to input based on most useful +/// Example: +/// ``` +/// use rust_persian_tools::half_space::add_half_space; +/// assert_eq!( +/// add_half_space("نمی خواهی 
درخت ها را ببینیم؟"), +/// "نمی‌خواهی درخت‌ها را ببینیم؟".to_string() +/// ); +/// ``` +pub fn add_half_space(input: impl AsRef) -> String { + let input = input.as_ref(); let result = remove_half_space(input.trim()) .replace("\u{0020}می\u{0020}", "\u{0020}می\u{200c}") .replace("\u{0020}نمی\u{0020}", "\u{0020}نمی\u{200c}") @@ -52,6 +89,25 @@ pub fn add_half_space(input: &str) -> String { result } +pub trait AddHalfSpace { + fn add_half_space(&self) -> String; +} +impl AddHalfSpace for String { + fn add_half_space(&self) -> String { + add_half_space(self) + } +} +impl AddHalfSpace for str { + fn add_half_space(&self) -> String { + add_half_space(self) + } +} +impl AddHalfSpace for Cow<'_, str> { + fn add_half_space(&self) -> String { + add_half_space(self) + } +} + #[cfg(test)] mod tests { use super::*;