diff --git a/Cargo.toml b/Cargo.toml
index 465945a..66240b9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -46,6 +46,7 @@ full = [
     "extract-card-number",
     "time-ago",
     "get-place-by-iran-national-id",
+    "half-space",
 ]
 add-ordinal-suffix = []
 commas = []
@@ -64,6 +65,7 @@ get-bank-name-by-card-number = ["dep:thiserror"]
 extract-card-number = []
 time-ago = ["dep:thiserror", "dep:chrono"]
 get-place-by-iran-national-id = ["dep:thiserror"]
+half-space = []
 
 [package.metadata.docs.rs]
 all-features = true
diff --git a/Makefile b/Makefile
index 639f954..dee6f1d 100644
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,7 @@
 fmt:
 	cargo fmt
 
-build: full default add-ordinal-suffix commas digits find-capital-by-province persian-chars national-id remove-ordinal-suffix url-fix verity-card-number time-ago phone-number bill number-to-words get-bank-name-by-card-number extract-card-number get-place-by-iran-national-id
+build: full default add-ordinal-suffix commas digits find-capital-by-province persian-chars national-id remove-ordinal-suffix url-fix verity-card-number time-ago phone-number bill number-to-words get-bank-name-by-card-number extract-card-number get-place-by-iran-national-id half-space
 
 check: clippy lint
 
@@ -116,4 +116,9 @@ extract-card-number:
 get-place-by-iran-national-id:
 	@ echo ""
 	cargo build --no-default-features --features=get-place-by-iran-national-id
+	@ ls -sh target/debug/*.rlib
+
+half-space:
+	@ echo ""
+	cargo build --no-default-features --features=half-space
 	@ ls -sh target/debug/*.rlib
\ No newline at end of file
diff --git a/src/half_space/mod.rs b/src/half_space/mod.rs
new file mode 100644
index 0000000..0e80814
--- /dev/null
+++ b/src/half_space/mod.rs
@@ -0,0 +1,130 @@
+//! Insert and remove the Persian half space (zero-width non-joiner, U+200C).
+
+/// Zero-width non-joiner (U+200C), used in Persian text as the "half space".
+const ZWNJ: char = '\u{200C}';
+
+/// Soft hyphen (U+00AD).
+const SOFT_HYPHEN: char = '\u{00AD}';
+
+/// Words that are joined to the word that follows them.
+// One entry per line keeps the right-to-left literals readable.
+#[rustfmt::skip]
+const PREFIXES: &[&str] = &[
+    "می",
+    "نمی",
+    "بی",
+];
+
+/// Words that are joined to the word that precedes them.
+// One entry per line keeps the right-to-left literals readable.
+#[rustfmt::skip]
+const SUFFIXES: &[&str] = &[
+    "ام",
+    "ات",
+    "اش",
+    "ای",
+    "اید",
+    "ایم",
+    "اند",
+    "ها",
+    "های",
+    "تر",
+    "تری",
+    "هایی",
+    "هایم",
+    "هایت",
+    "هایش",
+    "هایمان",
+    "هایتان",
+    "هایشان",
+];
+
+/// Removes half spaces and soft hyphens from `input`.
+///
+/// Every zero-width non-joiner (U+200C) is replaced with a regular space
+/// (U+0020) and every soft hyphen (U+00AD) is dropped. All other characters
+/// are returned unchanged.
+pub fn remove_half_space(input: impl AsRef<str>) -> String {
+    input
+        .as_ref()
+        .chars()
+        .filter_map(|ch| match ch {
+            SOFT_HYPHEN => None,
+            ZWNJ => Some(' '),
+            other => Some(other),
+        })
+        .collect()
+}
+
+/// Adds half spaces around the most common Persian prefixes and suffixes.
+///
+/// `input` is trimmed and stripped of existing half spaces and soft hyphens
+/// (see [`remove_half_space`]) and then split on regular spaces. The space
+/// between two neighbouring words becomes a zero-width non-joiner (U+200C)
+/// when the first word is one of `PREFIXES` or the second word is one of
+/// `SUFFIXES`; every other space is kept.
+///
+/// Only whole words are matched, so a word that merely starts with a prefix
+/// (for example "میز") is returned unchanged.
+pub fn add_half_space(input: &str) -> String {
+    let normalized = remove_half_space(input.trim());
+    let mut output = String::with_capacity(normalized.len());
+    let mut previous: Option<&str> = None;
+
+    for word in normalized.split(' ') {
+        if let Some(prev) = previous {
+            // Empty words come from runs of spaces; never attach to those.
+            let join = !prev.is_empty()
+                && !word.is_empty()
+                && (PREFIXES.contains(&prev) || SUFFIXES.contains(&word));
+            output.push(if join { ZWNJ } else { ' ' });
+        }
+        output.push_str(word);
+        previous = Some(word);
+    }
+
+    output
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Half spaces are spelled as `\u{200c}` escapes so they stay visible.
+
+    #[test]
+    fn test_adding_half_space() {
+        assert_eq!(
+            add_half_space("نمی خواهی درخت ها را ببینیم؟"),
+            "نمی\u{200c}خواهی درخت\u{200c}ها را ببینیم؟"
+        );
+
+        assert_eq!(
+            add_half_space("ای دوست سلام من به تو. نمی خواهمت درخت های چنار هاله صمیمی من"),
+            "ای دوست سلام من به تو. نمی\u{200c}خواهمت درخت\u{200c}های چنار هاله صمیمی من"
+        );
+    }
+
+    #[test]
+    fn test_adding_half_space_matches_whole_words_only() {
+        assert_eq!(add_half_space("میز کار"), "میز کار");
+        assert_eq!(add_half_space("میز"), "میز");
+        assert_eq!(add_half_space(""), "");
+    }
+
+    #[test]
+    fn test_adding_half_space_prefix_and_suffix_positions() {
+        assert_eq!(add_half_space("بی ادب"), "بی\u{200c}ادب");
+        assert_eq!(add_half_space("کتاب ام را بده"), "کتاب\u{200c}ام را بده");
+        assert_eq!(add_half_space("درخت ها"), "درخت\u{200c}ها");
+    }
+
+    #[test]
+    fn test_removing_half_space() {
+        assert_eq!(
+            remove_half_space("نمی\u{200c}خواهی درخت\u{200c}ها را ببینیم؟"),
+            "نمی خواهی درخت ها را ببینیم؟"
+        );
+        assert_eq!(remove_half_space("co\u{00AD}operate"), "cooperate");
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 9bdd376..a009641 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -15,6 +15,7 @@
     feature = "extract-card-number",
     feature = "time-ago",
     feature = "get-place-by-iran-national-id",
+    feature = "half-space",
 )))]
 compile_error!("No available Cargo feature is included");
 
@@ -65,3 +66,6 @@ pub mod time_ago;
 
 #[cfg(feature = "get-place-by-iran-national-id")]
 pub mod get_place_by_iran_national_id;
+
+#[cfg(feature = "half-space")]
+pub mod half_space;