Skip to content

Commit

Permalink
Merge pull request #43 from salarmotevalli/feature/arabic-chars-mod
Browse files Browse the repository at this point in the history
add arabic chars mod
  • Loading branch information
ali77gh authored Jan 19, 2024
2 parents 1ff770f + 0dd76d3 commit bfccda0
Show file tree
Hide file tree
Showing 5 changed files with 235 additions and 3 deletions.
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ full = [
"digits",
"find-capital-by-province",
"persian-chars",
"arabic-chars",
"national-id",
"remove-ordinal-suffix",
"url-fix",
Expand All @@ -57,6 +58,7 @@ commas = []
digits = []
find-capital-by-province = ["persian-chars"]
persian-chars = []
arabic-chars = []
national-id = ["dep:thiserror"]
remove-ordinal-suffix = []
url-fix = ["dep:urlencoding"]
Expand Down
9 changes: 7 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ all: build check test docs
fmt:
cargo fmt


# Builds every feature set on its own; each prerequisite is a per-feature target
# defined further down. `arabic-chars` is listed so the new feature is covered too.
build: full default add-ordinal-suffix commas digits find-capital-by-province persian-chars arabic-chars national-id remove-ordinal-suffix url-fix verity-card-number time-ago phone-number bill number-to-words get-bank-name-by-card-number extract-card-number get-place-by-iran-national-id half-space legal-id words-to-number sheba

check: clippy lint
Expand Down Expand Up @@ -57,6 +56,11 @@ persian-chars:
cargo build --no-default-features --features=persian-chars
@ ls -sh target/debug/*.rlib

# Build the crate with only the `arabic-chars` feature enabled, then list the
# sizes of the produced .rlib files.
arabic-chars:
@ echo ""
cargo build --no-default-features --features=arabic-chars
@ ls -sh target/debug/*.rlib

national-id:
@ echo ""
cargo build --no-default-features --features=national-id
Expand Down Expand Up @@ -135,4 +139,5 @@ words-to-number:
sheba:
@ echo ""
cargo build --no-default-features --features=sheba
@ ls -sh target/debug/*.rlib
@ ls -sh target/debug/*.rlib

2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ Rust version of Persian Tools
- [x] findCapitalByProvince
- [x] getBankNameByCardNumber
- [x] getPlaceByIranNationalId
- [x] isArabic
- [x] halfSpace
- [ ] isArabic
- [x] isPersian
- [x] legalId
- [x] nationalId
Expand Down
221 changes: 221 additions & 0 deletions src/arabic_chars/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
use std::borrow::Cow;

/// Character tables consulted by the Arabic detection helpers of this module.
pub mod chars {
/// The ten Arabic-Indic digits, zero through nine, in ascending order.
pub static AR_NUMBER: &str = "٠١٢٣٤٥٦٧٨٩"; // tick
/// Every character that `has_arabic` / `is_arabic` treat as Arabic: letters,
/// punctuation, diacritic marks and the Arabic-Indic digits.
///
/// NOTE: the literal also contains plain ASCII spaces (they separate the
/// combining marks), so a space is a member of this set. `has_arabic` filters
/// spaces out explicitly for that reason; `is_arabic` does not.
pub static AR_TEXT: &str =
"ابتثجحخدذرزسشصضطظعغفقكلمنهويآةى؟ؠءأؤإ ؘ ؙ ؚ؛ ً ٌ ٍ َ ُ ِ ّ ْ ٓ ٔ ٕ ٖ ٗ ٘ ٙ ٚ ٛ ٝ ٞ ٟ٠١٢٣٤٥٦٧٨٩";
}

/// Reports whether `input` contains at least one Arabic character.
///
/// A character counts as Arabic when it is a member of [`chars::AR_TEXT`].
/// Plain spaces are skipped before the lookup, so a space on its own never
/// counts as a match. An empty string yields `false`.
pub fn has_arabic(input: impl AsRef<str>) -> bool {
    let text = input.as_ref();
    text.chars()
        .filter(|&ch| ch != ' ')
        .any(|ch| chars::AR_TEXT.contains(ch))
}

/// Reports whether `input` consists solely of Arabic characters.
///
/// Returns `false` for an empty string. Otherwise every character must be a
/// member of [`chars::AR_TEXT`]; a single character outside that set makes the
/// result `false`.
pub fn is_arabic(input: impl AsRef<str>) -> bool {
    let text = input.as_ref();
    if text.is_empty() {
        return false;
    }
    text.chars().all(|ch| chars::AR_TEXT.contains(ch))
}

/// Returns a copy of `input` with the Persian letter forms replaced by their
/// Arabic counterparts.
///
/// * Persian keheh 'ک' (U+06A9) becomes Arabic kaf 'ك' (U+0643).
/// * Persian yeh 'ی' (U+06CC) becomes Arabic yeh 'ي' (U+064A).
///
/// Every occurrence is replaced unconditionally; all other characters are
/// copied through unchanged. The mapping is the same one applied in place by
/// `to_arabic_chars_mut`.
pub fn to_arabic_chars(input: impl AsRef<str>) -> String {
    // A single pass with one allocation. The code points are written as escapes
    // because the Persian and Arabic glyphs are nearly indistinguishable in
    // most fonts.
    input
        .as_ref()
        .chars()
        .map(|ch| match ch {
            '\u{06A9}' => '\u{0643}', // 'ک' -> 'ك'
            '\u{06CC}' => '\u{064A}', // 'ی' -> 'ي'
            other => other,
        })
        .collect()
}

/// In-place variant of the Persian-to-Arabic letter conversion.
///
/// Rewrites every Persian yeh 'ی' (U+06CC) to Arabic yeh 'ي' (U+064A) and every
/// Persian keheh 'ک' (U+06A9) to Arabic kaf 'ك' (U+0643) directly inside the
/// caller's buffer. Both replacements are two bytes long in UTF-8, exactly like
/// the characters they replace, so the string length never changes and nothing
/// is allocated.
///
/// The `AsRef<str>` bound is kept for interface compatibility; the buffer is
/// only ever accessed through `AsMut<str>`.
pub fn to_arabic_chars_mut<I>(mut input: I)
where
    I: AsMut<str> + AsRef<str>,
{
    // Resolve the buffer exactly once so that searching and patching are
    // guaranteed to operate on the same bytes, whatever `I` is.
    let text: &mut str = input.as_mut();

    // SAFETY: the loop below only ever overwrites one complete two-byte UTF-8
    // sequence with another complete two-byte UTF-8 sequence
    // (DB 8C -> D9 8A and DA A9 -> D9 83). In valid UTF-8 the bytes 0xDB and
    // 0xDA can only be the lead byte of a two-byte character, so a matched pair
    // is always a whole character and the buffer is valid UTF-8 again after
    // every write. All indexing is bounds-checked.
    let bytes = unsafe { text.as_bytes_mut() };

    let mut pos = 0;
    while pos + 1 < bytes.len() {
        let replacement: Option<[u8; 2]> = match (bytes[pos], bytes[pos + 1]) {
            // 'ی' [0xDB, 0x8C] -> 'ي' [0xD9, 0x8A]
            (0xDB, 0x8C) => Some([0xD9, 0x8A]),
            // 'ک' [0xDA, 0xA9] -> 'ك' [0xD9, 0x83]
            (0xDA, 0xA9) => Some([0xD9, 0x83]),
            _ => None,
        };

        if let Some(pair) = replacement {
            bytes[pos..pos + 2].copy_from_slice(&pair);
            // Skip the continuation byte that was just written.
            pos += 2;
        } else {
            pos += 1;
        }
    }
}

/// Method-call form of the free function `to_arabic_chars`.
///
/// Implemented in this module for `str`, `String` and `Cow<'_, str>`.
pub trait ToArabicChars {
/// Returns a new `String` with the Persian letters 'ی' and 'ک' replaced by
/// their Arabic counterparts; `self` is left untouched.
fn to_arabic_chars(&self) -> String;
}

/// Method-call form of the free function `to_arabic_chars_mut`.
///
/// Implemented in this module for `String` and `Cow<'_, str>`.
pub trait ToArabicCharsMut {
/// Replaces the Persian letters 'ی' and 'ک' with their Arabic counterparts
/// in place, modifying `self`.
fn to_arabic_chars_mut(&mut self);
}

/// Method-call form of the free function `has_arabic`.
///
/// Implemented in this module for `str`, `String` and `Cow<'_, str>`.
pub trait HasArabic {
/// Returns `true` when `self` contains at least one Arabic character.
fn has_arabic(&self) -> bool;
}

/// Method-call form of the free function `is_arabic`.
///
/// Implemented in this module for `str`, `String` and `Cow<'_, str>`.
pub trait IsArabic {
/// Returns `true` when `self` is non-empty and made up only of characters
/// from the module's Arabic character table.
fn is_arabic(&self) -> bool;
}

/// Lets string slices call `.to_arabic_chars()` as a method.
impl ToArabicChars for str {
fn to_arabic_chars(&self) -> String {
// A bare function name never resolves to a method, so this calls the
// module-level free function rather than recursing.
to_arabic_chars(self)
}
}

/// Lets owned strings call `.to_arabic_chars()` as a method.
impl ToArabicChars for String {
fn to_arabic_chars(&self) -> String {
// Calls the module-level free function, not this method.
to_arabic_chars(self)
}
}

/// Lets `Cow<str>` values call `.to_arabic_chars()` as a method; the `Cow`
/// itself is only read, never converted to its owned form.
impl ToArabicChars for Cow<'_, str> {
fn to_arabic_chars(&self) -> String {
// Calls the module-level free function, not this method.
to_arabic_chars(self)
}
}

/// Lets owned strings be converted in place via `.to_arabic_chars_mut()`.
impl ToArabicCharsMut for String {
fn to_arabic_chars_mut(&mut self) {
// Calls the module-level free function with `&mut String` as the buffer.
to_arabic_chars_mut(self)
}
}

/// Lets `Cow<str>` values be converted in place via `.to_arabic_chars_mut()`.
impl ToArabicCharsMut for Cow<'_, str> {
fn to_arabic_chars_mut(&mut self) {
// `to_mut` clones borrowed data into an owned `String` first, so a
// `Cow::Borrowed` always ends up as `Cow::Owned` after this call, even
// when the text contains nothing to replace.
to_arabic_chars_mut(self.to_mut())
}
}

/// Lets string slices call `.has_arabic()` as a method.
impl HasArabic for str {
fn has_arabic(&self) -> bool {
// Calls the module-level free function, not this method.
has_arabic(self)
}
}

/// Lets owned strings call `.has_arabic()` as a method.
impl HasArabic for String {
fn has_arabic(&self) -> bool {
// Calls the module-level free function, not this method.
has_arabic(self)
}
}

/// Lets `Cow<str>` values call `.has_arabic()` as a method.
impl HasArabic for Cow<'_, str> {
fn has_arabic(&self) -> bool {
// Calls the module-level free function, not this method.
has_arabic(self)
}
}

/// Lets string slices call `.is_arabic()` as a method.
impl IsArabic for str {
fn is_arabic(&self) -> bool {
// Calls the module-level free function, not this method.
is_arabic(self)
}
}

/// Lets owned strings call `.is_arabic()` as a method.
impl IsArabic for String {
fn is_arabic(&self) -> bool {
// Calls the module-level free function, not this method.
is_arabic(self)
}
}

/// Lets `Cow<str>` values call `.is_arabic()` as a method.
impl IsArabic for Cow<'_, str> {
fn is_arabic(&self) -> bool {
// Calls the module-level free function, not this method.
is_arabic(self)
}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// `has_arabic` is true as soon as one Arabic character is present, even
    /// inside text written in another script.
    #[test]
    fn has_arabic_test() {
        assert!("هل هذا نص عربي؟".has_arabic());
        assert!(
            "هل يمكن للنظام أن يتعرف عن طريق الخطأ على الخيارات الأخرى كنص عربي؟".has_arabic()
        );
        assert!("This text includes عربي".has_arabic());
        assert!("Это персидский ص текст?".has_arabic());
        assert!("أكد رئيس اللجنة العسكرية الممثلة لحكومة الوفاق أراضي البلاد.".has_arabic());

        assert!(!"Lorem Ipsum Test".has_arabic());
        assert!(!"これはペルシア語のテキストですか".has_arabic());
        assert!(!"Это персидский текст?".has_arabic());
        assert!(!"这是波斯文字吗?".has_arabic());
        assert!(!"".has_arabic());
    }

    /// `is_arabic` requires every character to be Arabic and rejects the empty
    /// string; Persian-only letters such as 'ی' make it fail until converted.
    #[test]
    fn is_arabic_test() {
        assert!("هل هذا نص عربي؟".is_arabic());
        assert!(
            "هل يمكن للنظام أن يتعرف عن طريق الخطأ على الخيارات الأخرى كنص عربي؟".is_arabic()
        );

        assert!(!"Lorem Ipsum Test".is_arabic());
        assert!(!"これはペルシア語のテキストですか".is_arabic());
        assert!(!"Это персидский текст?".is_arabic());
        assert!(!"这是波斯文字吗?".is_arabic());
        assert!(!"این متن عربی است".is_arabic());
        assert!(
            !"آیا سیستم میتواند گزینه های دیگری را به اشتباه به عنوان متن فارسی تشخیص دهد؟"
                .is_arabic()
        );
        assert!(!"".is_arabic());
        assert!("مهدی".to_arabic_chars().is_arabic());
        assert!("شاه".to_arabic_chars().is_arabic());
    }

    /// Both the allocating and the in-place conversion map 'ی' and 'ک' to their
    /// Arabic forms for every supported string type.
    #[test]
    fn to_arabic_chars_test() {
        assert_eq!("علی".to_arabic_chars(), "علي");

        assert_eq!(String::from("علی").to_arabic_chars(), "علي");

        let mut name = String::from("علی در اراک");
        name.to_arabic_chars_mut();
        assert_eq!(name, "علي در اراك");

        let mut name = Cow::Borrowed("علی در اراک");
        assert_eq!(name.to_arabic_chars(), "علي در اراك");
        name.to_arabic_chars_mut();
        assert_eq!(name, "علي در اراك");
        // The in-place conversion has to take ownership of borrowed data.
        assert!(matches!(name, Cow::Owned(_)));
    }
}
4 changes: 4 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
feature = "digits",
feature = "find-capital-by-province",
feature = "persian-chars",
feature = "arabic-chars",
feature = "national-id",
feature = "remove-ordinal-suffix",
feature = "url-fix",
Expand Down Expand Up @@ -37,6 +38,9 @@ pub mod find_capital_by_province;
#[cfg(feature = "persian-chars")]
pub mod persian_chars;

// Gated on its own feature so that a build with
// `--no-default-features --features=arabic-chars` actually compiles the module.
#[cfg(feature = "arabic-chars")]
pub mod arabic_chars;

#[cfg(feature = "national-id")]
pub mod national_id;

Expand Down

0 comments on commit bfccda0

Please sign in to comment.