From 643a9e3290c34052de516dfdf10718710e082afe Mon Sep 17 00:00:00 2001
From: Maximilian Schwerin
Date: Wed, 28 Oct 2015 11:38:32 +0100
Subject: [PATCH] Match unicode characters when comparing for alphanum

---
Review notes (this section is not part of the commit message):

IsAlphaNum() is anchored so that it succeeds only when every character of
$value is a Unicode letter or digit, which is the all-characters semantics
of the ctype_alnum() call it replaces. The unanchored form returned 1 for
any string that merely contained one such character. The helper now also
returns a strict bool rather than the int|false of preg_match().

This patch was hand-amended from a copy whose line breaks and leading
whitespace had been lost. Indentation in the hunks below is reconstructed,
so apply with `git apply --ignore-whitespace` if the context does not
match. The blob hashes on the `index` line predate the amendment.

 HtmlDiff.php | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/HtmlDiff.php b/HtmlDiff.php
index 74a5d35..15dc7d0 100644
--- a/HtmlDiff.php
+++ b/HtmlDiff.php
@@ -103,7 +103,7 @@ private function ConvertHtmlToListOfWords( $characterString ) {
 							$current_word = $character;
 							$mode = 'whitespace';
 						} else {
-							if( ctype_alnum( $character ) && ( strlen($current_word) == 0 || ctype_alnum( $current_word ) ) ) {
+							if( $this->IsAlphaNum( $character ) && ( strlen($current_word) == 0 || $this->IsAlphaNum( $current_word ) ) ) {
 								$current_word .= $character;
 							} else {
 								$words[] = $current_word;
@@ -165,6 +165,14 @@ private function IsWhiteSpace( $value ) {
 			return !preg_match( '[^\s]', $value );
 		}
 
+		/**
+		 * Whether $value consists solely of Unicode letters and digits.
+		 * Unicode-aware counterpart of ctype_alnum(); false for an empty string.
+		 */
+		private function IsAlphaNum( $value ) {
+			return preg_match( '/^[\p{L}\p{N}]+$/uD', $value ) === 1;
+		}
+
 		private function Explode( $value ) {
 			// as suggested by @onassar
 			return preg_split( '//u', $value );