From 4b7e8b99a69ad39914a81d2fa0cc5f7711b68ea3 Mon Sep 17 00:00:00 2001 From: Jordan Atwood Date: Thu, 11 Nov 2021 12:33:27 -0800 Subject: [PATCH 1/2] Text: Add method to strip character accents --- .../main/java/net/runelite/client/util/Text.java | 15 +++++++++++++++ .../java/net/runelite/client/util/TextTest.java | 9 +++++++++ 2 files changed, 24 insertions(+) diff --git a/runelite-client/src/main/java/net/runelite/client/util/Text.java b/runelite-client/src/main/java/net/runelite/client/util/Text.java index 4bfb00ac672..a441dfdf81b 100644 --- a/runelite-client/src/main/java/net/runelite/client/util/Text.java +++ b/runelite-client/src/main/java/net/runelite/client/util/Text.java @@ -28,6 +28,7 @@ import com.google.common.base.CharMatcher; import com.google.common.base.Joiner; import com.google.common.base.Splitter; +import java.text.Normalizer; import java.util.Collection; import java.util.List; import java.util.regex.Matcher; @@ -42,6 +43,7 @@ public class Text { private static final JaroWinklerDistance DISTANCE = new JaroWinklerDistance(); private static final Pattern TAG_REGEXP = Pattern.compile("<[^>]*>"); + public static final Pattern DIACRITICS_AND_FRIENDS = Pattern.compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+"); private static final Splitter COMMA_SPLITTER = Splitter .on(",") .omitEmptyStrings() @@ -233,4 +235,17 @@ public static boolean matchesSearchTerms(Iterable searchTerms, final Col } return true; } + + /** + * Strips diacritics and other accent marks from a string. + * Sourced from Andreas Petersson's implementation. + * + * @return A copy of the passed string, with all character accents and diacritics removed. 
+ */ + public static String stripDiacritics(String str) + { + str = Normalizer.normalize(str, Normalizer.Form.NFD); + str = DIACRITICS_AND_FRIENDS.matcher(str).replaceAll(""); + return str; + } } diff --git a/runelite-client/src/test/java/net/runelite/client/util/TextTest.java b/runelite-client/src/test/java/net/runelite/client/util/TextTest.java index 0e0f4c0b3e6..1de8ba23f2b 100644 --- a/runelite-client/src/test/java/net/runelite/client/util/TextTest.java +++ b/runelite-client/src/test/java/net/runelite/client/util/TextTest.java @@ -73,4 +73,13 @@ public void toJagexName() assertEquals("mR nAmE", Text.toJagexName("--__--mR_-nAmE__ --")); assertEquals("Mind the gap", Text.toJagexName("Mind_-_-the-- __gap")); } + + @Test + public void stripDiacritics() + { + assertEquals("Bjorn", Text.stripDiacritics("Björn")); + assertEquals("please", Text.stripDiacritics("plëäsë")); + assertEquals("inertia", Text.stripDiacritics("ïnertïå")); + assertEquals("whole", Text.stripDiacritics("whóle")); + } } From 9002c2185617d6dcc4cee789ac1c63d13a9c175a Mon Sep 17 00:00:00 2001 From: Adam Date: Fri, 12 Nov 2021 11:52:36 -0500 Subject: [PATCH 2/2] chat filter: Ignore character accents for matching This lets plain Latin-character filters match messages with accents and diacritics which are not easily typed on all keyboard layouts. 
Co-authored-by: Jordan Atwood --- .../plugins/chatfilter/ChatFilterPlugin.java | 19 +++++++++--- .../chatfilter/ChatFilterPluginTest.java | 30 +++++++++++++++++++ 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/runelite-client/src/main/java/net/runelite/client/plugins/chatfilter/ChatFilterPlugin.java b/runelite-client/src/main/java/net/runelite/client/plugins/chatfilter/ChatFilterPlugin.java index b5d111035a2..f67dfe480b3 100644 --- a/runelite-client/src/main/java/net/runelite/client/plugins/chatfilter/ChatFilterPlugin.java +++ b/runelite-client/src/main/java/net/runelite/client/plugins/chatfilter/ChatFilterPlugin.java @@ -36,6 +36,7 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.regex.MatchResult; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; @@ -316,6 +317,9 @@ String censorMessage(final String username, final String message) { String strippedMessage = jagexPrintableCharMatcher.retainFrom(message) .replace('\u00A0', ' '); + String stripDiacritics = Text.stripDiacritics(strippedMessage); + assert stripDiacritics.length() == strippedMessage.length(); + if (username != null && shouldFilterByName(username)) { switch (config.filterType()) @@ -332,16 +336,20 @@ String censorMessage(final String username, final String message) boolean filtered = false; for (Pattern pattern : filteredPatterns) { - Matcher m = pattern.matcher(strippedMessage); + Matcher m = pattern.matcher(stripDiacritics); - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); + int idx = 0; while (m.find()) { switch (config.filterType()) { case CENSOR_WORDS: - m.appendReplacement(sb, StringUtils.repeat('*', m.group(0).length())); + MatchResult matchResult = m.toMatchResult(); + sb.append(strippedMessage, idx, matchResult.start()) + .append(StringUtils.repeat('*', matchResult.group().length())); + idx = m.end(); filtered = true; break; case CENSOR_MESSAGE: 
@@ -350,7 +358,7 @@ String censorMessage(final String username, final String message) return null; } } - m.appendTail(sb); + sb.append(strippedMessage.substring(idx)); strippedMessage = sb.toString(); } @@ -364,15 +372,18 @@ void updateFilteredPatterns() filteredNamePatterns.clear(); Text.fromCSV(config.filteredWords()).stream() + .map(Text::stripDiacritics) .map(s -> Pattern.compile(Pattern.quote(s), Pattern.CASE_INSENSITIVE)) .forEach(filteredPatterns::add); NEWLINE_SPLITTER.splitToList(config.filteredRegex()).stream() + .map(Text::stripDiacritics) .map(ChatFilterPlugin::compilePattern) .filter(Objects::nonNull) .forEach(filteredPatterns::add); NEWLINE_SPLITTER.splitToList(config.filteredNames()).stream() + .map(Text::stripDiacritics) .map(ChatFilterPlugin::compilePattern) .filter(Objects::nonNull) .forEach(filteredNamePatterns::add); diff --git a/runelite-client/src/test/java/net/runelite/client/plugins/chatfilter/ChatFilterPluginTest.java b/runelite-client/src/test/java/net/runelite/client/plugins/chatfilter/ChatFilterPluginTest.java index 501787576ae..b93f3b10780 100644 --- a/runelite-client/src/test/java/net/runelite/client/plugins/chatfilter/ChatFilterPluginTest.java +++ b/runelite-client/src/test/java/net/runelite/client/plugins/chatfilter/ChatFilterPluginTest.java @@ -186,6 +186,36 @@ public void testReplayedMessage() assertNull(chatFilterPlugin.censorMessage("Blue", "hello\u00A0osrs")); } + @Test + public void testFilterUnicode() + { + when(chatFilterConfig.filterType()).thenReturn(ChatFilterType.CENSOR_WORDS); + when(chatFilterConfig.filteredWords()).thenReturn("filterme"); + + chatFilterPlugin.updateFilteredPatterns(); + assertEquals("plëäsë ******** plügïn", chatFilterPlugin.censorMessage("Blue", "plëäsë fïltërmë plügïn")); + } + + @Test + public void testUnicodeFiltersUnicode() + { + when(chatFilterConfig.filterType()).thenReturn(ChatFilterType.CENSOR_WORDS); + when(chatFilterConfig.filteredWords()).thenReturn("plëäsë"); + + 
chatFilterPlugin.updateFilteredPatterns(); + assertEquals("****** fïltërmë plügïn", chatFilterPlugin.censorMessage("Blue", "plëäsë fïltërmë plügïn")); + } + + @Test + public void testMixedUnicodeFiltersUnicode() + { + when(chatFilterConfig.filterType()).thenReturn(ChatFilterType.CENSOR_WORDS); + when(chatFilterConfig.filteredWords()).thenReturn("plëäsë, filterme"); + + chatFilterPlugin.updateFilteredPatterns(); + assertEquals("****** ******** plügïn", chatFilterPlugin.censorMessage("Blue", "plëäsë fïltërmë plügïn")); + } + @Test public void testMessageFromFriendIsFiltered() {