Skip to content

Commit

Permalink
Merge pull request runelite#14363 from Nightfirecat/normalize-chat-fi…
Browse files Browse the repository at this point in the history
…lter
  • Loading branch information
Nightfirecat authored Nov 12, 2021
2 parents b8a7458 + 9002c21 commit 969ee21
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
Expand Down Expand Up @@ -316,6 +317,9 @@ String censorMessage(final String username, final String message)
{
String strippedMessage = jagexPrintableCharMatcher.retainFrom(message)
.replace('\u00A0', ' ');
String stripDiacritics = Text.stripDiacritics(strippedMessage);
assert stripDiacritics.length() == strippedMessage.length();

if (username != null && shouldFilterByName(username))
{
switch (config.filterType())
Expand All @@ -332,16 +336,20 @@ String censorMessage(final String username, final String message)
boolean filtered = false;
for (Pattern pattern : filteredPatterns)
{
Matcher m = pattern.matcher(strippedMessage);
Matcher m = pattern.matcher(stripDiacritics);

StringBuffer sb = new StringBuffer();
StringBuilder sb = new StringBuilder();
int idx = 0;

while (m.find())
{
switch (config.filterType())
{
case CENSOR_WORDS:
m.appendReplacement(sb, StringUtils.repeat('*', m.group(0).length()));
MatchResult matchResult = m.toMatchResult();
sb.append(strippedMessage, idx, matchResult.start())
.append(StringUtils.repeat('*', matchResult.group().length()));
idx = m.end();
filtered = true;
break;
case CENSOR_MESSAGE:
Expand All @@ -350,7 +358,7 @@ String censorMessage(final String username, final String message)
return null;
}
}
m.appendTail(sb);
sb.append(strippedMessage.substring(idx));

strippedMessage = sb.toString();
}
Expand All @@ -364,15 +372,18 @@ void updateFilteredPatterns()
filteredNamePatterns.clear();

Text.fromCSV(config.filteredWords()).stream()
.map(Text::stripDiacritics)
.map(s -> Pattern.compile(Pattern.quote(s), Pattern.CASE_INSENSITIVE))
.forEach(filteredPatterns::add);

NEWLINE_SPLITTER.splitToList(config.filteredRegex()).stream()
.map(Text::stripDiacritics)
.map(ChatFilterPlugin::compilePattern)
.filter(Objects::nonNull)
.forEach(filteredPatterns::add);

NEWLINE_SPLITTER.splitToList(config.filteredNames()).stream()
.map(Text::stripDiacritics)
.map(ChatFilterPlugin::compilePattern)
.filter(Objects::nonNull)
.forEach(filteredNamePatterns::add);
Expand Down
15 changes: 15 additions & 0 deletions runelite-client/src/main/java/net/runelite/client/util/Text.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import com.google.common.base.CharMatcher;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import java.text.Normalizer;
import java.util.Collection;
import java.util.List;
import java.util.regex.Matcher;
Expand All @@ -42,6 +43,7 @@ public class Text
{
private static final JaroWinklerDistance DISTANCE = new JaroWinklerDistance();
private static final Pattern TAG_REGEXP = Pattern.compile("<[^>]*>");
public static final Pattern DIACRITICS_AND_FRIENDS = Pattern.compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+");
private static final Splitter COMMA_SPLITTER = Splitter
.on(",")
.omitEmptyStrings()
Expand Down Expand Up @@ -233,4 +235,17 @@ public static boolean matchesSearchTerms(Iterable<String> searchTerms, final Col
}
return true;
}

/**
 * Removes accents and diacritical marks from the given text.
 * <p>
 * The input is first decomposed to Unicode normalization form NFD, after which every run of
 * characters matched by {@link #DIACRITICS_AND_FRIENDS} (combining diacritical marks, plus the
 * Lm and Sk general categories) is deleted. Because matched characters are removed outright,
 * the result is not guaranteed to have the same length as the input.
 * <p>
 * Sourced from <a href="https://stackoverflow.com/a/1453284">Andreas Petersson's implementation</a>.
 *
 * @param str the text to process
 * @return a copy of {@code str} with all character accents and diacritics removed
 */
public static String stripDiacritics(String str)
{
    // Decompose first so an accented letter becomes base letter + separate mark(s).
    final String decomposed = Normalizer.normalize(str, Normalizer.Form.NFD);
    return DIACRITICS_AND_FRIENDS.matcher(decomposed).replaceAll("");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,36 @@ public void testReplayedMessage()
assertNull(chatFilterPlugin.censorMessage("Blue", "hello\u00A0osrs"));
}

/**
 * A filtered word configured without accents must still censor the accented
 * spelling in the message, leaving the rest of the message's accents intact.
 */
@Test
public void testFilterUnicode()
{
    when(chatFilterConfig.filterType()).thenReturn(ChatFilterType.CENSOR_WORDS);
    when(chatFilterConfig.filteredWords()).thenReturn("filterme");
    chatFilterPlugin.updateFilteredPatterns();

    final String message = "plëäsë fïltërmë plügïn";
    final String censored = chatFilterPlugin.censorMessage("Blue", message);

    assertEquals("plëäsë ******** plügïn", censored);
}

/**
 * A filtered word configured with accents must censor the identically
 * accented word in the message and leave the other words untouched.
 */
@Test
public void testUnicodeFiltersUnicode()
{
    when(chatFilterConfig.filterType()).thenReturn(ChatFilterType.CENSOR_WORDS);
    when(chatFilterConfig.filteredWords()).thenReturn("plëäsë");
    chatFilterPlugin.updateFilteredPatterns();

    final String message = "plëäsë fïltërmë plügïn";
    final String censored = chatFilterPlugin.censorMessage("Blue", message);

    assertEquals("****** fïltërmë plügïn", censored);
}

/**
 * With one accented and one unaccented filtered word configured together,
 * both corresponding words in the accented message must be censored.
 */
@Test
public void testMixedUnicodeFiltersUnicode()
{
    when(chatFilterConfig.filterType()).thenReturn(ChatFilterType.CENSOR_WORDS);
    when(chatFilterConfig.filteredWords()).thenReturn("plëäsë, filterme");
    chatFilterPlugin.updateFilteredPatterns();

    final String message = "plëäsë fïltërmë plügïn";
    final String censored = chatFilterPlugin.censorMessage("Blue", message);

    assertEquals("****** ******** plügïn", censored);
}

@Test
public void testMessageFromFriendIsFiltered()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,4 +73,13 @@ public void toJagexName()
assertEquals("mR nAmE", Text.toJagexName("--__--mR_-nAmE__ --"));
assertEquals("Mind the gap", Text.toJagexName("Mind_-_-the-- __gap"));
}

/**
 * Checks that accented letters are reduced to their unaccented base letters.
 */
@Test
public void stripDiacritics()
{
    // Each row is {input, expected output}; rows are checked in order.
    final String[][] cases = {
        {"Björn", "Bjorn"},
        {"plëäsë", "please"},
        {"ïnertïå", "inertia"},
        {"whóle", "whole"},
    };

    for (final String[] testCase : cases)
    {
        assertEquals(testCase[1], Text.stripDiacritics(testCase[0]));
    }
}
}

0 comments on commit 969ee21

Please sign in to comment.