Skip to content

Commit

Permalink
Move regex parser and check logic to analyzer-commons (#861)
Browse files Browse the repository at this point in the history
  • Loading branch information
nils-werner-sonarsource authored Oct 11, 2021
1 parent 432b82c commit b4d36df
Show file tree
Hide file tree
Showing 43 changed files with 150 additions and 2,359 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,18 @@
*/
package org.sonar.php.checks.regex;

import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.annotation.CheckForNull;
import javax.annotation.Nullable;
import org.sonar.php.checks.utils.CheckUtils;
import org.sonar.php.checks.utils.FunctionUsageCheck;
import org.sonar.php.regex.PhpRegexCheck;
import org.sonar.php.regex.PhpRegexUtils;
import org.sonar.php.regex.RegexCheck;
import org.sonar.php.regex.RegexCheckContext;
import org.sonar.php.utils.collections.SetUtils;
import org.sonar.plugins.php.api.tree.Tree;
Expand All @@ -41,13 +41,14 @@
import org.sonar.plugins.php.api.visitors.CheckContext;
import org.sonar.plugins.php.api.visitors.PhpIssue;
import org.sonar.plugins.php.api.visitors.PreciseIssue;
import org.sonarsource.analyzer.commons.regex.RegexIssueLocation;
import org.sonarsource.analyzer.commons.regex.RegexParseResult;
import org.sonarsource.analyzer.commons.regex.ast.FlagSet;
import org.sonarsource.analyzer.commons.regex.ast.RegexSyntaxElement;

import static org.sonar.php.regex.PhpRegexUtils.BRACKET_DELIMITERS;

public abstract class AbstractRegexCheck extends FunctionUsageCheck implements RegexCheck {
public abstract class AbstractRegexCheck extends FunctionUsageCheck implements PhpRegexCheck {

private static final Pattern DELIMITER_PATTERN = Pattern.compile("^[^\\w\\r\\n\\t\\f\\v ]");

Expand Down Expand Up @@ -131,27 +132,22 @@ protected final RegexParseResult regexForLiteral(FlagSet flags, LiteralTree lite

public abstract void checkRegex(RegexParseResult regexParseResult, FunctionCallTree regexFunctionCall);

/**
 * Reports an issue on the given regex element without any secondary locations.
 *
 * @param regexTree regex element the issue is attached to
 * @param message   text of the issue
 */
public final void newIssue(RegexSyntaxElement regexTree, String message) {
  List<RegexIssueLocation> noSecondaries = Collections.emptyList();
  newIssue(regexTree, message, noSecondaries);
}

/**
 * Reports an issue on the given regex element together with secondary locations.
 * Delegates to the four-argument overload, passing {@code 0} as the cost.
 *
 * @param regexTree   regex element the issue is attached to
 * @param message     text of the issue
 * @param secondaries additional locations to attach to the issue
 */
public final void newIssue(RegexSyntaxElement regexTree, String message, List<RegexIssueLocation> secondaries) {
newIssue(regexTree, message, secondaries, 0);
}

public final void newIssue(RegexSyntaxElement regexTree, String message, List<RegexIssueLocation> secondaries, double cost) {
public void newIssue(RegexSyntaxElement regexTree, String message, @Nullable Integer cost, List<RegexIssueLocation> secondaries) {
if (reportedRegexTrees.add(regexTree)) {
PreciseIssue issue = regexContext.newIssue(this, regexTree, message);
secondaries.forEach(issue::secondary);
if (cost != 0) {
secondaries.stream().map(PhpRegexCheck.PhpRegexIssueLocation::new).forEach(issue::secondary);
if (cost != null) {
issue.cost(cost);
}
}
}

public final void newIssue(Tree tree, String message, List<RegexIssueLocation> secondaries) {
public final void newIssue(Tree tree, String message, @Nullable Integer cost, List<RegexIssueLocation> secondaries) {
PreciseIssue issue = newIssue(tree, message);
secondaries.forEach(issue::secondary);
secondaries.stream().map(PhpRegexCheck.PhpRegexIssueLocation::new).forEach(issue::secondary);
if (cost != null) {
issue.cost(cost);
}
}

@CheckForNull
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,99 +19,16 @@
*/
package org.sonar.php.checks.regex;

import java.util.List;
import java.util.stream.Collectors;
import org.sonar.check.Rule;
import org.sonar.plugins.php.api.tree.expression.FunctionCallTree;
import org.sonarsource.analyzer.commons.regex.RegexParseResult;
import org.sonarsource.analyzer.commons.regex.ast.BoundaryTree;
import org.sonarsource.analyzer.commons.regex.ast.DisjunctionTree;
import org.sonarsource.analyzer.commons.regex.ast.NonCapturingGroupTree;
import org.sonarsource.analyzer.commons.regex.ast.RegexBaseVisitor;
import org.sonarsource.analyzer.commons.regex.ast.RegexTree;
import org.sonarsource.analyzer.commons.regex.ast.SequenceTree;
import org.sonarsource.analyzer.commons.regex.finders.AnchorPrecedenceFinder;

@Rule(key = "S5850")
public class AnchorPrecedenceCheck extends AbstractRegexCheck {

public static final String MESSAGE = "Group parts of the regex together to make the intended operator precedence explicit.";

@Override
public void checkRegex(RegexParseResult regexParseResult, FunctionCallTree regexFunctionCall) {
new Visitor().visit(regexParseResult);
}

// Which end of a list of regex items is inspected when looking for an anchor.
private enum Position {
BEGINNING, END
}

private class Visitor extends RegexBaseVisitor {

/**
 * Raises an issue on a disjunction whose first alternative starts with an anchor or whose last
 * alternative ends with one, provided no other alternative is anchored.
 */
@Override
public void visitDisjunction(DisjunctionTree tree) {
  List<RegexTree> branches = tree.getAlternatives();
  boolean anchoredOnOuterEdge = anchoredAt(branches, Position.BEGINNING) || anchoredAt(branches, Position.END);
  if (anchoredOnOuterEdge && notAnchoredElseWhere(branches)) {
    newIssue(tree, MESSAGE);
  }
  super.visitDisjunction(tree);
}

/**
 * Checks the outermost alternative for the given position: the first alternative for
 * {@code BEGINNING}, the last one for {@code END}.
 */
private boolean anchoredAt(List<RegexTree> alternatives, Position position) {
  if (position == Position.BEGINNING) {
    return isAnchored(alternatives.get(0), position);
  }
  return isAnchored(alternatives.get(alternatives.size() - 1), position);
}

/**
 * Returns {@code true} when no anchor appears anywhere other than the outer edges of the
 * disjunction: not at the end of the first alternative, not at the beginning of the last one,
 * and on neither side of any alternative in between.
 */
private boolean notAnchoredElseWhere(List<RegexTree> alternatives) {
  int lastIndex = alternatives.size() - 1;
  if (isAnchored(alternatives.get(0), Position.END)) {
    return false;
  }
  if (isAnchored(alternatives.get(lastIndex), Position.BEGINNING)) {
    return false;
  }
  return alternatives.subList(1, lastIndex).stream()
    .noneMatch(inner -> isAnchored(inner, Position.BEGINNING) || isAnchored(inner, Position.END));
}

/**
 * Tells whether the given tree is a sequence whose first (for {@code BEGINNING}) or last
 * (for {@code END}) item, ignoring flag setters, is an anchor boundary.
 */
private boolean isAnchored(RegexTree tree, Position position) {
  if (!tree.is(RegexTree.Kind.SEQUENCE)) {
    return false;
  }
  List<RegexTree> relevantItems = ((SequenceTree) tree).getItems().stream()
    .filter(candidate -> !isFlagSetter(candidate))
    .collect(Collectors.toList());
  if (relevantItems.isEmpty()) {
    return false;
  }
  RegexTree edge = position == Position.BEGINNING
    ? relevantItems.get(0)
    : relevantItems.get(relevantItems.size() - 1);
  if (!edge.is(RegexTree.Kind.BOUNDARY)) {
    return false;
  }
  return isAnchor((BoundaryTree) edge);
}

/**
 * Returns {@code true} for the boundary types treated as anchors here: input/line start and
 * input/line end (including the final-terminator variant of input end).
 */
private boolean isAnchor(BoundaryTree tree) {
  boolean anchor;
  switch (tree.type()) {
    case INPUT_START:
    case LINE_START:
    case INPUT_END:
    case INPUT_END_FINAL_TERMINATOR:
    case LINE_END:
      anchor = true;
      break;
    default:
      anchor = false;
      break;
  }
  return anchor;
}

/**
 * Tells whether the given tree is a non-capturing group with no content, i.e. a group whose
 * only purpose is to set flags for the remainder of the expression.
 */
private boolean isFlagSetter(RegexTree tree) {
  if (!tree.is(RegexTree.Kind.NON_CAPTURING_GROUP)) {
    return false;
  }
  NonCapturingGroupTree group = (NonCapturingGroupTree) tree;
  return group.getElement() == null;
}

new AnchorPrecedenceFinder(this::newIssue).visit(regexParseResult);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,61 +19,16 @@
*/
package org.sonar.php.checks.regex;

import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.sonar.check.Rule;
import org.sonar.php.regex.ast.PhpRegexBaseVisitor;
import org.sonar.plugins.php.api.tree.expression.FunctionCallTree;
import org.sonarsource.analyzer.commons.regex.RegexParseResult;
import org.sonarsource.analyzer.commons.regex.ast.CharacterClassElementTree;
import org.sonarsource.analyzer.commons.regex.ast.CharacterClassUnionTree;
import org.sonarsource.analyzer.commons.regex.ast.RegexSyntaxElement;
import org.sonarsource.analyzer.commons.regex.helpers.SimplifiedRegexCharacterClass;
import org.sonarsource.analyzer.commons.regex.finders.DuplicatesInCharacterClassFinder;

@Rule(key = "S5869")
public class DuplicatesInCharacterClassCheck extends AbstractRegexCheck {

private static final String MESSAGE = "Remove duplicates in this character class.";

@Override
public void checkRegex(RegexParseResult regexParseResult, FunctionCallTree regexFunctionCall) {
new DuplicateFinder().visit(regexParseResult);
}

private class DuplicateFinder extends PhpRegexBaseVisitor {

/**
 * Walks the members of a character class union, collecting every member that intersects with
 * the members seen before it (plus the members it intersects with). The first collected element
 * carries the issue; the remaining ones become secondary locations.
 */
@Override
public void visitCharacterClassUnion(CharacterClassUnionTree tree) {
  Set<RegexSyntaxElement> duplicates = new LinkedHashSet<>();
  SimplifiedRegexCharacterClass seenSoFar = new SimplifiedRegexCharacterClass();
  for (CharacterClassElementTree member : tree.getCharacterClasses()) {
    SimplifiedRegexCharacterClass memberClass;
    try {
      memberClass = new SimplifiedRegexCharacterClass(member);
    } catch (IllegalArgumentException e) {
      // TODO: remove exception catching once the underlying problem is fixed: https://github.com/SonarSource/sonar-analyzer-commons/issues/156
      // Note: this leaves the method entirely, so nothing is reported and super is not called.
      return;
    }

    List<RegexSyntaxElement> overlaps = memberClass.findIntersections(seenSoFar);
    if (!overlaps.isEmpty()) {
      // Record both sides of the overlap: what was already seen and the current member.
      duplicates.addAll(overlaps);
      duplicates.add(member);
    }
    seenSoFar.add(member);
  }
  if (!duplicates.isEmpty()) {
    RegexSyntaxElement primary = duplicates.iterator().next();
    List<RegexIssueLocation> secondaries = duplicates.stream()
      .skip(1)
      .map(duplicate -> new RegexIssueLocation(duplicate, "Additional duplicate"))
      .collect(Collectors.toList());
    newIssue(primary, MESSAGE, secondaries);
  }
  super.visitCharacterClassUnion(tree);
}

new DuplicatesInCharacterClassFinder(this::newIssue).visit(regexParseResult);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,54 +20,15 @@
package org.sonar.php.checks.regex;

import org.sonar.check.Rule;
import org.sonar.php.regex.ast.PhpRegexBaseVisitor;
import org.sonar.plugins.php.api.tree.expression.FunctionCallTree;
import org.sonarsource.analyzer.commons.regex.RegexParseResult;
import org.sonarsource.analyzer.commons.regex.ast.DisjunctionTree;
import org.sonarsource.analyzer.commons.regex.ast.GroupTree;
import org.sonarsource.analyzer.commons.regex.ast.RegexTree;
import org.sonarsource.analyzer.commons.regex.ast.RepetitionTree;
import org.sonarsource.analyzer.commons.regex.ast.SequenceTree;
import org.sonarsource.analyzer.commons.regex.finders.EmptyStringRepetitionFinder;

@Rule(key = "S5842")
public class EmptyStringRepetitionCheck extends AbstractRegexCheck {

private static final String MESSAGE = "Rework this part of the regex to not match the empty string.";

@Override
public void checkRegex(RegexParseResult regexParseResult, FunctionCallTree regexFunctionCall) {
new Visitor().visit(regexParseResult);
}

private class Visitor extends PhpRegexBaseVisitor {

/**
 * Raises an issue on the repeated element when that element can match the empty string.
 */
@Override
public void visitRepetition(RepetitionTree tree) {
  // NOTE(review): super.visitRepetition is not invoked here; whether nested elements are still
  // traversed depends on the base visitor - confirm this is intended.
  RegexTree repeated = tree.getElement();
  if (!matchEmptyString(repeated)) {
    return;
  }
  newIssue(repeated, MESSAGE);
}

/**
 * Decides recursively whether the given element can match the empty string:
 * a sequence when all items can, a disjunction when any alternative can, a repetition when its
 * minimum count is zero, look-arounds and boundaries always, and a group when it is empty or
 * its content can. Anything else is considered to consume input.
 */
private boolean matchEmptyString(RegexTree element) {
  switch (element.kind()) {
    case SEQUENCE: {
      SequenceTree sequence = (SequenceTree) element;
      return sequence.getItems().stream().allMatch(this::matchEmptyString);
    }
    case DISJUNCTION: {
      DisjunctionTree disjunction = (DisjunctionTree) element;
      return disjunction.getAlternatives().stream().anyMatch(this::matchEmptyString);
    }
    case REPETITION: {
      RepetitionTree repetition = (RepetitionTree) element;
      return repetition.getQuantifier().getMinimumRepetitions() == 0;
    }
    case LOOK_AROUND:
    case BOUNDARY:
      return true;
    default:
      if (!(element instanceof GroupTree)) {
        return false;
      }
      RegexTree inner = ((GroupTree) element).getElement();
      return inner == null || matchEmptyString(inner);
  }
}

new EmptyStringRepetitionFinder(this::newIssue).visit(regexParseResult);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,92 +19,16 @@
*/
package org.sonar.php.checks.regex;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import org.sonar.check.Rule;
import org.sonar.php.regex.RegexCheck;
import org.sonar.php.regex.ast.PhpRegexBaseVisitor;
import org.sonar.plugins.php.api.tree.expression.FunctionCallTree;
import org.sonarsource.analyzer.commons.regex.RegexParseResult;
import org.sonarsource.analyzer.commons.regex.ast.CharacterClassTree;
import org.sonarsource.analyzer.commons.regex.ast.CharacterClassUnionTree;
import org.sonarsource.analyzer.commons.regex.ast.CharacterTree;
import org.sonarsource.analyzer.commons.regex.ast.RegexSyntaxElement;
import org.sonarsource.analyzer.commons.regex.finders.GraphemeInClassFinder;

@Rule(key = "S5868")
public class GraphemeClustersInClassesCheck extends AbstractRegexCheck {

private static final String MESSAGE = "Extract %d Grapheme Cluster(s) from this character class.";

@Override
public void checkRegex(RegexParseResult regexParseResult, FunctionCallTree regexFunctionCall) {
new GraphemeInClassVisitor().visit(regexParseResult);
}

/**
 * Visitor that accumulates grapheme clusters found in character class unions and reports them
 * on the character class once its children have been visited.
 */
private class GraphemeInClassVisitor extends PhpRegexBaseVisitor {

  // Clusters collected while visiting the current character class; emptied after each class.
  private final List<RegexIssueLocation> pendingClusters = new ArrayList<>();

  @Override
  public void visitCharacterClass(CharacterClassTree tree) {
    // Children first, so the unions inside this class have contributed their clusters.
    super.visitCharacterClass(tree);
    int clusterCount = pendingClusters.size();
    if (clusterCount > 0) {
      newIssue(tree, String.format(MESSAGE, clusterCount), pendingClusters);
    }
    pendingClusters.clear();
  }

  @Override
  public void visitCharacterClassUnion(CharacterClassUnionTree tree) {
    pendingClusters.addAll(GraphemeHelper.getGraphemeInList(tree.getCharacterClasses()));
    super.visitCharacterClassUnion(tree);
  }

}

private static class GraphemeHelper {

// M (Mark) is "a character intended to be combined with another character (e.g. accents, umlauts, enclosing boxes, etc.)."
// See https://www.regular-expressions.info/unicode.html
private static final Pattern MARK_PATTERN = Pattern.compile("\\p{M}");

// Private constructor: the helper only exposes static methods.
private GraphemeHelper() {
}

/**
 * Scans the given elements in order and groups each non-mark, non-escaped character with the
 * mark characters that directly follow it. Every group of more than one code point yields one
 * location in the returned list.
 *
 * @param trees elements to scan
 * @return one location per detected multi-code-point group, in encounter order
 */
private static List<RegexCheck.RegexIssueLocation> getGraphemeInList(List<? extends RegexSyntaxElement> trees) {
  List<RegexCheck.RegexIssueLocation> result = new ArrayList<>();
  List<RegexSyntaxElement> codePoints = new ArrayList<>();
  for (RegexSyntaxElement child : trees) {
    boolean plainCharacter = child instanceof CharacterTree && !((CharacterTree) child).isEscapeSequence();
    if (!plainCharacter) {
      // Anything that is not an unescaped character ends the group in progress.
      addCurrentGrapheme(result, codePoints);
      codePoints.clear();
    } else if (!isMark((CharacterTree) child)) {
      // A base character closes the previous group and opens a new one.
      addCurrentGrapheme(result, codePoints);
      codePoints.clear();
      codePoints.add(child);
    } else if (!codePoints.isEmpty()) {
      // A mark only extends a group that already has a base character.
      codePoints.add(child);
    }
  }
  addCurrentGrapheme(result, codePoints);
  return result;
}

/**
 * Tells whether the character's string form matches {@code MARK_PATTERN} in full.
 */
private static boolean isMark(CharacterTree currentChar) {
  String text = currentChar.characterAsString();
  return MARK_PATTERN.matcher(text).matches();
}

/**
 * Appends a location covering the given code points to {@code result}, but only when the group
 * holds more than one code point.
 */
private static void addCurrentGrapheme(List<RegexCheck.RegexIssueLocation> result, List<RegexSyntaxElement> codePoints) {
  if (codePoints.size() <= 1) {
    return;
  }
  result.add(new RegexCheck.RegexIssueLocation(codePoints, ""));
}

new GraphemeInClassFinder(this::newIssue).visit(regexParseResult);
}
}
Loading

0 comments on commit b4d36df

Please sign in to comment.