Skip to content

Commit

Permalink
Lexer improvements for highlighter
Browse files Browse the repository at this point in the history
  • Loading branch information
mjdetullio committed Mar 6, 2016
1 parent 0eaeaaa commit 876b99d
Show file tree
Hide file tree
Showing 6 changed files with 393 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,19 @@
*/
package org.sonar.objectivec.api;

import com.google.common.collect.ImmutableList;
import com.sonar.sslr.api.AstNode;
import com.sonar.sslr.api.TokenType;

public enum ObjectiveCTokenType implements TokenType {
import java.util.List;

NUMERIC_LITERAL;
public enum ObjectiveCTokenType implements TokenType {
CHARACTER_LITERAL,
DOUBLE_LITERAL,
FLOAT_LITERAL,
INTEGER_LITERAL,
LONG_LITERAL,
STRING_LITERAL;

@Override
public String getName() {
Expand All @@ -41,4 +48,7 @@ public boolean hasToBeSkippedFromAst(AstNode node) {
return false;
}

/**
 * Returns the token types that represent numeric literals.
 *
 * <p>The return type is parameterized (rather than a raw {@code List}) so callers get
 * compile-time type checking; existing callers that only call {@code contains(...)} or
 * assign to a raw {@code List} keep compiling unchanged.
 *
 * @return an immutable list holding {@code DOUBLE_LITERAL}, {@code FLOAT_LITERAL},
 *         {@code INTEGER_LITERAL} and {@code LONG_LITERAL}, in that order
 */
public static List<ObjectiveCTokenType> numberLiterals() {
  return ImmutableList.of(DOUBLE_LITERAL, FLOAT_LITERAL, INTEGER_LITERAL, LONG_LITERAL);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import org.sonar.api.batch.fs.InputFile;
import org.sonar.api.source.Highlightable;
import org.sonar.objectivec.api.ObjectiveCKeyword;
import org.sonar.objectivec.api.ObjectiveCTokenType;
import org.sonar.squidbridge.SquidAstVisitor;

import java.io.IOException;
Expand Down Expand Up @@ -116,6 +117,15 @@ public void visitToken(Token token) {
if (token.getType() instanceof ObjectiveCKeyword) {
highlightToken(token, "k");
}

if (ObjectiveCTokenType.numberLiterals().contains(token.getType())) {
highlightToken(token, "c");
}

if (ObjectiveCTokenType.STRING_LITERAL.equals(token.getType())
|| ObjectiveCTokenType.CHARACTER_LITERAL.equals(token.getType())) {
highlightToken(token, "s");
}
}

private void highlightToken(Token token, String typeOfText) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* SonarQube Objective-C (Community) Plugin
* Copyright (C) 2012-2016 OCTO Technology, Backelite, and contributors
* mailto:[email protected]
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.sonar.objectivec.lexer;

import com.sonar.sslr.impl.Lexer;
import org.sonar.sslr.channel.Channel;
import org.sonar.sslr.channel.CodeReader;

/**
 * Lexer channel that discards a backslash when it is immediately followed by a
 * line-feed or carriage-return character.
 *
 * <p>Only the backslash itself is popped from the reader; the line terminator that
 * follows it is left in place.
 *
 * @author Sonar C++ Plugin (Community) authors
 */
public class BackslashChannel extends Channel<Lexer> {

  /**
   * Consumes a single backslash if the next character is {@code '\n'} or {@code '\r'}.
   *
   * @param code   reader positioned at the character to inspect
   * @param output lexer being fed (not touched by this channel)
   * @return {@code true} if a backslash was popped, {@code false} if nothing was consumed
   */
  @Override
  public boolean consume(CodeReader code, Lexer output) {
    // Guard: anything other than a backslash is not ours to handle.
    if ((char) code.peek() != '\\') {
      return false;
    }

    // The look-ahead is only evaluated once a backslash has been seen.
    final char following = code.charAt(1);
    if (following != '\n' && following != '\r') {
      return false;
    }

    // just throw away the backslash
    code.pop();
    return true;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/*
* SonarQube Objective-C (Community) Plugin
* Copyright (C) 2012-2016 OCTO Technology, Backelite, and contributors
* mailto:[email protected]
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.sonar.objectivec.lexer;

import com.sonar.sslr.api.Token;
import com.sonar.sslr.impl.Lexer;
import org.sonar.objectivec.api.ObjectiveCTokenType;
import org.sonar.sslr.channel.Channel;
import org.sonar.sslr.channel.CodeReader;

/**
 * Lexer channel that emits {@code CHARACTER_LITERAL} tokens for single-quoted literals.
 *
 * <p>A literal may start with a one-character prefix ({@code u}, {@code U} or {@code L})
 * and may be followed by an identifier-like suffix (letters, underscores, and digits after
 * the first suffix character). The whole span, prefix and suffix included, becomes the
 * token value.
 *
 * @author Sonar C++ Plugin (Community) authors
 */
public class CharacterLiteralsChannel extends Channel<Lexer> {
  /** Value returned by the reader's {@code charAt} that this channel treats as end of input. */
  private static final char EOF = (char) -1;

  /** Scratch buffer for the token text; emptied again after each emitted token. */
  private final StringBuilder sb = new StringBuilder();

  /** Look-ahead offset from the current reader position; ends up as the literal's length. */
  private int index;
  /** Character found after the optional prefix; must be a single quote for a match. */
  private char ch;

  /**
   * Tries to read a character literal at the current reader position.
   *
   * @param code   reader to look ahead in and, on a match, pop characters from
   * @param output lexer that receives the resulting token
   * @return {@code true} if a token was added, {@code false} if nothing was consumed
   */
  @Override
  public boolean consume(CodeReader code, Lexer output) {
    // Remember where the literal starts before anything is popped.
    final int startLine = code.getLinePosition();
    final int startColumn = code.getColumnPosition();

    index = 0;
    readPrefix(code);
    // Not a quote after the optional prefix, or no closing quote before end of input.
    if (ch != '\'' || !read(code)) {
      return false;
    }
    readUdSuffix(code);

    // Everything scanned so far was look-ahead only; now actually consume it.
    for (int remaining = index; remaining > 0; remaining--) {
      sb.append((char) code.pop());
    }

    Token literal = Token.builder()
        .setLine(startLine)
        .setColumn(startColumn)
        .setURI(output.getURI())
        .setValueAndOriginalValue(sb.toString())
        .setType(ObjectiveCTokenType.CHARACTER_LITERAL)
        .build();
    output.addToken(literal);

    sb.setLength(0);
    return true;
  }

  /**
   * Advances {@code index} from the opening quote to just past the matching closing quote.
   *
   * @return {@code false} if end of input is reached before the closing quote
   */
  private boolean read(CodeReader code) {
    index++; // step over the opening quote
    for (char current = code.charAt(index); current != ch; current = code.charAt(index)) {
      if (current == EOF) {
        return false;
      }
      // An escape occupies two characters: the backslash and whatever follows it.
      index += (current == '\\') ? 2 : 1;
    }
    index++; // step over the closing quote
    return true;
  }

  /**
   * Loads {@code ch} with the character at {@code index}, first skipping a single
   * {@code u}, {@code U} or {@code L} prefix character if one is present.
   */
  private void readPrefix(CodeReader code) {
    ch = code.charAt(index);
    final boolean prefixed = ch == 'u' || ch == 'U' || ch == 'L';
    if (prefixed) {
      ch = code.charAt(++index);
    }
  }

  /**
   * Extends {@code index} over an optional suffix directly after the closing quote.
   *
   * <p>The suffix is made of letters and underscores, plus digits once at least one
   * suffix character has been accepted. A digit in first position means there is no
   * suffix, and {@code index} is put back to where the scan started.
   */
  private void readUdSuffix(CodeReader code) {
    final int suffixStart = index;
    int accepted = 0;
    while (true) {
      final char c = code.charAt(index);
      if (c == EOF) {
        return;
      }

      final boolean letterOrUnderscore =
          (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c == '_');
      final boolean digit = c >= '0' && c <= '9';

      if (!letterOrUnderscore && !digit) {
        return;
      }
      if (digit && accepted == 0) {
        // A suffix cannot begin with a digit.
        index = suffixStart;
        return;
      }

      accepted++;
      index++;
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
*/
package org.sonar.objectivec.lexer;

import com.sonar.sslr.api.GenericTokenType;
import com.sonar.sslr.impl.Lexer;
import com.sonar.sslr.impl.channel.BlackHoleChannel;
import com.sonar.sslr.impl.channel.IdentifierAndKeywordChannel;
Expand All @@ -27,11 +28,33 @@
import org.sonar.objectivec.api.ObjectiveCKeyword;
import org.sonar.objectivec.api.ObjectiveCPunctuator;

import static com.sonar.sslr.api.GenericTokenType.LITERAL;
import static com.sonar.sslr.impl.channel.RegexpChannelBuilder.commentRegexp;
import static com.sonar.sslr.impl.channel.RegexpChannelBuilder.regexp;
import static org.sonar.objectivec.api.ObjectiveCTokenType.DOUBLE_LITERAL;
import static org.sonar.objectivec.api.ObjectiveCTokenType.FLOAT_LITERAL;
import static org.sonar.objectivec.api.ObjectiveCTokenType.INTEGER_LITERAL;
import static org.sonar.objectivec.api.ObjectiveCTokenType.LONG_LITERAL;

public class ObjectiveCLexer {
// Exponent part used by the decimal floating alternatives: 'E' or 'e', an optional sign,
// then one or more digits or underscores.
private static final String EXP_REGEXP = "(?:[Ee][+-]?+[0-9_]++)";
// Exponent part used by the hexadecimal floating alternatives: 'P' or 'p', an optional sign,
// then one or more digits or underscores.
private static final String BINARY_EXP_REGEXP = "(?:[Pp][+-]?+[0-9_]++)";
// Floating-point literal body without any type suffix; the suffix is appended where the
// number channels are registered. Wrapped in a non-capturing group so a suffix can be
// concatenated after it.
private static final String FLOATING_LITERAL_WITHOUT_SUFFIX_REGEXP = "(?:" +
// Decimal: digits with a dot (optional fraction and exponent), a leading-dot fraction
// (optional exponent), or digits followed by a mandatory exponent
"[0-9][0-9_]*+\\.([0-9_]++)?+" + EXP_REGEXP + "?+" +
"|" + "\\.[0-9][0-9_]*+" + EXP_REGEXP + "?+" +
"|" + "[0-9][0-9_]*+" + EXP_REGEXP +
// Hexadecimal: 0x/0X digits, with or without a dot, always followed by a 'p' exponent
"|" + "0[xX][0-9_a-fA-F]++\\.[0-9_a-fA-F]*+" + BINARY_EXP_REGEXP +
"|" + "0[xX][0-9_a-fA-F]++" + BINARY_EXP_REGEXP +
")";
// Integer literal body without any type suffix. Underscores are accepted between digits
// in every alternative.
private static final String INTEGER_LITERAL_REGEXP = "(?:" +
// Hexadecimal
"0[xX][0-9_a-fA-F]++" +
// Binary (Java 7)
// NOTE(review): the "Java 7" remark suggests this pattern came from a Java grammar - confirm
// it is intended for Objective-C sources.
"|" + "0[bB][01_]++" +
// Decimal and Octal
"|" + "[0-9][0-9_]*+" +
")";

private ObjectiveCLexer() {
// prevents outside instantiation
Expand All @@ -44,23 +67,37 @@ public static Lexer create() {
/**
 * Builds a lexer for the given configuration.
 *
 * <p>The builder receives the charset from {@code conf} followed by a list of channels:
 * whitespace, comments, backslash before a line break, character and string literals,
 * number literals, identifiers/keywords, punctuators, and catch-all channels.
 *
 * <p>NOTE(review): several settings appear twice below with different arguments (the
 * fail-if-no-channel flag, the block-comment channel, the identifier/keyword channel and
 * the catch-all channel). This looks like removed and added lines of a diff shown
 * together - confirm against the actual file before relying on this listing.
 *
 * @param conf configuration supplying the charset
 * @return the lexer produced by the builder
 */
public static Lexer create(ObjectiveCConfiguration conf) {
return Lexer.builder()
.withCharset(conf.getCharset())
.withFailIfNoChannelToConsumeOneCharacter(true)

.withFailIfNoChannelToConsumeOneCharacter(false)
/* Remove whitespace */
.withChannel(new BlackHoleChannel("\\s++"))

/* Comments */
// Line comment: from "//" up to (not including) the line terminator.
.withChannel(commentRegexp("//[^\\n\\r]*+"))
// Block comment: "/*" ... "*/", matched reluctantly so it stops at the first "*/".
.withChannel(commentRegexp("/\\*[\\s\\S]*?\\*/"))
.withChannel(commentRegexp("/\\*", "[\\s\\S]*?", "\\*/"))

/* Backslash at the end of the line: just throw away */
.withChannel(new BackslashChannel())

/* Character literals */
.withChannel(new CharacterLiteralsChannel())

/* String literals */
.withChannel(new StringLiteralsChannel())

/* Number literals */
// Float: a floating body followed by f/F, or plain digits followed by f/F.
.withChannel(regexp(FLOAT_LITERAL, FLOATING_LITERAL_WITHOUT_SUFFIX_REGEXP + "[fF]|[0-9][0-9_]*+[fF]"))
// Double: a floating body with an optional d/D, or plain digits followed by d/D.
.withChannel(regexp(DOUBLE_LITERAL, FLOATING_LITERAL_WITHOUT_SUFFIX_REGEXP + "[dD]?+|[0-9][0-9_]*+[dD]"))
// Long: an integer body followed by l/L; registered before the plain integer channel.
.withChannel(regexp(LONG_LITERAL, INTEGER_LITERAL_REGEXP + "[lL]"))
.withChannel(regexp(INTEGER_LITERAL, INTEGER_LITERAL_REGEXP))

/* Identifiers, keywords, and punctuators */
// Identifier pattern: optional '#' or '@' prefix, a letter, further letters/digits/underscores
// (not ending in an underscore), and an optional trailing '*' possibly preceded by whitespace.
.withChannel(new IdentifierAndKeywordChannel("(#|@)?[a-zA-Z]([a-zA-Z0-9_]*[a-zA-Z0-9])?+((\\s+)?\\*)?", true, ObjectiveCKeyword.values()))
.withChannel(new IdentifierAndKeywordChannel("[#@]?[a-zA-Z]([a-zA-Z0-9_]*[a-zA-Z0-9])?+((\\s+)?\\*)?", true, ObjectiveCKeyword.values()))
.withChannel(new PunctuatorChannel(ObjectiveCPunctuator.values()))

/* All other tokens */
.withChannel(regexp(LITERAL, "[^\r\n\\s/]+"))

.withChannel(new BlackHoleChannel("[\\s]"))
/* All other tokens -- must be last channel */
// Catch-all: any run of characters that are not whitespace and not '/'.
.withChannel(regexp(GenericTokenType.IDENTIFIER, "[^\r\n\\s/]+"))

.build();
}

}
Loading

0 comments on commit 876b99d

Please sign in to comment.