From 67059d12e0442e3aefb9093ba263e3249c9cefd9 Mon Sep 17 00:00:00 2001 From: Robert Stoll Date: Mon, 29 Sep 2014 14:17:30 +0200 Subject: [PATCH 1/3] Fixed token file generation for tokens with backslashes in their corresponding literal part --- tool/src/main/java/org/antlr/codegen/CodeGenerator.java | 1 + 1 file changed, 1 insertion(+) diff --git a/tool/src/main/java/org/antlr/codegen/CodeGenerator.java b/tool/src/main/java/org/antlr/codegen/CodeGenerator.java index b279bd565..ac2741707 100644 --- a/tool/src/main/java/org/antlr/codegen/CodeGenerator.java +++ b/tool/src/main/java/org/antlr/codegen/CodeGenerator.java @@ -909,6 +909,7 @@ protected ST genTokenVocabOutput() { // now dump the strings for (String literal : grammar.getStringLiterals()) { int tokenType = grammar.getTokenType(literal); + literal = literal.replace("\\", "\\\\"); if ( tokenType>=Label.MIN_TOKEN_TYPE ) { vocabFileST.addAggr("tokens.{name,type}", literal, Utils.integer(tokenType)); } From 7ee4021b6affff72ad2a31287cd8c7b34cc5c30a Mon Sep 17 00:00:00 2001 From: Robert Stoll Date: Mon, 29 Sep 2014 16:57:44 +0200 Subject: [PATCH 2/3] added regression tests --- .../antlr/test/TestJavaCodeGeneration.java | 84 +++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/tool/src/test/java/org/antlr/test/TestJavaCodeGeneration.java b/tool/src/test/java/org/antlr/test/TestJavaCodeGeneration.java index 5a189cdd4..fd07db94b 100644 --- a/tool/src/test/java/org/antlr/test/TestJavaCodeGeneration.java +++ b/tool/src/test/java/org/antlr/test/TestJavaCodeGeneration.java @@ -29,6 +29,7 @@ import org.junit.Test; +import static org.hamcrest.core.IsInstanceOf.instanceOf; import static org.junit.Assert.*; /** General code generation testing; compilation and/or execution. 
@@ -158,4 +159,87 @@ public void testSemanticPredicateAnalysisStackOverflow() throws Exception { boolean success = rawGenerateAndBuildRecognizer("T.g", grammar, "TParser", "TLexer", false); assertTrue(success); } + + /** + * Regression test for antlr/antlr3#163 - writes a T2.tokens file by hand and builds grammar T with tokenVocab=T2; + * the build is expected to fail. NOTE(review): only assertFalse(success) is checked, no exception type is asserted + */ + @Test + public void testTokenVocabWithUnescapedBackslashThrowsNullPointerException(){ + mkdir(tmpdir); + writeFile(tmpdir, "T2.tokens", "Backslash=4\n'\\\\'=4\n"); + String grammar = + "grammar T;\n" + + "\n" + + "options{\n" + + " tokenVocab=T2;\n" + + "}\n" + + "tokens{\n" + + " Backslash = '\\\\';\n" + + "}\n" + + "main : '\\\\' EOF;"; + boolean success = rawGenerateAndBuildRecognizer("T.g", grammar, "TParser", "TLexer", false); + assertFalse(success); + } + + /** + * Regression test for antlr/antlr3#163 - writes a T2.tokens file by hand in which the backslashes of the + * literal are doubled compared to the unescaped variant of this test; building grammar T must succeed + */ + @Test + public void testTokenVocabWithEscapedBackslash(){ + mkdir(tmpdir); + writeFile(tmpdir, "T2.tokens", "Backslash=4\n'\\\\\\\\'=4\n"); + String grammar = + "grammar T;\n" + + "\n" + + "options{\n" + + " tokenVocab=T2;\n" + + "}\n" + + "tokens{\n" + + " Backslash = '\\\\';\n" + + "}\n" + + "main : '\\\\' 'another token, reason see below' EOF;"; + /* needed to insert another token since pull request #157 is not yet merged + * No lexer would be generated without the additional token since a tokenVocab + * was defined which covers all tokens used in the grammar + * See https://github.com/antlr/antlr3/pull/157 for more information + */ + boolean success = rawGenerateAndBuildRecognizer("T.g", grammar, "TParser", "TLexer", false); + assertTrue(success); + } + + /** + * Regression test for antlr/antlr3#163 - builds grammar T2 (which declares the backslash token) first and + * then grammar T with tokenVocab=T2 and the same backslash literal; both builds must succeed + */ + @Test + public void testTokenVocabWithBackslashReusedInOtherGrammar(){ + String grammar = + "grammar T2;\n" + + "tokens{\n" + + " Backslash = '\\\\';\n" + + "}\n" + + "main : '\\\\' EOF;"; 
+ boolean success = rawGenerateAndBuildRecognizer("T2.g", grammar, "T2Parser", "T2Lexer", false); + assertTrue(success); + grammar = + "grammar T;\n" + + "\n" + + "options{\n" + + " tokenVocab=T2;\n" + + "}\n" + + "tokens{\n" + + " Backslash = '\\\\';\n" + + "}\n" + + "main : '\\\\' 'another token, reason see below' EOF;"; + /* needed to insert another token since pull request #157 is not yet merged + * No lexer would be generated without the additional token since a tokenVocab + * was defined which covers all tokens used in the grammar + * See https://github.com/antlr/antlr3/pull/157 for more information + */ + success = rawGenerateAndBuildRecognizer("T.g", grammar, "TParser", "TLexer", false); + assertTrue(success); + } } From 29f37cfa25734feb7eb1f894859fc536035736dc Mon Sep 17 00:00:00 2001 From: Robert Stoll Date: Mon, 29 Sep 2014 17:04:06 +0200 Subject: [PATCH 3/3] removed unnecessary import --- tool/src/test/java/org/antlr/test/TestJavaCodeGeneration.java | 1 - 1 file changed, 1 deletion(-) diff --git a/tool/src/test/java/org/antlr/test/TestJavaCodeGeneration.java b/tool/src/test/java/org/antlr/test/TestJavaCodeGeneration.java index fd07db94b..4b51aebf3 100644 --- a/tool/src/test/java/org/antlr/test/TestJavaCodeGeneration.java +++ b/tool/src/test/java/org/antlr/test/TestJavaCodeGeneration.java @@ -29,7 +29,6 @@ import org.junit.Test; -import static org.hamcrest.core.IsInstanceOf.instanceOf; import static org.junit.Assert.*; /** General code generation testing; compilation and/or execution.