diff --git a/tool/src/main/java/org/antlr/codegen/CodeGenerator.java b/tool/src/main/java/org/antlr/codegen/CodeGenerator.java
index b279bd565..ac2741707 100644
--- a/tool/src/main/java/org/antlr/codegen/CodeGenerator.java
+++ b/tool/src/main/java/org/antlr/codegen/CodeGenerator.java
@@ -909,6 +909,7 @@ protected ST genTokenVocabOutput() {
         // now dump the strings
         for (String literal : grammar.getStringLiterals()) {
             int tokenType = grammar.getTokenType(literal);
+            literal = literal.replace("\\", "\\\\"); // double every backslash before the literal is added to the tokens template (antlr/antlr3#163)
             if ( tokenType>=Label.MIN_TOKEN_TYPE ) {
                 vocabFileST.addAggr("tokens.{name,type}", literal, Utils.integer(tokenType));
             }
diff --git a/tool/src/test/java/org/antlr/test/TestJavaCodeGeneration.java b/tool/src/test/java/org/antlr/test/TestJavaCodeGeneration.java
index 5a189cdd4..4b51aebf3 100644
--- a/tool/src/test/java/org/antlr/test/TestJavaCodeGeneration.java
+++ b/tool/src/test/java/org/antlr/test/TestJavaCodeGeneration.java
@@ -158,4 +158,87 @@ public void testSemanticPredicateAnalysisStackOverflow() throws Exception {
         boolean success = rawGenerateAndBuildRecognizer("T.g", grammar, "TParser", "TLexer", false);
         assertTrue(success);
     }
+
+    /**
+     * Regression test for antlr/antlr3#163: T2.tokens is written by hand with the backslash literal
+     * as '\\' (two backslashes in the file); building grammar T against it is expected to fail.
+     */
+    @Test
+    public void testTokenVocabWithUnescapedBackslashThrowsNullPointerException(){
+        mkdir(tmpdir);
+        writeFile(tmpdir, "T2.tokens", "Backslash=4\n'\\\\'=4\n");
+        String grammar =
+            "grammar T;\n" +
+            "\n" +
+            "options{\n" +
+            " tokenVocab=T2;\n" +
+            "}\n" +
+            "tokens{\n" +
+            " Backslash = '\\\\';\n" +
+            "}\n" +
+            "main : '\\\\' EOF;";
+        boolean success = rawGenerateAndBuildRecognizer("T.g", grammar, "TParser", "TLexer", false);
+        assertFalse(success);
+    }
+
+    /**
+     * Regression test for antlr/antlr3#163: T2.tokens is written by hand with the backslash literal
+     * as '\\\\' (four backslashes in the file); building grammar T against it is expected to succeed.
+     */
+    @Test
+    public void testTokenVocabWithEscapedBackslash(){
+        mkdir(tmpdir);
+        writeFile(tmpdir, "T2.tokens", "Backslash=4\n'\\\\\\\\'=4\n");
+        String grammar =
+            "grammar T;\n" +
+            "\n" +
+            "options{\n" +
+            " tokenVocab=T2;\n" +
+            "}\n" +
+            "tokens{\n" +
+            " Backslash = '\\\\';\n" +
+            "}\n" +
+            "main : '\\\\' 'another token, reason see below' EOF;";
+        /* The extra literal token is needed because pull request #157 is not merged yet:
+         * without it no lexer would be generated, since the tokenVocab
+         * already covers every token used in the grammar.
+         * See https://github.com/antlr/antlr3/pull/157 for more information.
+         */
+        boolean success = rawGenerateAndBuildRecognizer("T.g", grammar, "TParser", "TLexer", false);
+        assertTrue(success);
+    }
+
+    /**
+     * Regression test for antlr/antlr3#163: first builds grammar T2, then builds grammar T with
+     * tokenVocab=T2 (presumably reading the T2.tokens emitted by the first build); both must succeed.
+     */
+    @Test
+    public void testTokenVocabWithBackslashReusedInOtherGrammar(){
+        String grammar =
+            "grammar T2;\n" +
+            "tokens{\n" +
+            " Backslash = '\\\\';\n" +
+            "}\n" +
+            "main : '\\\\' EOF;";
+
+        boolean success = rawGenerateAndBuildRecognizer("T2.g", grammar, "T2Parser", "T2Lexer", false);
+        assertTrue(success);
+        grammar =
+            "grammar T;\n" +
+            "\n" +
+            "options{\n" +
+            " tokenVocab=T2;\n" +
+            "}\n" +
+            "tokens{\n" +
+            " Backslash = '\\\\';\n" +
+            "}\n" +
+            "main : '\\\\' 'another token, reason see below' EOF;";
+        /* The extra literal token is needed because pull request #157 is not merged yet:
+         * without it no lexer would be generated, since the tokenVocab
+         * already covers every token used in the grammar.
+         * See https://github.com/antlr/antlr3/pull/157 for more information.
+         */
+        success = rawGenerateAndBuildRecognizer("T.g", grammar, "TParser", "TLexer", false);
+        assertTrue(success);
+    }
 }