From 804f09cacd7cd14704fb5b9142ecf3e698f8e875 Mon Sep 17 00:00:00 2001
From: Haozhun Jin
Date: Thu, 19 Mar 2015 15:15:14 -0700
Subject: [PATCH] Support \p 1-char char classes without curly braces

Add the OP2_ESC_P_CHAR_CHAR_PROPERTY syntax option (\pX, \PX) and enable
it in one of the predefined syntaxes in Syntax.java.

When the option is set, the fallback branch of
fetchTokenFor_charProperty() (previously only an
INVALID_UNICODE_PROPERTY warning) emits a CHAR_PROPERTY token flagged as
single-char. For such a token fetchCharPropertyToCType() fetches one
character and passes it to enc.propertyNameToCType() instead of scanning
for a closing '}'.

Syntax.op2EscPCharCharProperty() tests OP2_ESC_P_CHAR_CHAR_PROPERTY, so
the brace-less form is controlled by its own flag and not by
OP2_ESC_P_BRACE_CHAR_PROPERTY.
---
 src/org/joni/Lexer.java                      | 21 ++++++++++++++------
 src/org/joni/Syntax.java                     |  8 +++++++-
 src/org/joni/Token.java                      |  7 +++++++
 src/org/joni/constants/SyntaxProperties.java |  1 +
 4 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/src/org/joni/Lexer.java b/src/org/joni/Lexer.java
index 0b702714..d2fda3b3 100644
--- a/src/org/joni/Lexer.java
+++ b/src/org/joni/Lexer.java
@@ -956,6 +956,10 @@ private void fetchTokenFor_charProperty() {
                     unfetch();
                 }
             }
+        } else if (syntax.op2EscPCharCharProperty()) {
+            token.type = TokenType.CHAR_PROPERTY;
+            token.setPropNot(c == 'P');
+            token.setPropSingleChar(true);
         } else {
             syntaxWarn(Warnings.INVALID_UNICODE_PROPERTY, (char)c);
         }
@@ -1252,13 +1256,18 @@ private void possessiveCheck() {
 
     protected final int fetchCharPropertyToCType() {
         mark();
-        while (left()) {
-            int last = p;
+        if (token.getPropSingleChar()) {
             fetch();
-            if (c == '}') {
-                return enc.propertyNameToCType(bytes, _p, last);
-            } else if (c == '(' || c == ')' || c == '{' || c == '|') {
-                throw new CharacterPropertyException(ERR_INVALID_CHAR_PROPERTY_NAME, bytes, _p, last);
+            return enc.propertyNameToCType(bytes, _p, p);
+        } else {
+            while (left()) {
+                int last = p;
+                fetch();
+                if (c == '}') {
+                    return enc.propertyNameToCType(bytes, _p, last);
+                } else if (c == '(' || c == ')' || c == '{' || c == '|') {
+                    throw new CharacterPropertyException(ERR_INVALID_CHAR_PROPERTY_NAME, bytes, _p, last);
+                }
             }
         }
         newInternalException(ERR_PARSER_BUG);
diff --git a/src/org/joni/Syntax.java b/src/org/joni/Syntax.java
index 4e7b5e77..c0d73ba4 100644
--- a/src/org/joni/Syntax.java
+++ b/src/org/joni/Syntax.java
@@ -282,6 +282,11 @@ public boolean op2OptionECMAScript() {
         return isOp2(OP2_OPTION_ECMASCRIPT);
     }
 
+    public boolean op2EscPCharCharProperty()
+    {
+        return isOp2(OP2_ESC_P_CHAR_CHAR_PROPERTY);
+    }
+
     /**
      * BEHAVIOR
      *
@@ -535,7 +540,8 @@ public boolean warnReduntantNestedRepeat() {
         OP2_OPTION_PERL | OP2_PLUS_POSSESSIVE_REPEAT |
         OP2_PLUS_POSSESSIVE_INTERVAL | OP2_CCLASS_SET_OP |
         OP2_ESC_V_VTAB | OP2_ESC_U_HEX4 |
-        OP2_ESC_P_BRACE_CHAR_PROPERTY ),
+        OP2_ESC_P_BRACE_CHAR_PROPERTY |
+        OP2_ESC_P_CHAR_CHAR_PROPERTY),
 
         ( GNU_REGEX_BV | DIFFERENT_LEN_ALT_LOOK_BEHIND ),
 
diff --git a/src/org/joni/Token.java b/src/org/joni/Token.java
index 8ad73304..276ff210 100644
--- a/src/org/joni/Token.java
+++ b/src/org/joni/Token.java
@@ -169,4 +169,11 @@ boolean getPropNot() {
     void setPropNot(boolean not) {
         INT2 = not ? 1 : 0;
     }
+
+    boolean getPropSingleChar() {
+        return INT3 != 0;
+    }
+    void setPropSingleChar(boolean singleChar) {
+        INT3 = singleChar ? 1 : 0;
+    }
 }
diff --git a/src/org/joni/constants/SyntaxProperties.java b/src/org/joni/constants/SyntaxProperties.java
index 075324c0..88f9830c 100644
--- a/src/org/joni/constants/SyntaxProperties.java
+++ b/src/org/joni/constants/SyntaxProperties.java
@@ -75,6 +75,7 @@ public interface SyntaxProperties {
     final int OP2_ESC_H_XDIGIT = (1<<19); /* \h, \H */
     final int OP2_INEFFECTIVE_ESCAPE = (1<<20); /* \ */
     final int OP2_OPTION_ECMASCRIPT = (1<<21); /* EcmaScript quirks */
+    final int OP2_ESC_P_CHAR_CHAR_PROPERTY = (1<<22); /* \pX, \PX */
 
     /* syntax (behavior); */
     final int CONTEXT_INDEP_ANCHORS = (1<<31); /* not implemented */