Skip to content

Commit

Permalink
Correct the two-stage parsing strategy of the ANTLR parser
Browse files Browse the repository at this point in the history
  • Loading branch information
cfmcgrady committed Nov 10, 2023
1 parent 616af05 commit 65ccc93
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 12 deletions.
11 changes: 2 additions & 9 deletions spark/src/main/antlr4/io/delta/sql/parser/DeltaSqlBase.g4
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,10 @@ statement
| ALTER TABLE table=qualifiedName
DROP FEATURE featureName=featureNameValue (TRUNCATE HISTORY)? #alterTableDropFeature
| OPTIMIZE (path=STRING | table=qualifiedName)
(WHERE partitionPredicate=predicateToken)?
(WHERE partitionPredicate=exprToken)?
(zorderSpec)? #optimizeTable
| REORG TABLE table=qualifiedName
(WHERE partitionPredicate=predicateToken)?
(WHERE partitionPredicate=exprToken)?
APPLY LEFT_PAREN PURGE RIGHT_PAREN #reorgTable
| cloneTableHeader SHALLOW CLONE source=qualifiedName clause=temporalClause?
(TBLPROPERTIES tableProps=propertyList)?
Expand Down Expand Up @@ -198,13 +198,6 @@ constraint
: CHECK '(' exprToken+ ')' #checkConstraint
;

// We don't have an expression rule in our grammar here, so we just grab the tokens and defer
// parsing them to later. Although this is the same as `exprToken`, we have to re-define it to
// work around an ANTLR issue (https://github.com/delta-io/delta/issues/1205)
predicateToken
: .+?
;

// We don't have an expression rule in our grammar here, so we just grab the tokens and defer
// parsing them to later.
exprToken
Expand Down
10 changes: 7 additions & 3 deletions spark/src/main/scala/io/delta/sql/parser/DeltaSqlParser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -106,18 +106,22 @@ class DeltaSqlParser(val delegate: ParserInterface) extends ParserInterface {
parser.removeErrorListeners()
parser.addErrorListener(ParseErrorListener)

// https://github.com/antlr/antlr4/issues/192#issuecomment-15238595
// Save a great deal of time on correct inputs by using a two-stage parsing strategy.
try {
try {
// first, try parsing with potentially faster SLL mode
// first, try parsing with potentially faster SLL mode and BailErrorStrategy
parser.setErrorHandler(new BailErrorStrategy)
parser.getInterpreter.setPredictionMode(PredictionMode.SLL)
toResult(parser)
} catch {
case e: ParseCancellationException =>
// if we fail, parse with LL mode
// if we fail, parse with LL mode with DefaultErrorStrategy
tokenStream.seek(0) // rewind input stream
parser.reset()

// Try Again.
parser.setErrorHandler(new DefaultErrorStrategy)
parser.getInterpreter.setPredictionMode(PredictionMode.LL)
toResult(parser)
}
Expand Down Expand Up @@ -469,7 +473,7 @@ class DeltaSqlAstBuilder extends DeltaSqlBaseBaseVisitor[AnyRef] {
tokens.map(_.getText).mkString(" ")
}

private def extractRawText(exprContext: ParserRuleContext): String = {
private def extractRawText(exprContext: ExprTokenContext): String = {
// Extract the raw expression which will be parsed later
exprContext.getStart.getInputStream.getText(new Interval(
exprContext.getStart.getStartIndex,
Expand Down

0 comments on commit 65ccc93

Please sign in to comment.