Skip to content

Commit

Permalink
langkit/parsers.py: support Cut in Opt subparser
Browse files Browse the repository at this point in the history
Improve error recovery of incomplete code parsing by allowing Cut
parser in Opt ones.

TN: S201-022
  • Loading branch information
thvnx committed Aug 1, 2022
1 parent de0d147 commit 540f2d3
Show file tree
Hide file tree
Showing 8 changed files with 620 additions and 17 deletions.
70 changes: 68 additions & 2 deletions langkit/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,18 +615,42 @@ def traverse_nobacktrack(
variable if necessary, indicating which parsers should not backtrack.
"""
if isinstance(self, Cut):
# Do not create a new variable for consecutive Cuts

if nobt:
self.no_backtrack = nobt
else:
self.no_backtrack = VarDef('nobt', T.Bool, reinit=True)

for c in self.children:
nobt = c.traverse_nobacktrack(self.no_backtrack)
# Or parsers are a stop point for Cut

if nobt and not isinstance(self, Or):
# Or and Opt parsers are stop points for Cut:
#
# * For Or(A, B, ...) parsers, the effect of a Cut in A/B/... must
# stop when parsing returns from A/B/..., so we do not want the
# no_backtrack variable to be propagated from A to B, etc. and
# from A/B/... to the Or parser itself.
#
# * For Parser(A, Opt(B), Opt(C), ...) parsers, the effect of a Cut
# in A/... must stop when parsing B/C, so we do not want the
# no_backtrack variable to be propagated from A/... to B/C. On
# the other hand, a Cut in B should be propagated to the Parser
# itself, which includes A/... parsers, but not C (i.e. the
# effect of a Cut in B or C must not affect C or B, respectively,
# but only their parent parser Parser).

if nobt and not isinstance(self, Or) and not isinstance(c, Opt):
self.no_backtrack = nobt

# If c is an Opt parser that contains a Cut, the no_backtrack value
# of c will be propagated to self: create a no_backtrack variable
# in self to hold the propagated value if no Cut has been defined
# at this point in self yet.

if nobt and not self.no_backtrack and isinstance(c, Opt):
self.no_backtrack = VarDef('nobt', T.Bool, reinit=True)

return self.no_backtrack

def create_vars_after(self, start_pos: VarDef) -> None:
Expand Down Expand Up @@ -2385,6 +2409,48 @@ class Cut(Parser):
function Foo is -- This function decl will be parsed correctly
print("lol")
end
Still in the perspective of better error recovery, a ``Cut`` parser is also
allowed in an ``Opt`` parser in order to prevent backtracking even when an
``Opt`` parser fails. Here is an example of how to use the ``Cut`` parser
in an ``Opt`` one::
body=Body(Opt("scope", identifier), "begin", stmts_list, "end")
In this case, if we try to parse the input ``"scope begin [stmts] end"``,
parsing will fail because of the missing ``identifier`` field: the ``Opt``
parser will backtrack and an error will be reported on the ``scope`` keyword.
Nevertheless, it can be improved thanks to a ``Cut``::
body=Body(Opt("scope", Cut(), identifier), "begin", stmts_list, "end")
Now, the parser will not backtrack and will instead produce an incomplete
node, taking into account the ``Opt`` part. The error will now concern the
``identifier`` field being absent instead of complaining about the
``scope`` keyword. This also means that on the simple input ``"scope"``,
the parser won't backtrack and will produce an incomplete ``Body`` node.
Note that the ``Cut`` parser only applies to the ``Opt`` parser it is
defined in, therefore, the parser will backtrack on the following input:
``"begin end"``. Here, the parser will fail because of the missing
``stmts_list`` field. Several ``Cut`` parsers can be used to improve error
recovery in that case. Rewriting the rule as::
body=Body(Opt("scope", Cut(), identifier),
"begin", Cut(), stmts_list, "end")
will allow the parser to properly parse the incomplete input, reporting the
missing ``stmts_list`` field. Moreover, if no ``Cut`` is defined in the
``Opt`` parser::
body=Body(Opt("scope", identifier), "begin", Cut(), stmts_list, "end")
The ``Cut`` in the ``Body`` parser has no effect in the ``Opt`` part, which
means that the following input: ``"scope begin end"``, will produce a
parsing error and won't recover anything from the ``Opt`` parser: the
``identifier`` being absent, the ``Opt`` parser will fail and backtrack,
the ``scope`` keyword will be reported as an error, and the ``begin end``
will be incompletely parsed (no backtrack because of the ``Cut``).
"""

def discard(self) -> bool:
Expand Down
83 changes: 68 additions & 15 deletions langkit/templates/parsers/opt_code_ada.mako
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,49 @@ if parser._booleanize:
alt_true, alt_false = base._alternatives
%>

<%def name="no_backtrack_failure()">
## Code to execute for error recovery inside the opt parser. The emitted Ada
## code only acts when the parser's no_backtrack variable is True at parse
## time: it sets the subparser position to the last failure position, appends
## a "Cannot parse <...>" diagnostic located at the token found at that
## position, then calls Add_Last_Fail_Diagnostic.
if ${parser.no_backtrack} then
${subparser.pos_var} := Parser.Last_Fail.Pos;
Append (Parser.Diagnostics,
Sloc_Range (Parser.TDH.all,
Get_Token (Parser.TDH.all, ${subparser.pos_var})),
To_Text ("Cannot parse <${parser.name}>"));
Add_Last_Fail_Diagnostic (Parser);
end if;
</%def>

<%def name="init_empty_list()">
## Emit code that allocates a node from the parser memory pool, stores it in
## the subparser result variable and initializes it as a list with no element
## (Count => 0). The node starts at the token preceding the parser start
## position and has no end token (No_Token_Index).
##
## NOTE(review): Initialize targets parser.res_var whereas the allocation and
## Initialize_List target subparser.res_var; presumably both designate the
## same variable for Opt parsers -- confirm.
${subparser.res_var} :=
${parser_type.parser_allocator} (Parser.Mem_Pool);
Initialize
(Self => ${parser.res_var},
Kind => ${parser_type.ada_kind_name},
Unit => Parser.Unit,
Token_Start_Index => ${parser.start_pos} - 1,
Token_End_Index => No_Token_Index);
Initialize_List
(Self => ${subparser.res_var},
Parser => Parser,
Count => 0);
</%def>

<%def name="discard_res_var()">
## Emit code that drops the subparser result by assigning it the null
## expression of the parser's result type.
${subparser.res_var} := ${parser_type.storage_nullexpr};
</%def>

<%def name="reset_pos_var()">
## Emit code that moves the subparser position back to the position at which
## this parser started.
${subparser.pos_var} := ${parser.start_pos};
</%def>

${subparser.generate_code()}

if ${subparser.pos_var} = No_Token_Index then
## The subparser failed to match the input: produce result for the empty
## sequence.
## or incomplete sequence.

% if parser._booleanize:
% if base.is_bool_type:
Expand All @@ -31,20 +69,29 @@ if ${subparser.pos_var} = No_Token_Index then
Token_End_Index => No_Token_Index);
% endif
% elif parser_type and parser_type.is_list_type:
${subparser.res_var} :=
${parser_type.parser_allocator} (Parser.Mem_Pool);
Initialize
(Self => ${parser.res_var},
Kind => ${parser_type.ada_kind_name},
Unit => Parser.Unit,
Token_Start_Index => ${parser.start_pos} - 1,
Token_End_Index => No_Token_Index);
Initialize_List
(Self => ${subparser.res_var},
Parser => Parser,
Count => 0);
% if parser.no_backtrack:
${no_backtrack_failure()}

## Init an empty list if the subparser failed
if ${subparser.res_var} = ${parser_type.storage_nullexpr} then
${init_empty_list()}
end if;
% else:
## Backtrack case: discard subparser result (init an empty list)
${init_empty_list()}
% endif
% elif parser_type:
${subparser.res_var} := ${parser_type.storage_nullexpr};
% if parser.no_backtrack:
${no_backtrack_failure()}

## Backtrack case: discard subparser result
if not ${parser.no_backtrack} then
${discard_res_var()}
end if;
% else:
## Backtrack case: discard subparser result
${discard_res_var()}
% endif
% endif

% if parser._is_error:
Expand All @@ -56,7 +103,13 @@ if ${subparser.pos_var} = No_Token_Index then
To_Text ("Missing '${subparser.error_repr}'"));
% endif

${subparser.pos_var} := ${parser.start_pos};
% if parser.no_backtrack:
if not ${parser.no_backtrack} then
${reset_pos_var()}
end if;
% else:
${reset_pos_var()}
% endif

% if parser._booleanize:
else
Expand Down
6 changes: 6 additions & 0 deletions langkit/templates/parsers/row_code_ada.mako
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@ ${parser.pos_var} := ${parser.start_pos};
## Parse the element
${subparser.generate_code()}

## Propagate no_backtrack information. If a subparser sets its no_backtrack
## variable, it should propagate the result to its parent.
% if subparser.no_backtrack and parser.no_backtrack:
${parser.no_backtrack} := ${subparser.no_backtrack};
% endif

% if parser.progress_var:
${parser.progress_var} := ${num};
% endif
Expand Down
47 changes: 47 additions & 0 deletions testsuite/tests/grammar/multiple_cuts/expected_concrete_syntax.lkt
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import lexer_example

@with_lexer(foo_lexer)
grammar foo_grammar {
@main_rule stmt_rule <- list*(or(def | var | dot | comma))
id <- Id(@identifier)
def <- Def(
"def"
/ id ?pick("(" / id ")") ?pick("{" / id "}")
)
var <- Var(
"var" / id ?pick("(" / list+(id, ",") ")")
)
dot <- Dot(
"." id ?pick("(" / id ")") ?pick("{" / id "}")
)
comma <- Comma(?pick("(" / id ")") "," id id)
}

@abstract class FooNode implements Node[FooNode] {
}

class Comma : FooNode {
@parse_field id1: Id
@parse_field id2: Id
@parse_field id3: Id
}

class Def : FooNode {
@parse_field id1: Id
@parse_field id2: Id
@parse_field id3: Id
}

class Dot : FooNode {
@parse_field id1: Id
@parse_field id2: Id
@parse_field id3: Id
}

class Id : FooNode implements TokenNode {
}

class Var : FooNode {
@parse_field id: Id
@parse_field ids: ASTList[FooNode, Id]
}
50 changes: 50 additions & 0 deletions testsuite/tests/grammar/multiple_cuts/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import libfoolang


# Inputs that every rule is expected to parse without diagnostics.
complete_texts = [
    "def a",
    "def a (b)",
    "def a (b) {c}",
    "var a",
    "var a (b)",
    "var a (b, c, d)",
    ". a (b)",
    ". a (b) {c}",
    ", a b",
    "(a) , b c",
]

# Truncated or malformed inputs, used to exercise error recovery.
incomplete_texts = [
    # The def and var rules check that incomplete results are produced when
    # several cut parsers are present.
    "def",
    "def a (b",
    "def a (b) {c",
    "def a (",
    "def a (b) {",
    "def a ( {",
    "def a (b {c",
    "var",
    "var a (",
    "var a ()",
    "var a (b, c, d",
    # The dot rule checks that an incomplete result is produced if only the
    # optional part can set the no_backtrack variable.
    ". a (b",
    ". a (b) {",
    ". a ( {",
    # The comma rule is similar to the dot one but the optional part is at
    # the beginning of the rule.
    ", b",
    "(a) , b",
    "(a , b",
]

# (label, source text) pairs, numbered from 1 within each category.
inputs = [
    (f"complete case {index}", source)
    for index, source in enumerate(complete_texts, start=1)
] + [
    (f"incomplete case {index}", source)
    for index, source in enumerate(incomplete_texts, start=1)
]

ctx = libfoolang.AnalysisContext()

# Parse each input in turn, then print its diagnostics followed by a dump of
# the resulting tree.
for label, source in inputs:
    print(f"=== {label}: {source} ===")
    print()
    unit = ctx.get_from_buffer("buffer", buffer=source)

    for diagnostic in unit.diagnostics:
        print(diagnostic)
    unit.root.dump()
    print()
Loading

0 comments on commit 540f2d3

Please sign in to comment.