From a25f0b9b2f91635cdf02b4f049cab06c144b54ee Mon Sep 17 00:00:00 2001 From: apalala Date: Tue, 28 Nov 2023 18:00:43 -0400 Subject: [PATCH 01/51] [tool] build path for new code generation --- tatsu/ngcodegen/__init__.py | 5 +++++ .../__pycache__/__init__.cpython-312.pyc | Bin 0 -> 385 bytes tatsu/tool.py | 13 +++++++++++-- 3 files changed, 16 insertions(+), 2 deletions(-) create mode 100644 tatsu/ngcodegen/__init__.py create mode 100644 tatsu/ngcodegen/__pycache__/__init__.cpython-312.pyc diff --git a/tatsu/ngcodegen/__init__.py b/tatsu/ngcodegen/__init__.py new file mode 100644 index 00000000..6cfa5789 --- /dev/null +++ b/tatsu/ngcodegen/__init__.py @@ -0,0 +1,5 @@ +from ..objectmodel import ParseModel + + +def codegen(model: ParseModel) -> str: + return '** NG CODEGEN NOT IMPLEMENTED YET **' diff --git a/tatsu/ngcodegen/__pycache__/__init__.cpython-312.pyc b/tatsu/ngcodegen/__pycache__/__init__.cpython-312.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a6f218c97361f49150f7f3e88e51d777700e312f GIT binary patch literal 385 zcmXv~Jx{|h5cMVL2d(-GSS-;c8Hhkk2(i$pLLzC0GSDRoiCYz=X?1poGIihwuytkS zx9|s0Bqk=dNrI7jJOy7$hy_wMYEPN&5v{-TN45Ps|85LQJBPGrjxmO#qpFa}_W zAs{h@F|iWs#TuLInqB`fsLh7FHH=USf02kZugy!Sqfjf%VyGMsU=k_xm@Qj_Z3NF< z$KgSr-wb+Q-wSv!81Y+wc<1?EF!Fl*!5eYMd8a9;RZ{!${e^ok1xhzsMkyUPLglJR z$yJrJX-r#8MdpU#ESsq?JYBByMsY_s0^7>li^nGstMU=3m(5M7 xuvB1!O4Of+O-lRb6?Lsrag7}+^ Date: Tue, 28 Nov 2023 19:24:33 -0400 Subject: [PATCH 02/51] [walkrers] make walk_children public --- tatsu/walkers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tatsu/walkers.py b/tatsu/walkers.py index 0406092e..02423c02 100644 --- a/tatsu/walkers.py +++ b/tatsu/walkers.py @@ -40,7 +40,7 @@ def walk(self, node: Node | list[Node], *args, **kwargs) -> Any: else: return node - def _walk_children(self, node: Node, *args, **kwargs): + def walk_children(self, node: Node, *args, **kwargs): if not isinstance(node, Node): return () @@ -106,7 +106,7 @@ class PreOrderWalker(NodeWalker): def walk(self, node, *args, **kwargs): result = super().walk(node, *args, **kwargs) if result is not None: - self._walk_children(node, *args, **kwargs) + self.walk_children(node, *args, **kwargs) return result From d87ac12a06aa932208923d6ef950eafecd5261dc Mon Sep 17 00:00:00 2001 From: apalala Date: Tue, 28 Nov 2023 19:25:15 -0400 Subject: [PATCH 03/51] [grammar] fix name to TatSu --- grammar/tatsu.ebnf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/grammar/tatsu.ebnf b/grammar/tatsu.ebnf index 340b4a14..a70e42fc 100644 --- a/grammar/tatsu.ebnf +++ b/grammar/tatsu.ebnf @@ -1,4 +1,4 @@ -@@grammar :: Tatsu +@@grammar :: TatSu @@whitespace :: /\s+/ @@comments :: ?"(?sm)[(][*](?:.|\n)*?[*][)]" @@eol_comments :: ?"#[^\n]*$" From caf280aa8d781f54e650a20139d677b16db7dcc0 Mon Sep 17 00:00:00 2001 From: apalala Date: Tue, 28 Nov 2023 19:34:04 -0400 Subject: [PATCH 04/51] [ngcodegen] up to Buffer definition --- tatsu/ngcodegen/__init__.py | 6 +- .../__pycache__/__init__.cpython-312.pyc | Bin 385 -> 553 bytes tatsu/ngcodegen/python.py | 95 ++++++++++++++++++ 3 files changed, 100 insertions(+), 1 deletion(-) create mode 100644 tatsu/ngcodegen/python.py diff --git a/tatsu/ngcodegen/__init__.py b/tatsu/ngcodegen/__init__.py index 6cfa5789..d34d1dba 100644 --- a/tatsu/ngcodegen/__init__.py +++ b/tatsu/ngcodegen/__init__.py @@ -1,5 +1,9 @@ from ..objectmodel import ParseModel +from .python import PythonCodeGenerator + def codegen(model: ParseModel) -> str: - return '** NG CODEGEN NOT IMPLEMENTED YET **' + generator = PythonCodeGenerator() + generator.walk(model) + return generator.printed_text() diff --git a/tatsu/ngcodegen/__pycache__/__init__.cpython-312.pyc b/tatsu/ngcodegen/__pycache__/__init__.cpython-312.pyc index a6f218c97361f49150f7f3e88e51d777700e312f..2b74bbcf876685c28ccc014971dae6efe87473da 100644 GIT binary patch literal 553 zcmZ8d&r1S96rOQsS99GE?9wF^l~{=OQkRG>9U>wTA_y1@BRAT@;>WuI_l*qUEMcv(p z*a3AZvv{5Pa!pWtg;CyQZc$JdB7q_P}xMLrf8=rSf86PwbaWPPdFxk224&8kPgkuoySE z#l2>LZn+4HL78Ei6}Q4obFTzC7GtPa(c zH#PO5rUq*InWQH`L^)mY)%tmr7WwRMOl$;5Qw* j#}eR*>v(GbV+mvYi89}4eXK}0)7=>(P!H)Jp=R~JN_>9+ delta 310 zcmZ3<(#XtvnwOW00SLY&rlmGa*^Qc>Y@ Date: Tue, 28 Nov 2023 19:55:35 -0400 Subject: [PATCH 05/51] [mixins][indent] allow control over the amount of indentation --- tatsu/mixins/indent.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tatsu/mixins/indent.py b/tatsu/mixins/indent.py index 78aee689..6cf78cde 100644 --- a/tatsu/mixins/indent.py +++ b/tatsu/mixins/indent.py @@ -5,9 +5,9 @@ class IndentPrintMixin: - def __init__(self, indent=4): + def __init__(self, indent: int = 4): self.indent_amount = indent - self.indent_level = 0 + self.indent_stack: list[int] = [0] self.output_stream = io.StringIO() def printed_text(self): @@ -26,16 +26,20 @@ def as_printed_lines(self, *args, **kwargs): return self.indented_lines(text) @contextmanager - def indent(self): - self.indent_level += 1 + def indent(self, amount: int | None = None): + assert amount is None or amount >= 0 + if amount is None: + amount = self.indent_amount + + self.indent_stack.append(amount + self.indent_stack[-1]) try: yield finally: - self.indent_level -= 1 + self.indent_stack.pop() @property def current_indentation(self): - return ' ' * self.indent_amount * self.indent_level + return ' ' * self.indent_stack[-1] @staticmethod def io_print(*args, **kwargs): From 75d43b6013739e24297e1d477ba23d47629627e1 Mon Sep 17 00:00:00 2001 From: apalala Date: Tue, 28 Nov 2023 19:56:07 -0400 Subject: [PATCH 06/51] [ngcodegen] refactor --- tatsu/ngcodegen/python.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tatsu/ngcodegen/python.py b/tatsu/ngcodegen/python.py index 77b6e705..b52a1f7f 100644 --- a/tatsu/ngcodegen/python.py +++ b/tatsu/ngcodegen/python.py @@ -45,9 +45,17 @@ def walk_Grammar(self, grammar: grammars.Grammar): self.print() self.print() - keywords = grammar.keywords - keywords = ['HERE'] - keywords = [str(k) for k in keywords if k is not None] + self._gen_keywords(grammar) + self._gen_buffering(grammar) + + self.print( + ''' + ** AT GRAMMAR + ''' + ) + + def _gen_keywords(self, grammar: grammars.Grammar): + keywords = [str(k) for k in grammar.keywords if k is not None] if not keywords: self.print('KEYWORDS: set[str] = set()') else: @@ -58,14 +66,6 @@ def walk_Grammar(self, grammar: grammars.Grammar): self.print() self.print() - self._gen_buffering(grammar) - - self.print( - ''' - ** AT GRAMMAR - ''' - ) - def _gen_buffering(self, grammar: grammars.Grammar): self.print(f'class {grammar.name}Buffer(Buffer):') start = grammar.config.start or grammar.rules[0].name From e8cfa3b3a9109dbcbacef7ffe06416264d3d2383 Mon Sep 17 00:00:00 2001 From: apalala Date: Tue, 28 Nov 2023 20:44:49 -0400 Subject: [PATCH 07/51] [mixins][indent] clarify and refactor --- tatsu/mixins/indent.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tatsu/mixins/indent.py b/tatsu/mixins/indent.py index 6cf78cde..d4be8b28 100644 --- a/tatsu/mixins/indent.py +++ b/tatsu/mixins/indent.py @@ -5,8 +5,8 @@ class IndentPrintMixin: - def __init__(self, indent: int = 4): - self.indent_amount = indent + def __init__(self, default_indent: int = 4): + self.default_indent = default_indent self.indent_stack: list[int] = [0] self.output_stream = io.StringIO() @@ -29,7 +29,7 @@ def as_printed_lines(self, *args, **kwargs): def indent(self, amount: int | None = None): assert amount is None or amount >= 0 if amount is None: - amount = self.indent_amount + amount = self.default_indent self.indent_stack.append(amount + self.indent_stack[-1]) try: From feff967a8d3c7a2bf5477f9550bdc2484258c2cf Mon Sep 17 00:00:00 2001 From: apalala Date: Tue, 28 Nov 2023 20:46:56 -0400 Subject: [PATCH 08/51] [lint] resolve warnings --- tatsu/ngcodegen/__init__.py | 1 - tatsu/ngcodegen/python.py | 13 ++++++------- tatsu/tool.py | 3 ++- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/tatsu/ngcodegen/__init__.py b/tatsu/ngcodegen/__init__.py index d34d1dba..0c8c1d96 100644 --- a/tatsu/ngcodegen/__init__.py +++ b/tatsu/ngcodegen/__init__.py @@ -1,5 +1,4 @@ from ..objectmodel import ParseModel - from .python import PythonCodeGenerator diff --git a/tatsu/ngcodegen/python.py b/tatsu/ngcodegen/python.py index b52a1f7f..5650ff81 100644 --- a/tatsu/ngcodegen/python.py +++ b/tatsu/ngcodegen/python.py @@ -5,8 +5,7 @@ from ..util import trim from ..walkers import NodeWalker - -HEADER= """\ +HEADER = """\ #!/usr/bin/env python # WARNING: @@ -51,7 +50,7 @@ def walk_Grammar(self, grammar: grammars.Grammar): self.print( ''' ** AT GRAMMAR - ''' + ''', ) def _gen_keywords(self, grammar: grammars.Grammar): @@ -59,9 +58,9 @@ def _gen_keywords(self, grammar: grammars.Grammar): if not keywords: self.print('KEYWORDS: set[str] = set()') else: - keywords = '\n'.join(f' {k!r},' for k in keywords) - keywords = '{\n%s\n}' % keywords - self.print(f'KEYWORDS: set[str] = {keywords}') + keywords_str = '\n'.join(f' {k!r},' for k in keywords) + keywords_str = '{\n%s\n}' % keywords_str + self.print(f'KEYWORDS: set[str] = {keywords_str}') self.print() self.print() @@ -90,6 +89,6 @@ def _gen_buffering(self, grammar: grammars.Grammar): ) config = config.replace(**settings) super().__init__(text, config=config) - ''' + ''', ) diff --git a/tatsu/tool.py b/tatsu/tool.py index 3d56a566..2f19b72a 100644 --- a/tatsu/tool.py +++ b/tatsu/tool.py @@ -2,6 +2,7 @@ Parse and translate an EBNF grammar into a Python parser for the described language. """ +# ruff: noqa: PLR0912 from __future__ import annotations import argparse @@ -15,9 +16,9 @@ # we hook the tool to the Python code generator as the default from .codegen.python import codegen as pythoncg -from .ngcodegen import codegen as ngpythoncg from .exceptions import ParseException from .infos import ParserConfig +from .ngcodegen import codegen as ngpythoncg from .parser import GrammarGenerator from .semantics import ModelBuilderSemantics from .util import eval_escapes From 3a849fbfb2661203de04d76d8b5a340cc877dda5 Mon Sep 17 00:00:00 2001 From: apalala Date: Tue, 28 Nov 2023 20:52:49 -0400 Subject: [PATCH 09/51] cleanup --- .../__pycache__/__init__.cpython-312.pyc | Bin 553 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 tatsu/ngcodegen/__pycache__/__init__.cpython-312.pyc diff --git a/tatsu/ngcodegen/__pycache__/__init__.cpython-312.pyc b/tatsu/ngcodegen/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index 2b74bbcf876685c28ccc014971dae6efe87473da..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 553 zcmZ8d&r1S96rOQsS99GE?9wF^l~{=OQkRG>9U>wTA_y1@BRAT@;>WuI_l*qUEMcv(p z*a3AZvv{5Pa!pWtg;CyQZc$JdB7q_P}xMLrf8=rSf86PwbaWPPdFxk224&8kPgkuoySE z#l2>LZn+4HL78Ei6}Q4obFTzC7GtPa(c zH#PO5rUq*InWQH`L^)mY)%tmr7WwRMOl$;5Qw* j#}eR*>v(GbV+mvYi89}4eXK}0)7=>(P!H)Jp=R~JN_>9+ From 21cbaf991e269e83eceb119a1f7bb59e889ea908 Mon Sep 17 00:00:00 2001 From: apalala Date: Tue, 28 Nov 2023 21:43:00 -0400 Subject: [PATCH 10/51] [ngcodegen] allow naming the parser --- tatsu/ngcodegen/__init__.py | 4 ++-- tatsu/ngcodegen/python.py | 13 ++++++++----- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/tatsu/ngcodegen/__init__.py b/tatsu/ngcodegen/__init__.py index 0c8c1d96..01d5264d 100644 --- a/tatsu/ngcodegen/__init__.py +++ b/tatsu/ngcodegen/__init__.py @@ -2,7 +2,7 @@ from .python import PythonCodeGenerator -def codegen(model: ParseModel) -> str: - generator = PythonCodeGenerator() +def codegen(model: ParseModel, parser_name: str = '') -> str: + generator = PythonCodeGenerator(parser_name=parser_name) generator.walk(model) return generator.printed_text() diff --git a/tatsu/ngcodegen/python.py b/tatsu/ngcodegen/python.py index 5650ff81..5664f82b 100644 --- a/tatsu/ngcodegen/python.py +++ b/tatsu/ngcodegen/python.py @@ -8,9 +8,7 @@ HEADER = """\ #!/usr/bin/env python - # WARNING: - # - # CAVEAT UTILITOR + # WARNING: CAVEAT UTILITOR # # This file was automatically generated by TatSu. # @@ -35,6 +33,10 @@ class PythonCodeGenerator(IndentPrintMixin, NodeWalker): + def __init__(self, parser_name: str = ''): + super().__init__() + self.parser_name = parser_name + def print(self, *args, **kwargs): args = [trim(arg) for arg in args] super().print(*args, **kwargs) @@ -66,9 +68,10 @@ def _gen_keywords(self, grammar: grammars.Grammar): self.print() def _gen_buffering(self, grammar: grammars.Grammar): - self.print(f'class {grammar.name}Buffer(Buffer):') - start = grammar.config.start or grammar.rules[0].name + name = self.parser_name or grammar.name + self.print(f'class {name}Buffer(Buffer):') + start = grammar.config.start or grammar.rules[0].name with self.indent(): self.print('def __init__(self, text, /, config: ParserConfig | None = None, **settings):') with self.indent(): From 7074718f3d3e8a6154b3d1975d86e18580622c7f Mon Sep 17 00:00:00 2001 From: apalala Date: Wed, 29 Nov 2023 06:43:35 -0400 Subject: [PATCH 11/51] [python] generate up to rule templates --- tatsu/ngcodegen/python.py | 154 ++++++++++++++++++++++++++++++++++---- 1 file changed, 140 insertions(+), 14 deletions(-) diff --git a/tatsu/ngcodegen/python.py b/tatsu/ngcodegen/python.py index 5664f82b..0d659efc 100644 --- a/tatsu/ngcodegen/python.py +++ b/tatsu/ngcodegen/python.py @@ -1,8 +1,13 @@ from __future__ import annotations +import itertools +from collections.abc import Iterator +from typing import Any + +from ..collections import OrderedSet as oset from .. import grammars from ..mixins.indent import IndentPrintMixin -from ..util import trim +from ..util import trim, compress_seq, safe_name from ..walkers import NodeWalker HEADER = """\ @@ -30,30 +35,112 @@ from tatsu.util import re, generic_main """ +FOOTER = """\ +def main(filename, **kwargs): + if not filename or filename == '-': + text = sys.stdin.read() + else: + text = Path(filename).read_text() + parser = {name}Parser() + return parser.parse( + text, + filename=filename, + **kwargs, + ) + + +if __name__ == '__main__': + import json + from tatsu.util import asjson + + ast = generic_main(main, {name}Parser, name='{name}') + data = asjson(ast) + print(json.dumps(data, indent=2)) +""" + + class PythonCodeGenerator(IndentPrintMixin, NodeWalker): + _counter: Iterator[int] = itertools.count() def __init__(self, parser_name: str = ''): super().__init__() self.parser_name = parser_name + @classmethod + def counter(cls): + return next(cls._counter) + + @classmethod + def reset_counter(cls): + cls._counter = itertools.count() + def print(self, *args, **kwargs): - args = [trim(arg) for arg in args] + args = [trim(str(arg)) for arg in args] super().print(*args, **kwargs) + def walk_default(self, node: Any): + return node + def walk_Grammar(self, grammar: grammars.Grammar): + self.parser_name = self.parser_name or grammar.name self.print(HEADER) self.print() self.print() self._gen_keywords(grammar) self._gen_buffering(grammar) + self._gen_parsing(grammar) + + self.print() + self.print(FOOTER) + + def walk_Rule(self, rule: grammars.Rule): + def param_repr(p): + if isinstance(p, int | float): + return str(p) + else: + return repr(p.split('::')[0]) + + params = kwparams = '' + if rule.params: + params = ', '.join( + param_repr(self.walk(p)) for p in rule.params + ) + if rule.kwparams: + kwparams = ', '.join( + f'{k}={param_repr(self.walk(v))}' + for k, v in self.kwparams.items() + ) + + if params and kwparams: + params = params + ', ' + kwparams + elif kwparams: + params = kwparams + + sdefines = '' + if not isinstance(rule.exp, grammars.Choice): + sdefines = self._make_defines_declaration(rule) + leftrec = '\n@leftrec' if rule.is_leftrec else '' + nomemo = ( + '\n@nomemo' + if not rule.is_memoizable and not leftrec + else '' + ) + isname='\n@isname' if rule.is_name else '' self.print( - ''' - ** AT GRAMMAR - ''', + f""" + @tatsumasu({params})\ + {leftrec}\ + {nomemo}\ + {isname} + def _{rule.name}_(self): + """ ) + with self.indent(): + self.print(self.walk(rule.exp)) + self.print() def _gen_keywords(self, grammar: grammars.Grammar): keywords = [str(k) for k in grammar.keywords if k is not None] @@ -67,16 +154,11 @@ def _gen_keywords(self, grammar: grammars.Grammar): self.print() self.print() - def _gen_buffering(self, grammar: grammars.Grammar): - name = self.parser_name or grammar.name - self.print(f'class {name}Buffer(Buffer):') + def _gen_init(self, grammar: grammars.Grammar): start = grammar.config.start or grammar.rules[0].name - with self.indent(): - self.print('def __init__(self, text, /, config: ParserConfig | None = None, **settings):') - with self.indent(): - self.print( - f''' + self.print( + f''' config = ParserConfig.new( config, owner=self, @@ -93,5 +175,49 @@ def _gen_buffering(self, grammar: grammars.Grammar): config = config.replace(**settings) super().__init__(text, config=config) ''', - ) + ) + self.print() + + def _gen_buffering(self, grammar: grammars.Grammar): + self.print(f'class {self.parser_name}Buffer(Buffer):') + + with self.indent(): + self.print('def __init__(self, text, /, config: ParserConfig | None = None, **settings):') + with self.indent(): + self._gen_init(grammar) + self.print() + + + def _gen_parsing(self, grammar: grammars.Grammar): + self.print(f'class {self.parser_name}Parser(Parser):') + with self.indent(): + self.print('def __init__(self, /, config: ParserConfig | None = None, **settings):') + with self.indent(): + self._gen_init(grammar) + self.walk(grammar.rules) + + def _make_defines_declaration(self, node: grammars.Model): + defines = compress_seq(node.defines()) + ldefs = oset(safe_name(d) for d, value in defines if value) + sdefs = oset( + safe_name(d) + for d, value in defines + if not value and d not in ldefs + ) + if not (sdefs or ldefs): + return '' + else: + sdefs = '[%s]' % ', '.join(sorted(repr(d) for d in sdefs)) + ldefs = '[%s]' % ', '.join(sorted(repr(d) for d in ldefs)) + if not ldefs: + return f'\n\n self._define({sdefs}, {ldefs})' + else: + return '\n' + trim(self.define_template % (sdefs, ldefs)) + + define_template = """\ + self._define( + %s, + %s, + )\ + """ From fda59154adc7961699df6444c7ead9e9c52ff267 Mon Sep 17 00:00:00 2001 From: apalala Date: Wed, 29 Nov 2023 09:09:45 -0400 Subject: [PATCH 12/51] [lint] solve warnings --- tatsu/ngcodegen/python.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/tatsu/ngcodegen/python.py b/tatsu/ngcodegen/python.py index 0d659efc..a9809beb 100644 --- a/tatsu/ngcodegen/python.py +++ b/tatsu/ngcodegen/python.py @@ -4,10 +4,10 @@ from collections.abc import Iterator from typing import Any -from ..collections import OrderedSet as oset from .. import grammars +from ..collections import OrderedSet as oset from ..mixins.indent import IndentPrintMixin -from ..util import trim, compress_seq, safe_name +from ..util import compress_seq, safe_name, trim from ..walkers import NodeWalker HEADER = """\ @@ -110,7 +110,7 @@ def param_repr(p): if rule.kwparams: kwparams = ', '.join( f'{k}={param_repr(self.walk(v))}' - for k, v in self.kwparams.items() + for k, v in rule.kwparams.items() ) if params and kwparams: @@ -118,16 +118,17 @@ def param_repr(p): elif kwparams: params = kwparams - sdefines = '' - if not isinstance(rule.exp, grammars.Choice): - sdefines = self._make_defines_declaration(rule) + # sdefines = '' + # if not isinstance(rule.exp, grammars.Choice): + # sdefines = self._make_defines_declaration(rule) + leftrec = '\n@leftrec' if rule.is_leftrec else '' nomemo = ( '\n@nomemo' if not rule.is_memoizable and not leftrec else '' ) - isname='\n@isname' if rule.is_name else '' + isname = '\n@isname' if rule.is_name else '' self.print( f""" @@ -136,7 +137,7 @@ def param_repr(p): {nomemo}\ {isname} def _{rule.name}_(self): - """ + """, ) with self.indent(): self.print(self.walk(rule.exp)) @@ -208,12 +209,12 @@ def _make_defines_declaration(self, node: grammars.Model): if not (sdefs or ldefs): return '' else: - sdefs = '[%s]' % ', '.join(sorted(repr(d) for d in sdefs)) - ldefs = '[%s]' % ', '.join(sorted(repr(d) for d in ldefs)) + sdefs_str = '[%s]' % ', '.join(sorted(repr(d) for d in sdefs)) + ldefs_str = '[%s]' % ', '.join(sorted(repr(d) for d in ldefs)) if not ldefs: - return f'\n\n self._define({sdefs}, {ldefs})' + return f'\n\n self._define({sdefs_str}, {ldefs_str})' else: - return '\n' + trim(self.define_template % (sdefs, ldefs)) + return '\n' + trim(self.define_template % (sdefs_str, ldefs_str)) define_template = """\ self._define( From e71087fa322f935db1129ddf9331a4ba8a086560 Mon Sep 17 00:00:00 2001 From: apalala Date: Wed, 29 Nov 2023 09:44:56 -0400 Subject: [PATCH 13/51] [ngcodegen] add more node types to walker --- tatsu/ngcodegen/python.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tatsu/ngcodegen/python.py b/tatsu/ngcodegen/python.py index a9809beb..ec96aeed 100644 --- a/tatsu/ngcodegen/python.py +++ b/tatsu/ngcodegen/python.py @@ -143,6 +143,39 @@ def _{rule.name}_(self): self.print(self.walk(rule.exp)) self.print() + def walk_Void(self, void: grammars.Void): + self.print('self._void()') + + def walk_Any(self, any: grammars.Any): + self.print('self._any()') + + def walk_Fail(self, fail: grammars.Fail): + self.print('self._fail()') + + def walk_Comment(self, comment: grammars.Comment): + lines = '\n'.join(f'# {c!s}' for c in comment.comment.splitlines()) + self.print(f'\n{lines}\n') + + def walk_EOLComment(self, comment: grammars.EOLComment): + self.walk_Comment(comment) + + def walk_EOF(self, eof: grammars.EOF): + self.print('self._check_eof()') + + def walk_Group(self, group: grammars.Group): + self.print('with self._group():') + with self.indent(): + self.walk(group.exp) + + def walk_Token(self, token: grammars.Token): + self.print(f'self._token({token.token!r})') + + def walk_Constant(self, constant: grammars.Constant): + self.print(f'self._constant({constant.literal!r})') + + def walk_Alert(self, alert: grammars.Alert): + self.print(f'self._alert({alert.literal!r}, {alert.level})') + def _gen_keywords(self, grammar: grammars.Grammar): keywords = [str(k) for k in grammar.keywords if k is not None] if not keywords: From 0bdd9450bbe1cd251d5ea915452329180cb1ec62 Mon Sep 17 00:00:00 2001 From: apalala Date: Wed, 29 Nov 2023 10:16:40 -0400 Subject: [PATCH 14/51] [ngcodegen] add more node types to walker --- tatsu/ngcodegen/python.py | 78 +++++++++++++++++++++++++++++---------- 1 file changed, 58 insertions(+), 20 deletions(-) diff --git a/tatsu/ngcodegen/python.py b/tatsu/ngcodegen/python.py index ec96aeed..1b01ad0f 100644 --- a/tatsu/ngcodegen/python.py +++ b/tatsu/ngcodegen/python.py @@ -1,6 +1,7 @@ from __future__ import annotations import itertools +import textwrap from collections.abc import Iterator from typing import Any @@ -76,7 +77,7 @@ def reset_counter(cls): cls._counter = itertools.count() def print(self, *args, **kwargs): - args = [trim(str(arg)) for arg in args] + args = [trim(str(arg)) for arg in args if arg is not None] super().print(*args, **kwargs) def walk_default(self, node: Any): @@ -118,9 +119,9 @@ def param_repr(p): elif kwparams: params = kwparams - # sdefines = '' - # if not isinstance(rule.exp, grammars.Choice): - # sdefines = self._make_defines_declaration(rule) + if not isinstance(rule.exp, grammars.Choice): + with self.indent(): + self._gen_defines_declaration(rule) leftrec = '\n@leftrec' if rule.is_leftrec else '' nomemo = ( @@ -130,6 +131,7 @@ def param_repr(p): ) isname = '\n@isname' if rule.is_name else '' + self.print() self.print( f""" @tatsumasu({params})\ @@ -141,7 +143,6 @@ def _{rule.name}_(self): ) with self.indent(): self.print(self.walk(rule.exp)) - self.print() def walk_Void(self, void: grammars.Void): self.print('self._void()') @@ -176,6 +177,45 @@ def walk_Constant(self, constant: grammars.Constant): def walk_Alert(self, alert: grammars.Alert): self.print(f'self._alert({alert.literal!r}, {alert.level})') + def walk_Pattern(self, pattern: grammars.Pattern): + self.print(f'self._pattern({pattern.pattern!r})') + + def walk_Lookahead(self, lookahead: grammars.Lookahead): + self.print('with self._if():') + with self.indent(): + self.walk(lookahead.exp) + + def walk_NegativeLookahead(self, lookahead: grammars.NegativeLookahead): + self.print('with self._ifnot():') + with self.indent(): + self.walk(lookahead.exp) + + def walk_Sequence(self, seq: grammars.Sequence): + self.walk(seq.sequence) + with self.indent(): + self._gen_defines_declaration(seq) + + def walk_Choice(self, choice: grammars.Choice): + if len(choice.options) == 1: + self.walk(choice.options[0]) + return + + firstset = choice.lookahead_str() + if firstset: + msglines = textwrap.wrap(firstset, width=40) + error = ['expecting one of: ', *msglines] + else: + error = ['no available options'] + errors = '\n'.join(repr(e) for e in error) + + self.print('with self._choice():') + with self.indent(): + self.walk(choice.options) + self.print('self._error(') + with self.indent(): + self.print(errors) + self.print(')') + def _gen_keywords(self, grammar: grammars.Grammar): keywords = [str(k) for k in grammar.keywords if k is not None] if not keywords: @@ -230,7 +270,7 @@ def _gen_parsing(self, grammar: grammars.Grammar): self._gen_init(grammar) self.walk(grammar.rules) - def _make_defines_declaration(self, node: grammars.Model): + def _gen_defines_declaration(self, node: grammars.Model): defines = compress_seq(node.defines()) ldefs = oset(safe_name(d) for d, value in defines if value) sdefs = oset( @@ -240,18 +280,16 @@ def _make_defines_declaration(self, node: grammars.Model): ) if not (sdefs or ldefs): - return '' + return + + sdefs_str = ', '.join(sorted(repr(d) for d in sdefs)) + ldefs_str = ', '.join(sorted(repr(d) for d in ldefs)) + + if not ldefs: + self.print(f'self._define([{sdefs_str}], [{ldefs_str}])') else: - sdefs_str = '[%s]' % ', '.join(sorted(repr(d) for d in sdefs)) - ldefs_str = '[%s]' % ', '.join(sorted(repr(d) for d in ldefs)) - if not ldefs: - return f'\n\n self._define({sdefs_str}, {ldefs_str})' - else: - return '\n' + trim(self.define_template % (sdefs_str, ldefs_str)) - - define_template = """\ - self._define( - %s, - %s, - )\ - """ + self.print('self._define(') + with self.indent(): + self.print(f'[{sdefs_str}],') + self.print(f'[{ldefs_str}],') + self.print(')') From a0578881b0579e2de8e11ac9393e8d58df8963dd Mon Sep 17 00:00:00 2001 From: apalala Date: Wed, 29 Nov 2023 10:28:03 -0400 Subject: [PATCH 15/51] [ngcodegen] add more node types to walker --- tatsu/ngcodegen/python.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/tatsu/ngcodegen/python.py b/tatsu/ngcodegen/python.py index 1b01ad0f..1a41be68 100644 --- a/tatsu/ngcodegen/python.py +++ b/tatsu/ngcodegen/python.py @@ -7,6 +7,7 @@ from .. import grammars from ..collections import OrderedSet as oset +from ..exceptions import CodegenError from ..mixins.indent import IndentPrintMixin from ..util import compress_seq, safe_name, trim from ..walkers import NodeWalker @@ -69,11 +70,11 @@ def __init__(self, parser_name: str = ''): self.parser_name = parser_name @classmethod - def counter(cls): + def _next_n(cls): return next(cls._counter) @classmethod - def reset_counter(cls): + def _reset_counter(cls): cls._counter = itertools.count() def print(self, *args, **kwargs): @@ -103,6 +104,7 @@ def param_repr(p): else: return repr(p.split('::')[0]) + self._reset_counter() params = kwparams = '' if rule.params: params = ', '.join( @@ -216,6 +218,21 @@ def walk_Choice(self, choice: grammars.Choice): self.print(errors) self.print(')') + def walk_Option(self, option: grammars.Option): + self.print('with self._option():') + with self.indent(): + self.walk(option.exp) + + def walk_Closure(self, closure: grammars.Closure): + if () in closure.exp.lookahead(): + raise CodegenError(f'{self.node} may repeat empty sequence') + + n = self._next_n() + self.print(f'def block{n}():') + with self.indent(): + self.walk(closure.exp) + self.print(f'self._closure(block{n})') + def _gen_keywords(self, grammar: grammars.Grammar): keywords = [str(k) for k in grammar.keywords if k is not None] if not keywords: From 8563ef1c3eb5a9dee63211df45999579fffbef01 Mon Sep 17 00:00:00 2001 From: apalala Date: Wed, 29 Nov 2023 10:33:23 -0400 Subject: [PATCH 16/51] [ngcodegen] add more node types to walker --- tatsu/ngcodegen/python.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tatsu/ngcodegen/python.py b/tatsu/ngcodegen/python.py index 1a41be68..95f7d71e 100644 --- a/tatsu/ngcodegen/python.py +++ b/tatsu/ngcodegen/python.py @@ -231,8 +231,20 @@ def walk_Closure(self, closure: grammars.Closure): self.print(f'def block{n}():') with self.indent(): self.walk(closure.exp) + self.print() self.print(f'self._closure(block{n})') + def walk_PositiveClosure(self, closure: grammars.PositiveClosure): + if () in closure.exp.lookahead(): + raise CodegenError(f'{self.node} may repeat empty sequence') + + n = self._next_n() + self.print(f'def block{n}():') + with self.indent(): + self.walk(closure.exp) + self.print() + self.print(f'self._positive_closure(block{n})') + def _gen_keywords(self, grammar: grammars.Grammar): keywords = [str(k) for k in grammar.keywords if k is not None] if not keywords: From c3ab556a355513d3f227201ccd00962b79f9770a Mon Sep 17 00:00:00 2001 From: apalala Date: Wed, 29 Nov 2023 11:10:31 -0400 Subject: [PATCH 17/51] [ngcodegen] add more node types to walker --- tatsu/ngcodegen/python.py | 98 ++++++++++++++++++++++++++++++++------- 1 file changed, 81 insertions(+), 17 deletions(-) diff --git a/tatsu/ngcodegen/python.py b/tatsu/ngcodegen/python.py index 95f7d71e..c0803108 100644 --- a/tatsu/ngcodegen/python.py +++ b/tatsu/ngcodegen/python.py @@ -95,7 +95,7 @@ def walk_Grammar(self, grammar: grammars.Grammar): self._gen_parsing(grammar) self.print() - self.print(FOOTER) + self.print(FOOTER.format(name=self.parser_name)) def walk_Rule(self, rule: grammars.Rule): def param_repr(p): @@ -146,6 +146,9 @@ def _{rule.name}_(self): with self.indent(): self.print(self.walk(rule.exp)) + def walk_RuleRef(self, ref: grammars.RuleRef): + self.print(f'self._{ref.name}_()') + def walk_Void(self, void: grammars.Void): self.print('self._void()') @@ -155,6 +158,9 @@ def walk_Any(self, any: grammars.Any): def walk_Fail(self, fail: grammars.Fail): self.print('self._fail()') + def walk_Cut(self, cut: grammars.Cut): + self.print('self._cut()') + def walk_Comment(self, comment: grammars.Comment): lines = '\n'.join(f'# {c!s}' for c in comment.comment.splitlines()) self.print(f'\n{lines}\n') @@ -223,28 +229,74 @@ def walk_Option(self, option: grammars.Option): with self.indent(): self.walk(option.exp) - def walk_Closure(self, closure: grammars.Closure): - if () in closure.exp.lookahead(): - raise CodegenError(f'{self.node} may repeat empty sequence') - - n = self._next_n() - self.print(f'def block{n}():') + def walk_Optional(self, optional: grammars.Optional): + self.print('with self._optional():') with self.indent(): - self.walk(closure.exp) - self.print() + self.walk(optional.exp) + + def walk_EmptyClosure(self, closure: grammars.EmptyClosure): + self.print('self._empty_closure()') + + def walk_Closure(self, closure: grammars.Closure): + n = self._gen_block(closure) self.print(f'self._closure(block{n})') def walk_PositiveClosure(self, closure: grammars.PositiveClosure): - if () in closure.exp.lookahead(): - raise CodegenError(f'{self.node} may repeat empty sequence') - - n = self._next_n() - self.print(f'def block{n}():') - with self.indent(): - self.walk(closure.exp) - self.print() + n = self._gen_block(closure) self.print(f'self._positive_closure(block{n})') + def walk_Join(self, join: grammars.Join): + n = self._gen_block(join, name='sep') + n = self._gen_block(join) + self.print(f'self._join(block{n}, sep{n})') + + def walk_PositiveJoin(self, join: grammars.PositiveJoin): + n = self._gen_block(join, name='sep') + n = self._gen_block(join) + self.print(f'self._positive_join(block{n}, sep{n})') + + def walk_LeftJoin(self, join: grammars.LeftJoin): + n = self._gen_block(join, name='sep') + n = self._gen_block(join) + self.print(f'self._left_join(block{n}, sep{n})') + + def walk_RightJoin(self, join: grammars.RightJoin): + n = self._gen_block(join, name='sep') + n = self._gen_block(join) + self.print(f'self._right_join(block{n}, sep{n})') + + def walk_Gather(self, gather: grammars.Gather): + n = self._gen_block(gather, name='sep') + n = self._gen_block(gather) + self.print(f'self._gather(block{n}, sep{n})') + + def walk_PositiveGather(self, gather: grammars.PositiveGather): + n = self._gen_block(gather, name='sep') + n = self._gen_block(gather) + self.print(f'self._positive_gather(block{n}, sep{n})') + + def walk_SkipTo(self, skipto: grammars.SkipTo): + n = self._gen_block(skipto) + self.print(f'self._skip_to(block{n})') + + + def walk_Named(self, named: grammars.Named): + self.walk(named.exp) + self.print(f"self.name_last_node('{named.name}')") + + def walk_NamedList(self, named: grammars.Named): + self.walk(named.exp) + self.print(f"self.add_last_node_to_name('{named.name}')") + + def walk_Override(self, override: grammars.Override): + self.walk_Named(override) + + def walk_OverrideList(self, override: grammars.OverrideList): + self.walk_NamedList(override) + + def walk_Special(self, special: grammars.Special): + pass + def _gen_keywords(self, grammar: grammars.Grammar): keywords = [str(k) for k in grammar.keywords if k is not None] if not keywords: @@ -322,3 +374,15 @@ def _gen_defines_declaration(self, node: grammars.Model): self.print(f'[{sdefs_str}],') self.print(f'[{ldefs_str}],') self.print(')') + + def _gen_block(self, node: grammars.Decorator, name='block'): + if () in node.exp.lookahead(): + raise CodegenError(f'{self.node} may repeat empty sequence') + + n = self._next_n() + self.print() + self.print(f'def {name}{n}():') + with self.indent(): + self.walk(node.exp) + + return n From 367f7302faafef4005e6bd8e00bc72c20ca1a8c6 Mon Sep 17 00:00:00 2001 From: apalala Date: Wed, 29 Nov 2023 11:26:47 -0400 Subject: [PATCH 18/51] [ngcodegen] add more node types to walker --- tatsu/ngcodegen/python.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tatsu/ngcodegen/python.py b/tatsu/ngcodegen/python.py index c0803108..531360c4 100644 --- a/tatsu/ngcodegen/python.py +++ b/tatsu/ngcodegen/python.py @@ -146,9 +146,17 @@ def _{rule.name}_(self): with self.indent(): self.print(self.walk(rule.exp)) + def walk_BasedRule(self, rule: grammars.BasedRule): + # FIXME: the following override is to not alter the previous codegen + rule.exp = rule.rhs + self.walk_Rule(rule) + def walk_RuleRef(self, ref: grammars.RuleRef): self.print(f'self._{ref.name}_()') + def wal_RuleInclude(self, include: grammars.RuleInclude): + self.walk(include.rule.exp) + def walk_Void(self, void: grammars.Void): self.print('self._void()') From 37c0dc45094cc5517083ce14dddea3a09557570b Mon Sep 17 00:00:00 2001 From: apalala Date: Wed, 29 Nov 2023 12:03:32 -0400 Subject: [PATCH 19/51] [ngcodegen] bug fixes --- tatsu/ngcodegen/python.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/tatsu/ngcodegen/python.py b/tatsu/ngcodegen/python.py index 531360c4..20ad2f27 100644 --- a/tatsu/ngcodegen/python.py +++ b/tatsu/ngcodegen/python.py @@ -121,10 +121,6 @@ def param_repr(p): elif kwparams: params = kwparams - if not isinstance(rule.exp, grammars.Choice): - with self.indent(): - self._gen_defines_declaration(rule) - leftrec = '\n@leftrec' if rule.is_leftrec else '' nomemo = ( '\n@nomemo' @@ -145,6 +141,9 @@ def _{rule.name}_(self): ) with self.indent(): self.print(self.walk(rule.exp)) + if not isinstance(rule.exp, grammars.Choice): + self._gen_defines_declaration(rule) + def walk_BasedRule(self, rule: grammars.BasedRule): # FIXME: the following override is to not alter the previous codegen @@ -154,7 +153,7 @@ def walk_BasedRule(self, rule: grammars.BasedRule): def walk_RuleRef(self, ref: grammars.RuleRef): self.print(f'self._{ref.name}_()') - def wal_RuleInclude(self, include: grammars.RuleInclude): + def walk_RuleInclude(self, include: grammars.RuleInclude): self.walk(include.rule.exp) def walk_Void(self, void: grammars.Void): @@ -208,8 +207,7 @@ def walk_NegativeLookahead(self, lookahead: grammars.NegativeLookahead): def walk_Sequence(self, seq: grammars.Sequence): self.walk(seq.sequence) - with self.indent(): - self._gen_defines_declaration(seq) + self._gen_defines_declaration(seq) def walk_Choice(self, choice: grammars.Choice): if len(choice.options) == 1: @@ -274,14 +272,14 @@ def walk_RightJoin(self, join: grammars.RightJoin): self.print(f'self._right_join(block{n}, sep{n})') def walk_Gather(self, gather: grammars.Gather): - n = self._gen_block(gather, name='sep') + m = self._gen_block(gather, name='sep') n = self._gen_block(gather) - self.print(f'self._gather(block{n}, sep{n})') + self.print(f'self._gather(block{n}, sep{m})') def walk_PositiveGather(self, gather: grammars.PositiveGather): - n = self._gen_block(gather, name='sep') + m = self._gen_block(gather, name='sep') n = self._gen_block(gather) - self.print(f'self._positive_gather(block{n}, sep{n})') + self.print(f'self._positive_gather(block{n}, sep{m})') def walk_SkipTo(self, skipto: grammars.SkipTo): n = self._gen_block(skipto) @@ -325,7 +323,7 @@ def _gen_init(self, grammar: grammars.Grammar): config = ParserConfig.new( config, owner=self, - whitespace={grammar.config.whitespace}, + whitespace={grammar.config.whitespace!r}, nameguard={grammar.config.nameguard}, ignorecase={grammar.config.ignorecase}, namechars={grammar.config.namechars or None}, @@ -336,7 +334,6 @@ def _gen_init(self, grammar: grammars.Grammar): start={start!r}, ) config = config.replace(**settings) - super().__init__(text, config=config) ''', ) self.print() @@ -348,6 +345,7 @@ def _gen_buffering(self, grammar: grammars.Grammar): self.print('def __init__(self, text, /, config: ParserConfig | None = None, **settings):') with self.indent(): self._gen_init(grammar) + self.print('super().__init__(text, config=config)') self.print() @@ -357,6 +355,7 @@ def _gen_parsing(self, grammar: grammars.Grammar): self.print('def __init__(self, /, config: ParserConfig | None = None, **settings):') with self.indent(): self._gen_init(grammar) + self.print('super().__init__(config=config)') self.walk(grammar.rules) def _gen_defines_declaration(self, node: grammars.Model): From 82a5b9093f2f4442e7c728d328e8065fc210993b Mon Sep 17 00:00:00 2001 From: apalala Date: Wed, 29 Nov 2023 12:06:47 -0400 Subject: [PATCH 20/51] [ngcodegen] fix bugs --- tatsu/ngcodegen/python.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/tatsu/ngcodegen/python.py b/tatsu/ngcodegen/python.py index 20ad2f27..53eb77d0 100644 --- a/tatsu/ngcodegen/python.py +++ b/tatsu/ngcodegen/python.py @@ -244,45 +244,45 @@ def walk_EmptyClosure(self, closure: grammars.EmptyClosure): self.print('self._empty_closure()') def walk_Closure(self, closure: grammars.Closure): - n = self._gen_block(closure) + n = self._gen_block(closure.exp) self.print(f'self._closure(block{n})') def walk_PositiveClosure(self, closure: grammars.PositiveClosure): - n = self._gen_block(closure) + n = self._gen_block(closure.exp) self.print(f'self._positive_closure(block{n})') def walk_Join(self, join: grammars.Join): - n = self._gen_block(join, name='sep') - n = self._gen_block(join) + n = self._gen_block(join.sep, name='sep') + n = self._gen_block(join.exp) self.print(f'self._join(block{n}, sep{n})') def walk_PositiveJoin(self, join: grammars.PositiveJoin): - n = self._gen_block(join, name='sep') - n = self._gen_block(join) + n = self._gen_block(join.sep, name='sep') + n = self._gen_block(join.exp) self.print(f'self._positive_join(block{n}, sep{n})') def walk_LeftJoin(self, join: grammars.LeftJoin): - n = self._gen_block(join, name='sep') - n = self._gen_block(join) + n = self._gen_block(join.sep, name='sep') + n = self._gen_block(join.exp) self.print(f'self._left_join(block{n}, sep{n})') def walk_RightJoin(self, join: grammars.RightJoin): - n = self._gen_block(join, name='sep') - n = self._gen_block(join) + n = self._gen_block(join.sep, name='sep') + n = self._gen_block(join.exp) self.print(f'self._right_join(block{n}, sep{n})') def walk_Gather(self, gather: grammars.Gather): - m = self._gen_block(gather, name='sep') - n = self._gen_block(gather) + m = self._gen_block(gather.sep, name='sep') + n = self._gen_block(gather.exp) self.print(f'self._gather(block{n}, sep{m})') def walk_PositiveGather(self, gather: grammars.PositiveGather): - m = self._gen_block(gather, name='sep') - n = self._gen_block(gather) + m = self._gen_block(gather.sep, name='sep') + n = self._gen_block(gather.exp) self.print(f'self._positive_gather(block{n}, sep{m})') def walk_SkipTo(self, skipto: grammars.SkipTo): - n = self._gen_block(skipto) + n = self._gen_block(skipto.exp) self.print(f'self._skip_to(block{n})') @@ -382,14 +382,14 @@ def _gen_defines_declaration(self, node: grammars.Model): self.print(f'[{ldefs_str}],') self.print(')') - def _gen_block(self, node: grammars.Decorator, name='block'): - if () in node.exp.lookahead(): + def _gen_block(self, exp: grammars.Model, name='block'): + if () in exp.lookahead(): raise CodegenError(f'{self.node} may repeat empty sequence') n = self._next_n() self.print() self.print(f'def {name}{n}():') with self.indent(): - self.walk(node.exp) + self.walk(exp) return n From 09d6139f45f5e5cbfd5f23a4d84dc526d2367ff6 Mon Sep 17 00:00:00 2001 From: apalala Date: Wed, 29 Nov 2023 12:46:56 -0400 Subject: [PATCH 21/51] [mixins][indent] allways trim left spacing in arguments --- tatsu/mixins/indent.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tatsu/mixins/indent.py b/tatsu/mixins/indent.py index d4be8b28..f707a1d5 100644 --- a/tatsu/mixins/indent.py +++ b/tatsu/mixins/indent.py @@ -14,6 +14,7 @@ def printed_text(self): return self.output_stream.getvalue() def print(self, *args, **kwargs): + args = [trim(str(arg)) for arg in args if arg is not None] lines = self.as_printed_lines(*args, **kwargs) self._do_print_lines(lines) From 18462a5524f47e6870271eb7ac234865c2a7ca75 Mon Sep 17 00:00:00 2001 From: apalala Date: Wed, 29 Nov 2023 14:06:26 -0400 Subject: [PATCH 22/51] [ngcodegen][model] bootstrap model generation --- tatsu/codegen/python.py | 4 +- tatsu/ngcodegen/objectmodel.py | 137 +++++++++++++++++++++++++++++++++ tatsu/ngcodegen/python.py | 11 +-- tatsu/tool.py | 11 ++- 4 files changed, 152 insertions(+), 11 deletions(-) create mode 100644 tatsu/ngcodegen/objectmodel.py diff --git a/tatsu/codegen/python.py b/tatsu/codegen/python.py index ec831c1b..0b3f766f 100755 --- a/tatsu/codegen/python.py +++ b/tatsu/codegen/python.py @@ -498,9 +498,9 @@ def {name}(self, ast): """ template = """\ - #!/usr/bin/env python + #!/usr/bin/env python3 - # CAVEAT UTILITOR + # WARNING: CAVEAT UTILITOR # # This file was automatically generated by TatSu. # diff --git a/tatsu/ngcodegen/objectmodel.py b/tatsu/ngcodegen/objectmodel.py new file mode 100644 index 00000000..306ca10e --- /dev/null +++ b/tatsu/ngcodegen/objectmodel.py @@ -0,0 +1,137 @@ +import inspect +import re +from collections import namedtuple + +from .. import grammars, objectmodel +from ..exceptions import CodegenError +from ..mixins.indent import IndentPrintMixin +from ..util import safe_name, compress_seq +from ..walkers import NodeWalker + + +HEADER = """\ + #!/usr/bin/env python3 + + # WARNING: CAVEAT UTILITOR + # + # This file was automatically generated by TatSu. + # + # https://pypi.python.org/pypi/tatsu/ + # + # Any changes you make to it will be overwritten the next time + # the file is generated. + + from __future__ import annotations + + from typing import Any + from dataclasses import dataclass + + from tatsu.semantics import ModelBuilderSemantics + {base_type_import} + + + @dataclass(eq=False) + class {name}ModelBase({base_type}): + pass + + + class {name}ModelBuilderSemantics(ModelBuilderSemantics): + def __init__(self, context=None, types=None): + types = [ + t for t in globals().values() + if type(t) is type and issubclass(t, {name}ModelBase) + ] + (types or []) + super().__init__(context=context, types=types) +""" + + +TypeSpec = namedtuple('TypeSpec', ['class_name', 'base']) + + +def codegen(model: grammars.Model, parser_name: str = '', base_type: type = objectmodel.Node) -> str: + generator = PythonModelGenerator(parser_name=parser_name, base_type=base_type) + generator.walk(model) + return generator.printed_text() + + +class PythonModelGenerator(IndentPrintMixin, NodeWalker): + + def __init__(self, parser_name: str = '', base_type: type = objectmodel.Node): + super().__init__() + self.base_type = base_type + self.parser_name = parser_name or None + + def walk_Grammar(self, grammar: grammars.Grammar): + base_type_qual = self.base_type.__module__ + base_type_import = f'from {self.base_type.__module__} import {self.base_type.__name__.split('.')[-1]}' + + self.parser_name = self.parser_name or grammar.name + self.print( + HEADER.format( + name=self.parser_name, + base_type=self.base_type.__name__, + base_type_import=base_type_import, + ) + ) + self.print() + self.print() + + rule_index = {rule.name: rule for rule in grammar.rules} + rule_specs = { + rule.name: self._type_specs(rule) + for rule in grammar.rules + } + + model_classes = {s.class_name for spec in rule_specs.values() for s in spec} + base_classes = {s.base for spec in rule_specs.values() for s in spec} + base_classes -= model_classes + # raise Exception('HERE', base_classes, model_classes) + + def walk_Rule(self, rule: grammars.Rule): + specs = self._type_specs(rule) + if not specs: + return + + arguments = sorted({safe_name(d) for d, _ in compress_seq(rule.defines())}) + + self.print() + self.print() + + node_spec = specs[0] + base_specs = list(reversed(specs[1:])) + base = base_specs and base_specs[0] or f'{self.parser_name}ModelBase' + self.print(f'class {node_spec.class_name}({base}):') + + with self.indent(): + if not arguments: + self.print('pass') + for arg in arguments: + self.print(f'{arg}: Any = None') + + def _type_specs(self, rule: grammars.Rule) -> TypeSpec: + if not self._get_node_class_name(rule): + return [] + + spec = rule.params[0].split('::') + class_names = [safe_name(n) for n in spec] + [f'{self.parser_name}ModelBase'] + + typespec = [] + for i, class_name in enumerate(class_names[:-1]): + base = class_names[i + 1] + typespec.append(TypeSpec(class_name, base)) + + return typespec + + @staticmethod + def _get_node_class_name(rule: grammars.Rule): + if not rule.params: + return None + + node_names = rule.params[0] + if not isinstance(node_names, str): + return None + if not re.match(r'\w+(?:::\w+)*', node_names): + return None + if not node_names[0].isupper(): + return None + return node_names diff --git a/tatsu/ngcodegen/python.py b/tatsu/ngcodegen/python.py index 53eb77d0..12c8b55d 100644 --- a/tatsu/ngcodegen/python.py +++ b/tatsu/ngcodegen/python.py @@ -13,7 +13,7 @@ from ..walkers import NodeWalker HEADER = """\ - #!/usr/bin/env python + #!/usr/bin/env python3 # WARNING: CAVEAT UTILITOR # @@ -61,7 +61,6 @@ def main(filename, **kwargs): """ - class PythonCodeGenerator(IndentPrintMixin, NodeWalker): _counter: Iterator[int] = itertools.count() @@ -77,15 +76,11 @@ def _next_n(cls): def _reset_counter(cls): cls._counter = itertools.count() - def print(self, *args, **kwargs): - args = [trim(str(arg)) for arg in args if arg is not None] - super().print(*args, **kwargs) - def walk_default(self, node: Any): return node def walk_Grammar(self, grammar: grammars.Grammar): - self.parser_name = self.parser_name or grammar.name + parser_name = self.parser_name or grammar.name self.print(HEADER) self.print() self.print() @@ -95,7 +90,7 @@ def walk_Grammar(self, grammar: grammars.Grammar): self._gen_parsing(grammar) self.print() - self.print(FOOTER.format(name=self.parser_name)) + self.print(FOOTER.format(name=parser_name)) def walk_Rule(self, rule: grammars.Rule): def param_repr(p): diff --git a/tatsu/tool.py b/tatsu/tool.py index 2f19b72a..26609aed 100644 --- a/tatsu/tool.py +++ b/tatsu/tool.py @@ -19,6 +19,7 @@ from .exceptions import ParseException from .infos import ParserConfig from .ngcodegen import codegen as ngpythoncg +from .ngcodegen import objectmodel as ngobjectmodel from .parser import GrammarGenerator from .semantics import ModelBuilderSemantics from .util import eval_escapes @@ -44,7 +45,13 @@ def parse_args(): main_mode.add_argument( '--ng-parser', '-x', - help='generate parser code from the grammar with new code generator', + help='generate parser code from the grammar using new code generator', + action='store_true', + ) + main_mode.add_argument( + '--ng-model', + '-y', + help='generate parser model from the grammar using the new code generator', action='store_true', ) main_mode.add_argument( @@ -337,6 +344,8 @@ def main(): result = objectmodel.codegen(model, base_type=args.base_type) elif args.ng_parser: result = ngpythoncg(model) + elif args.ng_model: + result = ngobjectmodel.codegen(model, parser_name=args.name) else: result = pythoncg(model) From e38a3dc7463d8068f1728d7109ed19e3a29c1fdf Mon Sep 17 00:00:00 2001 From: apalala Date: Wed, 29 Nov 2023 14:41:08 -0400 Subject: [PATCH 23/51] [ngcodegen][objectmodel] add model generator --- tatsu/ngcodegen/objectmodel.py | 89 ++++++++++++++++++++-------------- tatsu/tool.py | 2 +- 2 files changed, 54 insertions(+), 37 deletions(-) diff --git a/tatsu/ngcodegen/objectmodel.py b/tatsu/ngcodegen/objectmodel.py index 306ca10e..8a399d2e 100644 --- a/tatsu/ngcodegen/objectmodel.py +++ b/tatsu/ngcodegen/objectmodel.py @@ -1,9 +1,6 @@ -import inspect -import re from collections import namedtuple from .. import grammars, objectmodel -from ..exceptions import CodegenError from ..mixins.indent import IndentPrintMixin from ..util import safe_name, compress_seq from ..walkers import NodeWalker @@ -45,23 +42,22 @@ def __init__(self, context=None, types=None): """ -TypeSpec = namedtuple('TypeSpec', ['class_name', 'base']) +BaseClassSpec = namedtuple('TypeSpec', ['class_name', 'base']) -def codegen(model: grammars.Model, parser_name: str = '', base_type: type = objectmodel.Node) -> str: +def modelgen(model: grammars.Model, parser_name: str = '', base_type: type = objectmodel.Node) -> str: generator = PythonModelGenerator(parser_name=parser_name, base_type=base_type) - generator.walk(model) - return generator.printed_text() + return generator.generate_model(model) -class PythonModelGenerator(IndentPrintMixin, NodeWalker): +class PythonModelGenerator(IndentPrintMixin): def __init__(self, parser_name: str = '', base_type: type = objectmodel.Node): super().__init__() self.base_type = base_type self.parser_name = parser_name or None - def walk_Grammar(self, grammar: grammars.Grammar): + def generate_model(self, grammar: grammars.Grammar): base_type_qual = self.base_type.__module__ base_type_import = f'from {self.base_type.__module__} import {self.base_type.__name__.split('.')[-1]}' @@ -78,17 +74,55 @@ def walk_Grammar(self, grammar: grammars.Grammar): rule_index = {rule.name: rule for rule in grammar.rules} rule_specs = { - rule.name: self._type_specs(rule) + rule.name: self._base_class_specs(rule) for rule in grammar.rules } model_classes = {s.class_name for spec in rule_specs.values() for s in spec} base_classes = {s.base for spec in rule_specs.values() for s in spec} base_classes -= model_classes - # raise Exception('HERE', base_classes, model_classes) + + for base_name in base_classes: + if base_name in rule_specs: + self._gen_base_class(rule_specs[base_name]) + + for model_name, rule in rule_index.items(): + if model_name in rule_index: + self._gen_rule_class( + rule, + rule_specs[model_name], + ) + + return self.printed_text() + + def _gen_base_class(self, spec: BaseClassSpec): + self.print() + self.print() + if spec.base: + self.print(f'class {spec.class_name}({spec.base}):') + else: + self.print(f'class {spec.class_name}:') + with self.indent(): + self.print('pass') + + def _gen_rule_class(self, rule: grammars.Rule, specs: list[BaseClassSpec]): + if not specs: + return + spec = specs[0] + arguments = sorted({safe_name(d) for d, _ in compress_seq(rule.defines())}) + + self.print() + self.print() + self.print('@dataclass(eq=False)') + self.print(f'class {spec.class_name}({spec.base}):') + with self.indent(): + if not arguments: + self.print('pass') + for arg in arguments: + self.print(f'{arg}: Any = None') def walk_Rule(self, rule: grammars.Rule): - specs = self._type_specs(rule) + specs = self._base_class_specs(rule) if not specs: return @@ -108,30 +142,13 @@ def walk_Rule(self, rule: grammars.Rule): for arg in arguments: self.print(f'{arg}: Any = None') - def _type_specs(self, rule: grammars.Rule) -> TypeSpec: - if not self._get_node_class_name(rule): - return [] + def _base_class_specs(self, rule: grammars.Rule) -> BaseClassSpec: + if not rule.params: + return () spec = rule.params[0].split('::') class_names = [safe_name(n) for n in spec] + [f'{self.parser_name}ModelBase'] - - typespec = [] - for i, class_name in enumerate(class_names[:-1]): - base = class_names[i + 1] - typespec.append(TypeSpec(class_name, base)) - - return typespec - - @staticmethod - def _get_node_class_name(rule: grammars.Rule): - if not rule.params: - return None - - node_names = rule.params[0] - if not isinstance(node_names, str): - return None - if not re.match(r'\w+(?:::\w+)*', node_names): - return None - if not node_names[0].isupper(): - return None - return node_names + return tuple( + BaseClassSpec(class_name, class_names[i + 1]) + for i, class_name in enumerate(class_names[:-1]) + ) diff --git a/tatsu/tool.py b/tatsu/tool.py index 26609aed..fe9d0291 100644 --- a/tatsu/tool.py +++ b/tatsu/tool.py @@ -345,7 +345,7 @@ def main(): elif args.ng_parser: result = ngpythoncg(model) elif args.ng_model: - result = ngobjectmodel.codegen(model, parser_name=args.name) + result = ngobjectmodel.modelgen(model, parser_name=args.name) else: result = pythoncg(model) From 0cc8f0d38e5ec591dc989ab7e8b6787bcf725cf1 Mon Sep 17 00:00:00 2001 From: apalala Date: Wed, 29 Nov 2023 14:54:25 -0400 Subject: [PATCH 24/51] [ngcodegen][lint] clear linter warnings --- tatsu/ngcodegen/objectmodel.py | 49 ++++++++++------------------------ tatsu/ngcodegen/python.py | 4 +-- 2 files changed, 16 insertions(+), 37 deletions(-) diff --git a/tatsu/ngcodegen/objectmodel.py b/tatsu/ngcodegen/objectmodel.py index 8a399d2e..73d99201 100644 --- a/tatsu/ngcodegen/objectmodel.py +++ b/tatsu/ngcodegen/objectmodel.py @@ -1,10 +1,9 @@ from collections import namedtuple +from typing import cast from .. import grammars, objectmodel from ..mixins.indent import IndentPrintMixin -from ..util import safe_name, compress_seq -from ..walkers import NodeWalker - +from ..util import compress_seq, safe_name HEADER = """\ #!/usr/bin/env python3 @@ -42,12 +41,12 @@ def __init__(self, context=None, types=None): """ -BaseClassSpec = namedtuple('TypeSpec', ['class_name', 'base']) +BaseClassSpec = namedtuple('BaseClassSpec', ['class_name', 'base']) def modelgen(model: grammars.Model, parser_name: str = '', base_type: type = objectmodel.Node) -> str: generator = PythonModelGenerator(parser_name=parser_name, base_type=base_type) - return generator.generate_model(model) + return generator.generate_model(cast(grammars.Grammar, model)) class PythonModelGenerator(IndentPrintMixin): @@ -58,8 +57,8 @@ def __init__(self, parser_name: str = '', base_type: type = objectmodel.Node): self.parser_name = parser_name or None def generate_model(self, grammar: grammars.Grammar): - base_type_qual = self.base_type.__module__ - base_type_import = f'from {self.base_type.__module__} import {self.base_type.__name__.split('.')[-1]}' + base_type = self.base_type + base_type_import = f'from {base_type.__module__} import {base_type.__name__.split('.')[-1]}' self.parser_name = self.parser_name or grammar.name self.print( @@ -67,7 +66,7 @@ def generate_model(self, grammar: grammars.Grammar): name=self.parser_name, base_type=self.base_type.__name__, base_type_import=base_type_import, - ) + ), ) self.print() self.print() @@ -95,10 +94,11 @@ def generate_model(self, grammar: grammars.Grammar): return self.printed_text() - def _gen_base_class(self, spec: BaseClassSpec): + def _gen_base_class(self, specs: list[BaseClassSpec]): self.print() self.print() - if spec.base: + spec = specs[0] + if specs[0].base: self.print(f'class {spec.class_name}({spec.base}):') else: self.print(f'class {spec.class_name}:') @@ -121,34 +121,13 @@ def _gen_rule_class(self, rule: grammars.Rule, specs: list[BaseClassSpec]): for arg in arguments: self.print(f'{arg}: Any = None') - def walk_Rule(self, rule: grammars.Rule): - specs = self._base_class_specs(rule) - if not specs: - return - - arguments = sorted({safe_name(d) for d, _ in compress_seq(rule.defines())}) - - self.print() - self.print() - - node_spec = specs[0] - base_specs = list(reversed(specs[1:])) - base = base_specs and base_specs[0] or f'{self.parser_name}ModelBase' - self.print(f'class {node_spec.class_name}({base}):') - - with self.indent(): - if not arguments: - self.print('pass') - for arg in arguments: - self.print(f'{arg}: Any = None') - - def _base_class_specs(self, rule: grammars.Rule) -> BaseClassSpec: + def _base_class_specs(self, rule: grammars.Rule) -> list[BaseClassSpec]: if not rule.params: - return () + return [] spec = rule.params[0].split('::') class_names = [safe_name(n) for n in spec] + [f'{self.parser_name}ModelBase'] - return tuple( + return [ BaseClassSpec(class_name, class_names[i + 1]) for i, class_name in enumerate(class_names[:-1]) - ) + ] diff --git a/tatsu/ngcodegen/python.py b/tatsu/ngcodegen/python.py index 12c8b55d..ca03350f 100644 --- a/tatsu/ngcodegen/python.py +++ b/tatsu/ngcodegen/python.py @@ -9,7 +9,7 @@ from ..collections import OrderedSet as oset from ..exceptions import CodegenError from ..mixins.indent import IndentPrintMixin -from ..util import compress_seq, safe_name, trim +from ..util import compress_seq, safe_name from ..walkers import NodeWalker HEADER = """\ @@ -379,7 +379,7 @@ def _gen_defines_declaration(self, node: grammars.Model): def _gen_block(self, exp: grammars.Model, name='block'): if () in exp.lookahead(): - raise CodegenError(f'{self.node} may repeat empty sequence') + raise CodegenError(f'{exp} may repeat empty sequence') n = self._next_n() self.print() From 1deaee3142df5a33af5de2467f641d9d3a15cdfa Mon Sep 17 00:00:00 2001 From: apalala Date: Wed, 29 Nov 2023 15:36:14 -0400 Subject: [PATCH 25/51] [test] solve lint warnings and update unit tests --- tatsu/ngcodegen/objectmodel.py | 2 +- test/grammar/parameter_test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tatsu/ngcodegen/objectmodel.py b/tatsu/ngcodegen/objectmodel.py index 73d99201..83d25d80 100644 --- a/tatsu/ngcodegen/objectmodel.py +++ b/tatsu/ngcodegen/objectmodel.py @@ -58,7 +58,7 @@ def __init__(self, parser_name: str = '', base_type: type = objectmodel.Node): def generate_model(self, grammar: grammars.Grammar): base_type = self.base_type - base_type_import = f'from {base_type.__module__} import {base_type.__name__.split('.')[-1]}' + base_type_import = f"from {base_type.__module__} import {base_type.__name__.split('.')[-1]}" self.parser_name = self.parser_name or grammar.name self.print( diff --git a/test/grammar/parameter_test.py b/test/grammar/parameter_test.py index cbef4b1a..a30ac6db 100644 --- a/test/grammar/parameter_test.py +++ b/test/grammar/parameter_test.py @@ -18,7 +18,7 @@ def test_keyword_params(self): g = GrammarGenerator('Keywords') model = g.parse(grammar) code = codegen(model) - self.assertEqual('#!/usr/bin/env python', code.splitlines()[0]) + self.assertEqual('#!/usr/bin/env python3', code.splitlines()[0]) def test_35_only_keyword_params(self): grammar = """ From e6aebf1249682ee86ec740d0af3d2d38e6cac469 Mon Sep 17 00:00:00 2001 From: apalala Date: Wed, 29 Nov 2023 20:31:44 -0400 Subject: [PATCH 26/51] [mixins][indent] honor print() kwargs --- tatsu/mixins/indent.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tatsu/mixins/indent.py b/tatsu/mixins/indent.py index f707a1d5..e1026a7f 100644 --- a/tatsu/mixins/indent.py +++ b/tatsu/mixins/indent.py @@ -16,7 +16,7 @@ def printed_text(self): def print(self, *args, **kwargs): args = [trim(str(arg)) for arg in args if arg is not None] lines = self.as_printed_lines(*args, **kwargs) - self._do_print_lines(lines) + self._do_print_lines(lines, **kwargs) def as_printed(self, *args, **kwargs): lines = self.as_printed_lines(*args, **kwargs) @@ -49,13 +49,13 @@ def io_print(*args, **kwargs): print(*args, file=output, **kwargs) return output.getvalue() - def _do_print_lines(self, lines: list[str] | None = None): + def _do_print_lines(self, lines: list[str] | None = None, **kwargs): if not lines: - print(file=self.output_stream) + print(file=self.output_stream, **kwargs) return for line in lines: - print(line, file=self.output_stream) + print(line, file=self.output_stream, **kwargs) def indented_lines(self, text): text = trim(text) From b8f4bdd2b80fb9b640102a369d4632eb18ebb9b6 Mon Sep 17 00:00:00 2001 From: apalala Date: Wed, 29 Nov 2023 20:32:33 -0400 Subject: [PATCH 27/51] [ngcodegen][python] debut with what unit tests say --- tatsu/ngcodegen/python.py | 10 +++++----- test/grammar/keyword_test.py | 1 + test/grammar/pattern_test.py | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/tatsu/ngcodegen/python.py b/tatsu/ngcodegen/python.py index ca03350f..c4305483 100644 --- a/tatsu/ngcodegen/python.py +++ b/tatsu/ngcodegen/python.py @@ -127,11 +127,11 @@ def param_repr(p): self.print() self.print( f""" - @tatsumasu({params})\ - {leftrec}\ - {nomemo}\ - {isname} - def _{rule.name}_(self): + @tatsumasu({params})\ + {leftrec}\ + {nomemo}\ + {isname}\ + \ndef _{rule.name}_(self): """, ) with self.indent(): diff --git a/test/grammar/keyword_test.py b/test/grammar/keyword_test.py index 43452d07..2b6ed8bc 100644 --- a/test/grammar/keyword_test.py +++ b/test/grammar/keyword_test.py @@ -72,6 +72,7 @@ def test_check_keywords(self): """ model = compile(grammar, 'test') c = codegen(model) + print(c) parse(c) ast = model.parse('hello world') diff --git a/test/grammar/pattern_test.py b/test/grammar/pattern_test.py index 009ba9f5..9b1de687 100644 --- a/test/grammar/pattern_test.py +++ b/test/grammar/pattern_test.py @@ -112,7 +112,7 @@ def test_multiline_pattern(self): model = compile(grammar=trim(grammar)) print(codegen(model.rules[0].exp.sequence[0])) self.assertEqual( - codegen(model.rules[0].exp.sequence[0]), + codegen(model.rules[0].exp.sequence[0]).strip(), repr("self._pattern('(?x)\nfoo\nbar\n')").strip('"\''), ) From 6064a71fdd8970a1ebfecd9b0035e83c124c6af9 Mon Sep 17 00:00:00 2001 From: apalala Date: Fri, 8 Dec 2023 17:51:28 -0400 Subject: [PATCH 28/51] [tool] arg documentation --- tatsu/tool.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tatsu/tool.py b/tatsu/tool.py index fe9d0291..497c90d0 100644 --- a/tatsu/tool.py +++ b/tatsu/tool.py @@ -44,14 +44,12 @@ def parse_args(): ) main_mode.add_argument( '--ng-parser', - '-x', help='generate parser code from the grammar using new code generator', action='store_true', ) main_mode.add_argument( '--ng-model', - '-y', - help='generate parser model from the grammar using the new code generator', + help='generate a model from the grammar using the new code generator', action='store_true', ) main_mode.add_argument( From 4b6d2db048d96da1337a5da85595940d1086dd4b Mon Sep 17 00:00:00 2001 From: apalala Date: Fri, 8 Dec 2023 18:23:29 -0400 Subject: [PATCH 29/51] [lint] resolve warnings --- ruff.toml | 1 + tatsu/parser_semantics.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ruff.toml b/ruff.toml index 04687d63..28ca6f1e 100644 --- a/ruff.toml +++ b/ruff.toml @@ -20,6 +20,7 @@ ignore = [ "PLR0904", # too-many-public-methods "PLR0913", # too-many-arguments "PLR0915", # too-many-statements + "PLR0917", # too many possitional arguments "PLR2004", # magic-value-comparison "PLW1514", # unspecified-encoding # "PLW0603", # global-statement diff --git a/tatsu/parser_semantics.py b/tatsu/parser_semantics.py index a5ebba90..c61a6e55 100644 --- a/tatsu/parser_semantics.py +++ b/tatsu/parser_semantics.py @@ -142,7 +142,7 @@ def grammar(self, ast, *args): name = ( self.grammar_name if self.grammar_name - else directives.get('grammar', None) + else directives.get('grammar') ) return grammars.Grammar( name, From cf7fdfa7adbd33657c7469201cf44ccc3035106b Mon Sep 17 00:00:00 2001 From: apalala Date: Fri, 8 Dec 2023 19:51:48 -0400 Subject: [PATCH 30/51] [walkers] fix long standing bug --- tatsu/walkers.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tatsu/walkers.py b/tatsu/walkers.py index 02423c02..0cd54f53 100644 --- a/tatsu/walkers.py +++ b/tatsu/walkers.py @@ -42,12 +42,12 @@ def walk(self, node: Node | list[Node], *args, **kwargs) -> Any: def walk_children(self, node: Node, *args, **kwargs): if not isinstance(node, Node): - return () + return [] - for child in node.children(): - return self.walk(child, *args, **kwargs) - - return None + return [ + self.walk(child, *args, **kwargs) + for child in node.children() + ] def _find_walker(self, node: Node, prefix='walk_'): def pythonize_match(m): From bae9710e3bfaa15deb2b4015fe321e1885399d07 Mon Sep 17 00:00:00 2001 From: apalala Date: Fri, 8 Dec 2023 19:52:24 -0400 Subject: [PATCH 31/51] [lint] fix warnings --- tatsu/ngcodegen/python.py | 1 - test/grammar/directive_test.py | 2 +- test/grammar/join_test.py | 2 +- test/grammar/keyword_test.py | 2 +- test/grammar/parameter_test.py | 2 +- test/grammar/pattern_test.py | 2 +- test/grammar/syntax_test.py | 2 +- test/zzz_bootstrap/bootstrap_test.py | 2 +- 8 files changed, 7 insertions(+), 8 deletions(-) diff --git a/tatsu/ngcodegen/python.py b/tatsu/ngcodegen/python.py index c4305483..a8a46ac4 100644 --- a/tatsu/ngcodegen/python.py +++ b/tatsu/ngcodegen/python.py @@ -139,7 +139,6 @@ def param_repr(p): if not isinstance(rule.exp, grammars.Choice): self._gen_defines_declaration(rule) - def walk_BasedRule(self, rule: grammars.BasedRule): # FIXME: the following override is to not alter the previous codegen rule.exp = rule.rhs diff --git a/test/grammar/directive_test.py b/test/grammar/directive_test.py index 604d0cea..fe555955 100644 --- a/test/grammar/directive_test.py +++ b/test/grammar/directive_test.py @@ -1,8 +1,8 @@ import pytest import tatsu -from tatsu.codegen import codegen from tatsu.exceptions import FailedParse +from tatsu.ngcodegen import codegen from tatsu.util import trim EXEC = 'exec' diff --git a/test/grammar/join_test.py b/test/grammar/join_test.py index bd851028..f9430b9c 100644 --- a/test/grammar/join_test.py +++ b/test/grammar/join_test.py @@ -1,8 +1,8 @@ import unittest from ast import parse -from tatsu.codegen import codegen from tatsu.exceptions import FailedParse +from tatsu.ngcodegen import codegen from tatsu.tool import compile from tatsu.util import trim diff --git a/test/grammar/keyword_test.py b/test/grammar/keyword_test.py index 2b6ed8bc..4380354f 100644 --- a/test/grammar/keyword_test.py +++ b/test/grammar/keyword_test.py @@ -1,8 +1,8 @@ import unittest from ast import parse -from tatsu.codegen import codegen from tatsu.exceptions import FailedParse +from tatsu.ngcodegen import codegen from tatsu.tool import compile diff --git a/test/grammar/parameter_test.py b/test/grammar/parameter_test.py index a30ac6db..50dc2f6b 100644 --- a/test/grammar/parameter_test.py +++ b/test/grammar/parameter_test.py @@ -1,7 +1,7 @@ import contextlib import unittest -from tatsu.codegen import codegen +from tatsu.ngcodegen import codegen from tatsu.parser import GrammarGenerator from tatsu.tool import compile from tatsu.util import trim diff --git a/test/grammar/pattern_test.py b/test/grammar/pattern_test.py index 9b1de687..91094fad 100644 --- a/test/grammar/pattern_test.py +++ b/test/grammar/pattern_test.py @@ -1,7 +1,7 @@ import unittest -from tatsu.codegen import codegen from tatsu.exceptions import FailedParse +from tatsu.ngcodegen import codegen from tatsu.tool import compile from tatsu.util import trim diff --git a/test/grammar/syntax_test.py b/test/grammar/syntax_test.py index 50b04f40..e647d964 100644 --- a/test/grammar/syntax_test.py +++ b/test/grammar/syntax_test.py @@ -3,8 +3,8 @@ import pytest from tatsu import tool -from tatsu.codegen import codegen from tatsu.exceptions import FailedParse, FailedToken +from tatsu.ngcodegen import codegen from tatsu.parser import EBNFBuffer from tatsu.tool import compile from tatsu.util import trim diff --git a/test/zzz_bootstrap/bootstrap_test.py b/test/zzz_bootstrap/bootstrap_test.py index 1f1ae95b..cf359f01 100644 --- a/test/zzz_bootstrap/bootstrap_test.py +++ b/test/zzz_bootstrap/bootstrap_test.py @@ -9,7 +9,7 @@ from pathlib import Path from tatsu import util -from tatsu.codegen import codegen +from tatsu.ngcodegen import codegen from tatsu.parser import EBNFParser, GrammarGenerator from tatsu.parser_semantics import EBNFGrammarSemantics from tatsu.util import asjson From 0bcdf05519c43f02534df0211740ac167ee1b00e Mon Sep 17 00:00:00 2001 From: apalala Date: Fri, 8 Dec 2023 20:12:24 -0400 Subject: [PATCH 32/51] some refactoring --- tatsu/ngcodegen/objectmodel.py | 22 +++++++++++----------- tatsu/tool.py | 2 +- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tatsu/ngcodegen/objectmodel.py b/tatsu/ngcodegen/objectmodel.py index 83d25d80..e683b5cd 100644 --- a/tatsu/ngcodegen/objectmodel.py +++ b/tatsu/ngcodegen/objectmodel.py @@ -44,26 +44,26 @@ def __init__(self, context=None, types=None): BaseClassSpec = namedtuple('BaseClassSpec', ['class_name', 'base']) -def modelgen(model: grammars.Model, parser_name: str = '', base_type: type = objectmodel.Node) -> str: - generator = PythonModelGenerator(parser_name=parser_name, base_type=base_type) +def modelgen(model: grammars.Model, name: str = '', base_type: type = objectmodel.Node) -> str: + generator = PythonModelGenerator(name=name, base_type=base_type) return generator.generate_model(cast(grammars.Grammar, model)) class PythonModelGenerator(IndentPrintMixin): - def __init__(self, parser_name: str = '', base_type: type = objectmodel.Node): + def __init__(self, name: str = '', base_type: type = objectmodel.Node): super().__init__() self.base_type = base_type - self.parser_name = parser_name or None + self.name = name or None def generate_model(self, grammar: grammars.Grammar): base_type = self.base_type base_type_import = f"from {base_type.__module__} import {base_type.__name__.split('.')[-1]}" - self.parser_name = self.parser_name or grammar.name + self.name = self.name or grammar.name self.print( HEADER.format( - name=self.parser_name, + name=self.name, base_type=self.base_type.__name__, base_type_import=base_type_import, ), @@ -83,7 +83,7 @@ def generate_model(self, grammar: grammars.Grammar): for base_name in base_classes: if base_name in rule_specs: - self._gen_base_class(rule_specs[base_name]) + self._gen_base_class(rule_specs[base_name][0]) for model_name, rule in rule_index.items(): if model_name in rule_index: @@ -94,13 +94,13 @@ def generate_model(self, grammar: grammars.Grammar): return self.printed_text() - def _gen_base_class(self, specs: list[BaseClassSpec]): + def _gen_base_class(self, spec: BaseClassSpec): self.print() self.print() - spec = specs[0] - if specs[0].base: + if spec.base: self.print(f'class {spec.class_name}({spec.base}):') else: + # FIXME: this cannot happen as base_type is the final base self.print(f'class {spec.class_name}:') with self.indent(): self.print('pass') @@ -126,7 +126,7 @@ def _base_class_specs(self, rule: grammars.Rule) -> list[BaseClassSpec]: return [] spec = rule.params[0].split('::') - class_names = [safe_name(n) for n in spec] + [f'{self.parser_name}ModelBase'] + class_names = [safe_name(n) for n in spec] + [f'{self.name}ModelBase'] return [ BaseClassSpec(class_name, class_names[i + 1]) for i, class_name in enumerate(class_names[:-1]) diff --git a/tatsu/tool.py b/tatsu/tool.py index 497c90d0..5c840421 100644 --- a/tatsu/tool.py +++ b/tatsu/tool.py @@ -343,7 +343,7 @@ def main(): elif args.ng_parser: result = ngpythoncg(model) elif args.ng_model: - result = ngobjectmodel.modelgen(model, parser_name=args.name) + result = ngobjectmodel.modelgen(model, name=args.name) else: result = pythoncg(model) From 1d1d0593a61875e63aa9df714796e59d0b20baed Mon Sep 17 00:00:00 2001 From: apalala Date: Fri, 8 Dec 2023 20:15:07 -0400 Subject: [PATCH 33/51] [docs] bug fix --- docs/print_translation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/print_translation.rst b/docs/print_translation.rst index da477b88..2d8d5ca3 100644 --- a/docs/print_translation.rst +++ b/docs/print_translation.rst @@ -21,7 +21,7 @@ and should be used thus: def walk_SomeNode(self, node): with self.indent(): - # ccontinue walking the tree + # continue walking the tree The ``self.print()`` method takes note of the current level of indentation, so From 58e73aaaa9ddc1ec1af8db7af999cf4c822fb533 Mon Sep 17 00:00:00 2001 From: apalala Date: Fri, 8 Dec 2023 20:33:50 -0400 Subject: [PATCH 34/51] [docs] deprecate declarative translation --- docs/declarative_translation.rst | 94 +++++++++++++++++++++++++++++++ docs/index.rst | 2 +- docs/print_translation.rst | 41 -------------- docs/translation.rst | 97 +++++++++----------------------- 4 files changed, 123 insertions(+), 111 deletions(-) create mode 100644 docs/declarative_translation.rst delete mode 100644 docs/print_translation.rst diff --git a/docs/declarative_translation.rst b/docs/declarative_translation.rst new file mode 100644 index 00000000..82189727 --- /dev/null +++ b/docs/declarative_translation.rst @@ -0,0 +1,94 @@ +.. include:: links.rst + +.. _mini-tutorial: mini-tutorial.rst + +.. _pegen: https://github.com/we-like-parsers/pegen +.. _PEG parser: https://peps.python.org/pep-0617/ + +Declarative Translation (Deprecated) +------------------------------------ + +Translation is one of the most common tasks in language processing. +Analysis often sumarizes the parsed input, and *walkers* are good for that. +In translation, the output can often be as verbose as the input, so a systematic approach that avoids bookkeeping as much as possible is convenient. + +|TatSu| provides support for template-based code generation ("translation", see below) +in the ``tatsu.codegen`` module. +Code generation works by defining a translation class for each class in the model specified by the grammar. + +Nowadays the preferred code generation strategy is to walk down the AST_ and `print()` the desired output, +with the help of the ``NodWalker`` class, and the ``IndentPrintMixin`` mixin. That's the strategy used +by pegen_, the precursor to the new `PEG parser`_ in Python_. Please take a lookt at the +`mini-tutorial`_ for an example. + +Basically, the code generation strategy changed from declarative with library support, to procedural, +breadth or depth first, using only standard Python_. The procedural code must know the AST_ structure +to navigate it, although other strategies are available with ``PreOrderWalker``, ``DepthFirstWalker``, +and ``ContextWalker``. + +**deprecated** + +|TatSu| doesn't impose a way to create translators with it, but it +exposes the facilities it uses to generate the `Python`_ source code for +parsers. + +Translation in |TatSu| was *template-based*, but instead of defining or +using a complex templating engine (yet another language), it relies on +the simple but powerful ``string.Formatter`` of the `Python`_ standard +library. The templates are simple strings that, in |TatSu|'s style, +are inlined with the code. + +To generate a parser, |TatSu| constructs an object model of the parsed +grammar. A ``tatsu.codegen.CodeGenerator`` instance matches model +objects to classes that descend from ``tatsu.codegen.ModelRenderer`` and +implement the translation and rendering using string templates. +Templates are left-trimmed on whitespace, like `Python`_ *doc-comments* +are. This is an example taken from |TatSu|'s source code: + +.. code:: python + + class Lookahead(ModelRenderer): + template = '''\ + with self._if(): + {exp:1::}\ + ''' + +Every *attribute* of the object that doesn't start with an underscore +(``_``) may be used as a template field, and fields can be added or +modified by overriding the ``render_fields(fields)`` method. Fields +themselves are *lazily rendered* before being expanded by the template, +so a field may be an instance of a ``ModelRenderer`` descendant. + +The ``rendering`` module defines a ``Formatter`` enhanced to support the +rendering of items in an *iterable* one by one. The syntax to achieve +that is: + +.. code:: python + + ''' + {fieldname:ind:sep:fmt} + ''' + +All of ``ind``, ``sep``, and ``fmt`` are optional, but the three +*colons* are not. A field specified that way will be rendered using: + +.. code:: python + + indent(sep.join(fmt % render(v) for v in value), ind) + +The extended format can also be used with non-iterables, in which case +the rendering will be: + +.. code:: python + + indent(fmt % render(value), ind) + +The default multiplier for ``ind`` is ``4``, but that can be overridden +using ``n*m`` (for example ``3*1``) in the format. + +**note** + Using a newline character (``\n``) as separator will interfere with + left trimming and indentation of templates. To use a newline as + separator, specify it as ``\\n``, and the renderer will understand + the intention. + diff --git a/docs/index.rst b/docs/index.rst index e50f9bf5..1152e6ec 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -47,8 +47,8 @@ input, much like the `re`_ module does with regular expressions, or it can gener semantics models asjson - print_translation translation + declarative_translation left_recursion mini-tutorial traces diff --git a/docs/print_translation.rst b/docs/print_translation.rst deleted file mode 100644 index 2d8d5ca3..00000000 --- a/docs/print_translation.rst +++ /dev/null @@ -1,41 +0,0 @@ -.. include:: links.rst - -Print Translation ------------------ - - -|TatSu| doesn't impose a way to create translators, but it -exposes the facilities it uses to generate the `Python`_ source code for -parsers. - -Translation in |TatSu| is based on subclasses of ``Walker`` and on classes that -inherit from ``IndentPrintMixin``, a strategy copied from the new PEG_ parser -in Python_ (see `PEP 617`_). - -``IndentPrintMixin`` provides an ``indent()`` method, which is a context manager, -and should be used thus: - -.. code:: python - - class MyTranslationWalker(NodeWalker, IndentPrintMixin): - - def walk_SomeNode(self, node): - with self.indent(): - # continue walking the tree - - -The ``self.print()`` method takes note of the current level of indentation, so -output will be indented by the ``indent`` passed to -the ``IndentPrintConstructor``: - -.. code:: python - - def walk_SomeNode(self, node): - with self.indent(): - self.print(walk_expression(node.exp)) - -The printed code can be retrieved using the ``printed_text()`` method. Other -posibilities are available by assigning a text-like object to -``self.output_stream`` in the ``__init__()`` method. - -.. _PEP 617: https://peps.python.org/pep-0617/ diff --git a/docs/translation.rst b/docs/translation.rst index 192ac503..60afc1b5 100644 --- a/docs/translation.rst +++ b/docs/translation.rst @@ -1,94 +1,53 @@ .. include:: links.rst -.. _mini-tutorial: mini-tutorial.rst - -.. _pegen: https://github.com/we-like-parsers/pegen -.. _PEG parser: https://peps.python.org/pep-0617/ - -Declarative Translation ------------------------ +Translation +----------- Translation is one of the most common tasks in language processing. Analysis often sumarizes the parsed input, and *walkers* are good for that. -In translation, the output can often be as verbose as the input, so a systematic approach that avoids bookkeeping as much as possible is convenient. - -|TatSu| provides support for template-based code generation ("translation", see below) -in the ``tatsu.codegen`` module. -Code generation works by defining a translation class for each class in the model specified by the grammar. - -Nowadays the preferred code generation strategy is to walk down the AST_ and `print()` the desired output, -with the help of the ``NodWalker`` class, and the ``IndentPrintMixin`` mixin. That's the strategy used -by pegen_, the precursor to the new `PEG parser`_ in Python_. Please take a lookt at the -`mini-tutorial`_ for an example. - -Basically, the code generation strategy changed from declarative with library support, to procedural, -breadth or depth first, using only standard Python_. The procedural code must know the AST_ structure -to navigate it, although other strategies are available with ``PreOrderWalker``, ``DepthFirstWalker``, -and ``ContextWalker``. -**deprecated** -|TatSu| doesn't impose a way to create translators with it, but it +|TatSu| doesn't impose a way to create translators, but it exposes the facilities it uses to generate the `Python`_ source code for parsers. -Translation in |TatSu| was *template-based*, but instead of defining or -using a complex templating engine (yet another language), it relies on -the simple but powerful ``string.Formatter`` of the `Python`_ standard -library. The templates are simple strings that, in |TatSu|'s style, -are inlined with the code. +Translation in |TatSu| is based on subclasses of ``Walker`` and on classes that +inherit from ``IndentPrintMixin``, a strategy copied from the new PEG_ parser +in Python_ (see `PEP 617`_). -To generate a parser, |TatSu| constructs an object model of the parsed -grammar. A ``tatsu.codegen.CodeGenerator`` instance matches model -objects to classes that descend from ``tatsu.codegen.ModelRenderer`` and -implement the translation and rendering using string templates. -Templates are left-trimmed on whitespace, like `Python`_ *doc-comments* -are. This is an example taken from |TatSu|'s source code: +``IndentPrintMixin`` provides an ``indent()`` method, which is a context manager, +and should be used thus: .. code:: python - class Lookahead(ModelRenderer): - template = '''\ - with self._if(): - {exp:1::}\ - ''' + class MyTranslationWalker(NodeWalker, IndentPrintMixin): -Every *attribute* of the object that doesn't start with an underscore -(``_``) may be used as a template field, and fields can be added or -modified by overriding the ``render_fields(fields)`` method. Fields -themselves are *lazily rendered* before being expanded by the template, -so a field may be an instance of a ``ModelRenderer`` descendant. + def walk_SomeNode(self, node): + self.print('some preamble') + with self.indent(): + # continue walking the tree -The ``rendering`` module defines a ``Formatter`` enhanced to support the -rendering of items in an *iterable* one by one. The syntax to achieve -that is: -.. code:: python - - ''' - {fieldname:ind:sep:fmt} - ''' +The ``self.print()`` method takes note of the current level of indentation, so +output will be indented by the `indent` passed to +the ``IndentPrintMixin`` constructor, or to the ``indent(iamoun:int)`` method. +The mixin keeps as stack of the indent ammounts so it can go back to where it +was after each ``with indent(amount=n):`` statement: -All of ``ind``, ``sep``, and ``fmt`` are optional, but the three -*colons* are not. A field specified that way will be rendered using: .. code:: python - indent(sep.join(fmt % render(v) for v in value), ind) - -The extended format can also be used with non-iterables, in which case -the rendering will be: - -.. code:: python + def walk_SomeNode(self, node): + with self.indent(amount=2): + self.print(walk_expression(node.exp)) - indent(fmt % render(value), ind) +The printed code can be retrieved using the ``printed_text()`` method, but other +posibilities are available by assigning a text-like object to +``self.output_stream`` in the ``__init__()`` method. -The default multiplier for ``ind`` is ``4``, but that can be overridden -using ``n*m`` (for example ``3*1``) in the format. +A good example of how to do code generation with a ``NodeWalker`` is |TatSu|'s own +code generator, which can be found in ``tatsu/ngcodegen/python.py``, or the model +generation found in ``tatsu/ngcodegen/objectomdel.py``. -**note** - Using a newline character (``\n``) as separator will interfere with - left trimming and indentation of templates. To use a newline as - separator, specify it as ``\\n``, and the renderer will understand - the intention. +.. _PEP 617: https://peps.python.org/pep-0617/ From 48df36e0bb986293f825088654b686ed442f3db8 Mon Sep 17 00:00:00 2001 From: apalala Date: Sat, 9 Dec 2023 12:17:23 -0400 Subject: [PATCH 35/51] [walkers] refactor and cleanup --- tatsu/walkers.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/tatsu/walkers.py b/tatsu/walkers.py index 0cd54f53..88867d06 100644 --- a/tatsu/walkers.py +++ b/tatsu/walkers.py @@ -57,8 +57,8 @@ def pythonize_match(m): node_cls = node.__class__ node_cls_qualname = node_cls.__qualname__ - if node_cls_qualname in self._walker_cache: - return self._walker_cache[node_cls_qualname] + if walker := self._walker_cache.get(node_cls_qualname): + return walker node_classes = [node.__class__] while node_classes: @@ -78,13 +78,12 @@ def pythonize_match(m): if callable(walker): break - # walk_pythonic_name with single underscore after walk - - # pythonic_name = pythonic_name.lstrip('_') - # if pythonic_name != cammelcase_name: - # walker = getattr(cls, prefix + pythonic_name, None) - # if callable(walker): - # break + # walk_pythonic_name with single underscore after prefix + pythonic_name = pythonic_name.lstrip('_') + if pythonic_name != cammelcase_name: + walker = getattr(cls, prefix + pythonic_name, None) + if callable(walker): + break for b in node_cls.__bases__: if b not in node_classes: From 2f9d487ab9c1acee301e2cd53af8f3bec77a2ad1 Mon Sep 17 00:00:00 2001 From: apalala Date: Sat, 9 Dec 2023 12:17:47 -0400 Subject: [PATCH 36/51] [docs] update for walkers and refactor --- docs/antlr.rst | 4 ++-- docs/asjson.rst | 25 ------------------------ docs/grako.rst | 19 ------------------- docs/index.rst | 2 -- docs/install.rst | 3 ++- docs/models.rst | 49 ++++++++++++++++++++++++++++++++++++++---------- 6 files changed, 43 insertions(+), 59 deletions(-) delete mode 100644 docs/asjson.rst delete mode 100644 docs/grako.rst diff --git a/docs/antlr.rst b/docs/antlr.rst index 52846491..082d535b 100644 --- a/docs/antlr.rst +++ b/docs/antlr.rst @@ -1,8 +1,8 @@ .. include:: links.rst -Using ANTLR Grammars --------------------- +ANTLR Grammars +-------------- .. _grammars: https://github.com/antlr/grammars-v4 diff --git a/docs/asjson.rst b/docs/asjson.rst deleted file mode 100644 index 7edd772c..00000000 --- a/docs/asjson.rst +++ /dev/null @@ -1,25 +0,0 @@ -.. include:: links.rst - -Viewing Models as JSON ----------------------- - - -Models generated by |TatSu| can be viewed by converting them to a JSON-compatible structure -with the help of ``tatsu.util.asjson()``. The protocol tries to provide the best -representation for common types, and can handle any type using ``repr()``. There are provisions for structures with back-references, so there's no infinite recursion. - -.. code:: python - - import json - - print(json.dumps(asjson(model), indent=2)) - -The ``model``, with richer semantics, remains unaltered. - -Conversion to a JSON-compatible structure relies on the protocol defined by -``tatsu.utils.AsJSONMixin``. The mixin defines a ``__json__(seen=None)`` -method that allows classes to define their best translation. You can use ``AsJSONMixin`` -as a base class in your own models to take advantage of ``asjson()``, and you can -specialize the conversion by overriding ``AsJSONMixin.__json__()``. - -You can also write your own version of ``asjson()`` to handle special cases that are recurrent in your context. diff --git a/docs/grako.rst b/docs/grako.rst deleted file mode 100644 index 2b7ecc5f..00000000 --- a/docs/grako.rst +++ /dev/null @@ -1,19 +0,0 @@ -.. include:: links.rst - -Grako Compatibility -------------------- - -|TatSu| is routinely tested over major projects developed with Grako_. The -backwards-compatibility suite includes (at least) translators for COBOL_, Java_, and (Oracle) SQL_. - -Grako_ grammars and projects can be used with |TatSu|, with these caveats: - -* The `AST`_ type retuned when a sequence of elements is matched is now ``tuple`` (instead of a descendant of ``list``). This change improves efficiency and avoids unwanted manipulations of a value that should be inmutable. - -* The Python_ module name changed to ``tatsu``. - -* ``ignorecase`` no longer applies to regular expressions in grammars. Use ``(?i)`` in the pattern to enable ``re.IGNORECASE`` - -* Left recursion is enabled by default because it works and has zero impact on non-recursive grammars. - -* Deprecated grammar syntax is no longer documented. It's best not to use it, as it will be removed in a future version of |TatSu|. diff --git a/docs/index.rst b/docs/index.rst index 1152e6ec..dab6bc5b 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -46,13 +46,11 @@ input, much like the `re`_ module does with regular expressions, or it can gener ast semantics models - asjson translation declarative_translation left_recursion mini-tutorial traces - grako antlr examples support diff --git a/docs/install.rst b/docs/install.rst index b359b7e1..c7c2d931 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -9,5 +9,6 @@ Installation $ pip install tatsu .. warning:: - Versions of |TatSu| since 5.0.0 may require Python>=3.8. Python 2.7 is no longer supported + Modern versions of |TatSu| require active versions of Python (if the Python + version is more than one and a half years old, things may not work). diff --git a/docs/models.rst b/docs/models.rst index 21867e33..b54dcb6d 100644 --- a/docs/models.rst +++ b/docs/models.rst @@ -1,8 +1,12 @@ .. include:: links.rst +Models +------ + + Building Models ---------------- +~~~~~~~~~~~~~~~ Naming elements in grammar rules makes the parser discard uninteresting parts of the input, like punctuation, to produce an *Abstract Syntax @@ -41,6 +45,32 @@ You can also use `Python`_'s built-in types as node types, and default behavior can be overidden by defining a method to handle the result of any particular grammar rule. + + +Viewing Models as JSON +~~~~~~~~~~~~~~~~~~~~~~ + + +Models generated by |TatSu| can be viewed by converting them to a JSON-compatible structure +with the help of ``tatsu.util.asjson()``. The protocol tries to provide the best +representation for common types, and can handle any type using ``repr()``. There are provisions for structures with back-references, so there's no infinite recursion. + +.. code:: python + + import json + + print(json.dumps(asjson(model), indent=2)) + +The ``model``, with richer semantics, remains unaltered. + +Conversion to a JSON-compatible structure relies on the protocol defined by +``tatsu.utils.AsJSONMixin``. The mixin defines a ``__json__(seen=None)`` +method that allows classes to define their best translation. You can use ``AsJSONMixin`` +as a base class in your own models to take advantage of ``asjson()``, and you can +specialize the conversion by overriding ``AsJSONMixin.__json__()``. + +You can also write your own version of ``asjson()`` to handle special cases that are recurrent in your context. + Walking Models ~~~~~~~~~~~~~~ @@ -82,19 +112,18 @@ methods such as: return s def walk_object(self, o): - raise Exception('Unexpected tyle %s walked', type(o).__name__) + raise Exception(f'Unexpected type {type(o).__name__} walked') -Predeclared classes can be passed to ``ModelBuilderSemantics`` instances -through the ``types=`` parameter: - -.. code:: python +Which nodes get *walked* is up to the ``NodeWalker`` implementation. Some +strategies for walking *all* or *most* nodes are implemented as classes +in ``tatsu.wakers``, such as ``PreOrderWalker`` and ``DepthFirstWalker``. - from mymodel import AddOperator, MulOperator +Sometimes nodes must be walked more than once for the purpose at hand, and it's +up to the walker how and when to do that. - semantics=ModelBuilderSemantics(types=[AddOperator, MulOperator]) +Take a look at ``tatsu.ngcodegen.PythonCodeGenerator`` for the walker that generates +a parser in Python from the model of a parsed grammar. -``ModelBuilderSemantics`` assumes nothing about ``types=``, so any -constructor (a function, or a partial function) can be used. Model Class Hierarchies ~~~~~~~~~~~~~~~~~~~~~~~ From 1d84b2568b7219f2b9a0f54cf4d79551458c6865 Mon Sep 17 00:00:00 2001 From: apalala Date: Sat, 9 Dec 2023 12:42:48 -0400 Subject: [PATCH 37/51] [docs] refactor translation --- docs/declarative_translation.rst | 94 ------------------------- docs/index.rst | 1 - docs/translation.rst | 115 ++++++++++++++++++++++++++++--- 3 files changed, 105 insertions(+), 105 deletions(-) delete mode 100644 docs/declarative_translation.rst diff --git a/docs/declarative_translation.rst b/docs/declarative_translation.rst deleted file mode 100644 index 82189727..00000000 --- a/docs/declarative_translation.rst +++ /dev/null @@ -1,94 +0,0 @@ -.. include:: links.rst - -.. _mini-tutorial: mini-tutorial.rst - -.. _pegen: https://github.com/we-like-parsers/pegen -.. _PEG parser: https://peps.python.org/pep-0617/ - -Declarative Translation (Deprecated) ------------------------------------- - -Translation is one of the most common tasks in language processing. -Analysis often sumarizes the parsed input, and *walkers* are good for that. -In translation, the output can often be as verbose as the input, so a systematic approach that avoids bookkeeping as much as possible is convenient. - -|TatSu| provides support for template-based code generation ("translation", see below) -in the ``tatsu.codegen`` module. -Code generation works by defining a translation class for each class in the model specified by the grammar. - -Nowadays the preferred code generation strategy is to walk down the AST_ and `print()` the desired output, -with the help of the ``NodWalker`` class, and the ``IndentPrintMixin`` mixin. That's the strategy used -by pegen_, the precursor to the new `PEG parser`_ in Python_. Please take a lookt at the -`mini-tutorial`_ for an example. - -Basically, the code generation strategy changed from declarative with library support, to procedural, -breadth or depth first, using only standard Python_. The procedural code must know the AST_ structure -to navigate it, although other strategies are available with ``PreOrderWalker``, ``DepthFirstWalker``, -and ``ContextWalker``. - -**deprecated** - -|TatSu| doesn't impose a way to create translators with it, but it -exposes the facilities it uses to generate the `Python`_ source code for -parsers. - -Translation in |TatSu| was *template-based*, but instead of defining or -using a complex templating engine (yet another language), it relies on -the simple but powerful ``string.Formatter`` of the `Python`_ standard -library. The templates are simple strings that, in |TatSu|'s style, -are inlined with the code. - -To generate a parser, |TatSu| constructs an object model of the parsed -grammar. A ``tatsu.codegen.CodeGenerator`` instance matches model -objects to classes that descend from ``tatsu.codegen.ModelRenderer`` and -implement the translation and rendering using string templates. -Templates are left-trimmed on whitespace, like `Python`_ *doc-comments* -are. This is an example taken from |TatSu|'s source code: - -.. code:: python - - class Lookahead(ModelRenderer): - template = '''\ - with self._if(): - {exp:1::}\ - ''' - -Every *attribute* of the object that doesn't start with an underscore -(``_``) may be used as a template field, and fields can be added or -modified by overriding the ``render_fields(fields)`` method. Fields -themselves are *lazily rendered* before being expanded by the template, -so a field may be an instance of a ``ModelRenderer`` descendant. - -The ``rendering`` module defines a ``Formatter`` enhanced to support the -rendering of items in an *iterable* one by one. The syntax to achieve -that is: - -.. code:: python - - ''' - {fieldname:ind:sep:fmt} - ''' - -All of ``ind``, ``sep``, and ``fmt`` are optional, but the three -*colons* are not. A field specified that way will be rendered using: - -.. code:: python - - indent(sep.join(fmt % render(v) for v in value), ind) - -The extended format can also be used with non-iterables, in which case -the rendering will be: - -.. code:: python - - indent(fmt % render(value), ind) - -The default multiplier for ``ind`` is ``4``, but that can be overridden -using ``n*m`` (for example ``3*1``) in the format. - -**note** - Using a newline character (``\n``) as separator will interfere with - left trimming and indentation of templates. To use a newline as - separator, specify it as ``\\n``, and the renderer will understand - the intention. - diff --git a/docs/index.rst b/docs/index.rst index dab6bc5b..3163e585 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -47,7 +47,6 @@ input, much like the `re`_ module does with regular expressions, or it can gener semantics models translation - declarative_translation left_recursion mini-tutorial traces diff --git a/docs/translation.rst b/docs/translation.rst index 60afc1b5..9a8277be 100644 --- a/docs/translation.rst +++ b/docs/translation.rst @@ -1,5 +1,10 @@ .. include:: links.rst +.. _mini-tutorial: mini-tutorial.rst + +.. _pegen: https://github.com/we-like-parsers/pegen +.. _PEG parser: https://peps.python.org/pep-0617/ + Translation ----------- @@ -11,9 +16,13 @@ Analysis often sumarizes the parsed input, and *walkers* are good for that. exposes the facilities it uses to generate the `Python`_ source code for parsers. -Translation in |TatSu| is based on subclasses of ``Walker`` and on classes that -inherit from ``IndentPrintMixin``, a strategy copied from the new PEG_ parser -in Python_ (see `PEP 617`_). + +Print Translation +~~~~~~~~~~~~~~~~~ + +Translation in |TatSu| is based on subclasses of ``NodeWalker``. Print-based translation +relies on classes that inherit from ``IndentPrintMixin``, a strategy copied from +the new PEG_ parser in Python_ (see `PEP 617`_). ``IndentPrintMixin`` provides an ``indent()`` method, which is a context manager, and should be used thus: @@ -22,32 +31,118 @@ and should be used thus: class MyTranslationWalker(NodeWalker, IndentPrintMixin): - def walk_SomeNode(self, node): + def walk_SomeNodeType(self, node: NodeType): self.print('some preamble') with self.indent(): # continue walking the tree + self.print('something else') The ``self.print()`` method takes note of the current level of indentation, so output will be indented by the `indent` passed to -the ``IndentPrintMixin`` constructor, or to the ``indent(iamoun:int)`` method. +the ``IndentPrintMixin`` constructor, or to the ``indent(amount: int)`` method. The mixin keeps as stack of the indent ammounts so it can go back to where it was after each ``with indent(amount=n):`` statement: .. code:: python - def walk_SomeNode(self, node): + def walk_SomeNodeType(self, node: NodeType): with self.indent(amount=2): - self.print(walk_expression(node.exp)) + self.print(node.exp) The printed code can be retrieved using the ``printed_text()`` method, but other -posibilities are available by assigning a text-like object to +posibilities are available by assigning a stream-like object to ``self.output_stream`` in the ``__init__()`` method. -A good example of how to do code generation with a ``NodeWalker`` is |TatSu|'s own -code generator, which can be found in ``tatsu/ngcodegen/python.py``, or the model +A good example of how to do code generation with a ``NodeWalker`` and ``IndentPrintMixin`` +is |TatSu|'s own code generator, which can be +found in ``tatsu/ngcodegen/python.py``, or the model generation found in ``tatsu/ngcodegen/objectomdel.py``. .. _PEP 617: https://peps.python.org/pep-0617/ + + +Declarative Translation (deprecated) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + +|TatSu| provides support for template-based code generation ("translation", see below) +in the ``tatsu.codegen`` module. +Code generation works by defining a translation class for each class in the model specified by the grammar. + +Nowadays the preferred code generation strategy is to walk down the AST_ and `print()` the desired output, +with the help of the ``NodWalker`` class, and the ``IndentPrintMixin`` mixin. That's the strategy used +by pegen_, the precursor to the new `PEG parser`_ in Python_. Please take a lookt at the +`mini-tutorial`_ for an example. + +Basically, the code generation strategy changed from declarative with library support, to procedural, +breadth or depth first, using only standard Python_. The procedural code must know the AST_ structure +to navigate it, although other strategies are available with ``PreOrderWalker``, ``DepthFirstWalker``, +and ``ContextWalker``. + +|TatSu| doesn't impose a way to create translators with it, but it +exposes the facilities it uses to generate the `Python`_ source code for +parsers. + +Translation in |TatSu| was *template-based*, but instead of defining or +using a complex templating engine (yet another language), it relies on +the simple but powerful ``string.Formatter`` of the `Python`_ standard +library. The templates are simple strings that, in |TatSu|'s style, +are inlined with the code. + +To generate a parser, |TatSu| constructs an object model of the parsed +grammar. A ``tatsu.codegen.CodeGenerator`` instance matches model +objects to classes that descend from ``tatsu.codegen.ModelRenderer`` and +implement the translation and rendering using string templates. +Templates are left-trimmed on whitespace, like `Python`_ *doc-comments* +are. This is an example taken from |TatSu|'s source code: + +.. code:: python + + class Lookahead(ModelRenderer): + template = '''\ + with self._if(): + {exp:1::}\ + ''' + +Every *attribute* of the object that doesn't start with an underscore +(``_``) may be used as a template field, and fields can be added or +modified by overriding the ``render_fields(fields)`` method. Fields +themselves are *lazily rendered* before being expanded by the template, +so a field may be an instance of a ``ModelRenderer`` descendant. + +The ``rendering`` module defines a ``Formatter`` enhanced to support the +rendering of items in an *iterable* one by one. The syntax to achieve +that is: + +.. code:: python + + ''' + {fieldname:ind:sep:fmt} + ''' + +All of ``ind``, ``sep``, and ``fmt`` are optional, but the three +*colons* are not. A field specified that way will be rendered using: + +.. code:: python + + indent(sep.join(fmt % render(v) for v in value), ind) + +The extended format can also be used with non-iterables, in which case +the rendering will be: + +.. code:: python + + indent(fmt % render(value), ind) + +The default multiplier for ``ind`` is ``4``, but that can be overridden +using ``n*m`` (for example ``3*1``) in the format. + +**note** + Using a newline character (``\n``) as separator will interfere with + left trimming and indentation of templates. To use a newline as + separator, specify it as ``\\n``, and the renderer will understand + the intention. + From 40e9dd21e2ea7f990171a4f21d040b4bf450e13a Mon Sep 17 00:00:00 2001 From: apalala Date: Sat, 9 Dec 2023 14:47:49 -0400 Subject: [PATCH 38/51] [ngcodegen] fix bugs --- tatsu/ngcodegen/objectmodel.py | 6 +++--- tatsu/ngcodegen/python.py | 14 +++++++------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tatsu/ngcodegen/objectmodel.py b/tatsu/ngcodegen/objectmodel.py index e683b5cd..a6769b28 100644 --- a/tatsu/ngcodegen/objectmodel.py +++ b/tatsu/ngcodegen/objectmodel.py @@ -1,5 +1,4 @@ from collections import namedtuple -from typing import cast from .. import grammars, objectmodel from ..mixins.indent import IndentPrintMixin @@ -44,9 +43,10 @@ def __init__(self, context=None, types=None): BaseClassSpec = namedtuple('BaseClassSpec', ['class_name', 'base']) -def modelgen(model: grammars.Model, name: str = '', base_type: type = objectmodel.Node) -> str: +def modelgen(model: grammars.Grammar, name: str = '', base_type: type = objectmodel.Node | None) -> str: + base_type = base_type or objectmodel.Node generator = PythonModelGenerator(name=name, base_type=base_type) - return generator.generate_model(cast(grammars.Grammar, model)) + return generator.generate_model(model) class PythonModelGenerator(IndentPrintMixin): diff --git a/tatsu/ngcodegen/python.py b/tatsu/ngcodegen/python.py index a8a46ac4..cf20ff33 100644 --- a/tatsu/ngcodegen/python.py +++ b/tatsu/ngcodegen/python.py @@ -86,8 +86,8 @@ def walk_Grammar(self, grammar: grammars.Grammar): self.print() self._gen_keywords(grammar) - self._gen_buffering(grammar) - self._gen_parsing(grammar) + self._gen_buffering(grammar, parser_name) + self._gen_parsing(grammar, parser_name) self.print() self.print(FOOTER.format(name=parser_name)) @@ -320,7 +320,7 @@ def _gen_init(self, grammar: grammars.Grammar): whitespace={grammar.config.whitespace!r}, nameguard={grammar.config.nameguard}, ignorecase={grammar.config.ignorecase}, - namechars={grammar.config.namechars or None}, + namechars={grammar.config.namechars!r}, parseinfo={grammar.config.parseinfo}, comments_re={grammar.config.comments_re!r}, eol_comments_re={grammar.config.eol_comments_re!r}, @@ -332,8 +332,8 @@ def _gen_init(self, grammar: grammars.Grammar): ) self.print() - def _gen_buffering(self, grammar: grammars.Grammar): - self.print(f'class {self.parser_name}Buffer(Buffer):') + def _gen_buffering(self, grammar: grammars.Grammar, parser_name: str): + self.print(f'class {parser_name}Buffer(Buffer):') with self.indent(): self.print('def __init__(self, text, /, config: ParserConfig | None = None, **settings):') @@ -343,8 +343,8 @@ def _gen_buffering(self, grammar: grammars.Grammar): self.print() - def _gen_parsing(self, grammar: grammars.Grammar): - self.print(f'class {self.parser_name}Parser(Parser):') + def _gen_parsing(self, grammar: grammars.Gramma, parser_name: strr): + self.print(f'class {parser_name}Parser(Parser):') with self.indent(): self.print('def __init__(self, /, config: ParserConfig | None = None, **settings):') with self.indent(): From 9cc66aff0a37bd200c695cef70d7a9e84d44de15 Mon Sep 17 00:00:00 2001 From: apalala Date: Sat, 9 Dec 2023 14:52:25 -0400 Subject: [PATCH 39/51] [tool] replace code generation with ng --- tatsu/tool.py | 30 ++++++------------------------ 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/tatsu/tool.py b/tatsu/tool.py index 5c840421..c830facd 100644 --- a/tatsu/tool.py +++ b/tatsu/tool.py @@ -12,10 +12,6 @@ from pathlib import Path from ._version import __version__ -from .codegen import objectmodel - -# we hook the tool to the Python code generator as the default -from .codegen.python import codegen as pythoncg from .exceptions import ParseException from .infos import ParserConfig from .ngcodegen import codegen as ngpythoncg @@ -42,16 +38,6 @@ def parse_args(): help='generate parser code from the grammar (default)', action='store_true', ) - main_mode.add_argument( - '--ng-parser', - help='generate parser code from the grammar using new code generator', - action='store_true', - ) - main_mode.add_argument( - '--ng-model', - help='generate a model from the grammar using the new code generator', - action='store_true', - ) main_mode.add_argument( '--draw', '-d', @@ -237,7 +223,7 @@ def to_python_sourcecode( model = compile( grammar, name=name, filename=filename, config=config, **settings, ) - return pythoncg(model) + return ngpythoncg(model) def to_python_model( @@ -251,7 +237,7 @@ def to_python_model( model = compile( grammar, name=name, filename=filename, config=config, **settings, ) - return objectmodel.codegen(model, base_type=base_type) + return ngobjectmodel.modelgen(model, base_type=base_type) # for backwards compatibility. Use `compile()` instead @@ -275,7 +261,7 @@ def gencode( grammar=None, trace=False, filename=None, - codegen=pythoncg, + codegen=ngpythoncg, config: ParserConfig | None = None, **settings, ): @@ -339,13 +325,9 @@ def main(): elif args.pretty_lean: result = model.pretty_lean() elif args.object_model: - result = objectmodel.codegen(model, base_type=args.base_type) - elif args.ng_parser: - result = ngpythoncg(model) - elif args.ng_model: - result = ngobjectmodel.modelgen(model, name=args.name) + result = ngobjectmodel.modelgen(model, base_type=args.base_type) else: - result = pythoncg(model) + result = ngpythoncg(model) if outfile: save(outfile, result) @@ -356,7 +338,7 @@ def main(): if args.object_model_outfile: save( args.object_model_outfile, - objectmodel.codegen(model, base_type=args.base_type), + ngobjectmodel.modelgen(model, base_type=args.base_type), ) print('-' * 72, file=sys.stderr) From 1db66557bbb4b078d3315ec3b93d6c2a26629030 Mon Sep 17 00:00:00 2001 From: apalala Date: Sat, 9 Dec 2023 16:19:54 -0400 Subject: [PATCH 40/51] [ngcodegen][model] debug --- tatsu/ngcodegen/objectmodel.py | 58 ++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/tatsu/ngcodegen/objectmodel.py b/tatsu/ngcodegen/objectmodel.py index a6769b28..05ba0661 100644 --- a/tatsu/ngcodegen/objectmodel.py +++ b/tatsu/ngcodegen/objectmodel.py @@ -20,21 +20,16 @@ from typing import Any from dataclasses import dataclass - + from tatsu.semantics import ModelBuilderSemantics {base_type_import} - - - @dataclass(eq=False) - class {name}ModelBase({base_type}): - pass - - + + class {name}ModelBuilderSemantics(ModelBuilderSemantics): def __init__(self, context=None, types=None): types = [ t for t in globals().values() - if type(t) is type and issubclass(t, {name}ModelBase) + if type(t) is type and issubclass(t, ModelBase) ] + (types or []) super().__init__(context=context, types=types) """ @@ -58,7 +53,8 @@ def __init__(self, name: str = '', base_type: type = objectmodel.Node): def generate_model(self, grammar: grammars.Grammar): base_type = self.base_type - base_type_import = f"from {base_type.__module__} import {base_type.__name__.split('.')[-1]}" + base_type_name = base_type.__name__.split('.')[-1] + base_type_import = f"from {base_type.__module__} import {base_type_name}" self.name = self.name or grammar.name self.print( @@ -68,35 +64,43 @@ def generate_model(self, grammar: grammars.Grammar): base_type_import=base_type_import, ), ) - self.print() - self.print() rule_index = {rule.name: rule for rule in grammar.rules} rule_specs = { rule.name: self._base_class_specs(rule) for rule in grammar.rules } + all_base_spec = { + s.class_name: s + for specs in rule_specs.values() + for s in specs + } + base = self._model_base_class_name() + all_base_spec[base] = BaseClassSpec(base, base_type_name) - model_classes = {s.class_name for spec in rule_specs.values() for s in spec} - base_classes = {s.base for spec in rule_specs.values() for s in spec} - base_classes -= model_classes + base_classes = [] + for s in all_base_spec.values(): + if s.base not in base_classes: + base_classes.append(s.base) - for base_name in base_classes: - if base_name in rule_specs: - self._gen_base_class(rule_specs[base_name][0]) + for base_name in base_classes[:-1]: + self._gen_base_class(all_base_spec[base_name]) for model_name, rule in rule_index.items(): - if model_name in rule_index: - self._gen_rule_class( - rule, - rule_specs[model_name], - ) + self._gen_rule_class( + rule, + rule_specs[model_name], + ) return self.printed_text() + def _model_base_class_name(self): + return f'ModelBase' + def _gen_base_class(self, spec: BaseClassSpec): self.print() self.print() + self.print('@dataclass(eq=False)') if spec.base: self.print(f'class {spec.class_name}({spec.base}):') else: @@ -122,11 +126,9 @@ def _gen_rule_class(self, rule: grammars.Rule, specs: list[BaseClassSpec]): self.print(f'{arg}: Any = None') def _base_class_specs(self, rule: grammars.Rule) -> list[BaseClassSpec]: - if not rule.params: - return [] - - spec = rule.params[0].split('::') - class_names = [safe_name(n) for n in spec] + [f'{self.name}ModelBase'] + spec = rule.params[0].split('::') if rule.params else [] + base = [self._model_base_class_name()] + class_names = [safe_name(n) for n in spec] + base return [ BaseClassSpec(class_name, class_names[i + 1]) for i, class_name in enumerate(class_names[:-1]) From 908549f4d46b43ff019450c31786d0e5b4d7e5ba Mon Sep 17 00:00:00 2001 From: apalala Date: Sat, 9 Dec 2023 17:31:03 -0400 Subject: [PATCH 41/51] [ngcodegen] fix gut with rendering whitespace --- tatsu/ngcodegen/python.py | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/tatsu/ngcodegen/python.py b/tatsu/ngcodegen/python.py index cf20ff33..548160eb 100644 --- a/tatsu/ngcodegen/python.py +++ b/tatsu/ngcodegen/python.py @@ -309,26 +309,29 @@ def _gen_keywords(self, grammar: grammars.Grammar): self.print() self.print() - def _gen_init(self, grammar: grammars.Grammar): start = grammar.config.start or grammar.rules[0].name + + whitespace = grammar.config.whitespace + whitespace = repr(whitespace) if whitespace else None + self.print( f''' - config = ParserConfig.new( - config, - owner=self, - whitespace={grammar.config.whitespace!r}, - nameguard={grammar.config.nameguard}, - ignorecase={grammar.config.ignorecase}, - namechars={grammar.config.namechars!r}, - parseinfo={grammar.config.parseinfo}, - comments_re={grammar.config.comments_re!r}, - eol_comments_re={grammar.config.eol_comments_re!r}, - keywords=KEYWORDS, - start={start!r}, - ) - config = config.replace(**settings) - ''', + config = ParserConfig.new( + config, + owner=self, + whitespace={whitespace}, + nameguard={grammar.config.nameguard}, + ignorecase={grammar.config.ignorecase}, + namechars={grammar.config.namechars!r}, + parseinfo={grammar.config.parseinfo}, + comments_re={grammar.config.comments_re!r}, + eol_comments_re={grammar.config.eol_comments_re!r}, + keywords=KEYWORDS, + start={start!r}, + ) + config = config.replace(**settings) + ''', ) self.print() From ad1bc9cb5e4975b1f260179856d8e6ab03825b80 Mon Sep 17 00:00:00 2001 From: apalala Date: Sun, 10 Dec 2023 09:09:30 -0400 Subject: [PATCH 42/51] [ngcodegen][model] use topological sort for order of model classes --- tatsu/ngcodegen/objectmodel.py | 46 +++++++++++++++++++++------------- tatsu/util/misc.py | 34 ++++++++++++++++++++++--- test/grammar/semantics_test.py | 1 + 3 files changed, 61 insertions(+), 20 deletions(-) diff --git a/tatsu/ngcodegen/objectmodel.py b/tatsu/ngcodegen/objectmodel.py index 05ba0661..7d2b48f3 100644 --- a/tatsu/ngcodegen/objectmodel.py +++ b/tatsu/ngcodegen/objectmodel.py @@ -3,6 +3,7 @@ from .. import grammars, objectmodel from ..mixins.indent import IndentPrintMixin from ..util import compress_seq, safe_name +from ..util.misc import topological_sort HEADER = """\ #!/usr/bin/env python3 @@ -70,42 +71,53 @@ def generate_model(self, grammar: grammars.Grammar): rule.name: self._base_class_specs(rule) for rule in grammar.rules } + rule_specs = {name: specs for name, specs in rule_specs.items() if specs} + all_base_spec = { - s.class_name: s + s.class_name: s.base for specs in rule_specs.values() for s in specs } base = self._model_base_class_name() - all_base_spec[base] = BaseClassSpec(base, base_type_name) + all_base_spec[base] = base_type_name - base_classes = [] - for s in all_base_spec.values(): - if s.base not in base_classes: - base_classes.append(s.base) + all_model_names = list(reversed(all_base_spec.keys())) + all_specs = { + (s.class_name, s.base) + for specs in rule_specs.values() + for s in specs + } - for base_name in base_classes[:-1]: - self._gen_base_class(all_base_spec[base_name]) + self.print('#', all_specs) + self.print('#', all_model_names) + all_model_names = topological_sort(all_model_names, all_specs) + self.print('#', all_model_names) + model_to_rule = { + rule_specs[name][0].class_name: rule + for name, rule in rule_index.items() + if name in rule_specs + } - for model_name, rule in rule_index.items(): - self._gen_rule_class( - rule, - rule_specs[model_name], - ) + for model_name in all_model_names: + if rule := model_to_rule.get(model_name): + self._gen_rule_class(rule, rule_specs[rule.name]) + else: + self._gen_base_class(model_name, all_base_spec.get(model_name)) return self.printed_text() def _model_base_class_name(self): return f'ModelBase' - def _gen_base_class(self, spec: BaseClassSpec): + def _gen_base_class(self, class_name: str, base: str | None): self.print() self.print() self.print('@dataclass(eq=False)') - if spec.base: - self.print(f'class {spec.class_name}({spec.base}):') + if base: + self.print(f'class {class_name}({base}): # base') else: # FIXME: this cannot happen as base_type is the final base - self.print(f'class {spec.class_name}:') + self.print(f'class {class_name}:') with self.indent(): self.print('pass') diff --git a/tatsu/util/misc.py b/tatsu/util/misc.py index 0acc89ab..7f6d67a5 100644 --- a/tatsu/util/misc.py +++ b/tatsu/util/misc.py @@ -1,9 +1,13 @@ from __future__ import annotations +from typing import TypeVar import re from ._common import RETYPE + +_T = TypeVar('_T') + _undefined = object() # unique object for when None is not a good default @@ -71,9 +75,7 @@ def findalliter(pattern, string, pos=None, endpos=None, flags=0): yield match_to_find(m) -def findfirst( - pattern, string, pos=None, endpos=None, flags=0, default=_undefined, -): +def findfirst( pattern, string, pos=None, endpos=None, flags=0, default=_undefined): """ Avoids using the inefficient findall(...)[0], or first(findall(...)) """ @@ -81,3 +83,29 @@ def findfirst( findalliter(pattern, string, pos=pos, endpos=endpos, flags=flags), default=default, ) + + +def topological_sort(nodes: list[_T], order: set[tuple[_T, _T]]) -> list[_T]: + # https://en.wikipedia.org/wiki/Topological_sorting + + order = set(order) + result = [] # Empty list that will contain the sorted elements + + pending = [ # Set of all nodes with no incoming edge + n for n in nodes + if not any(x for (x, y) in order if y == n) + ] + while pending: + n = pending.pop() + result.insert(0, n) + outgoing = {m for (x, m) in order if x == n} + for m in outgoing: + order.remove((n, m)) + if not any(x for x, y in order if y == m): + # m has no other incoming edges then + pending.append(m) + + if order: + raise ValueError('There are cycles in the graph') + + return result # a topologically sorted list diff --git a/test/grammar/semantics_test.py b/test/grammar/semantics_test.py index d299a1c0..51deebd7 100644 --- a/test/grammar/semantics_test.py +++ b/test/grammar/semantics_test.py @@ -81,6 +81,7 @@ def test_builder_basetype_codegen(self): from tatsu.tool import to_python_model src = to_python_model(grammar, base_type=MyNode) + print(src) globals = {} exec(src, globals) # pylint: disable=W0122 From ef23c64e800b9179af7cdd75895a7f1d067c1e8b Mon Sep 17 00:00:00 2001 From: apalala Date: Sun, 10 Dec 2023 09:17:32 -0400 Subject: [PATCH 43/51] [util][misc] document topological_sort --- tatsu/util/misc.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tatsu/util/misc.py b/tatsu/util/misc.py index 7f6d67a5..2d73739c 100644 --- a/tatsu/util/misc.py +++ b/tatsu/util/misc.py @@ -1,5 +1,5 @@ from __future__ import annotations -from typing import TypeVar +from typing import Iterable, TypeVar import re @@ -85,7 +85,7 @@ def findfirst( pattern, string, pos=None, endpos=None, flags=0, default=_undefin ) -def topological_sort(nodes: list[_T], order: set[tuple[_T, _T]]) -> list[_T]: +def topological_sort(nodes: Iterable[_T], order: Iterable[tuple[_T, _T]]) -> list[_T]: # https://en.wikipedia.org/wiki/Topological_sorting order = set(order) @@ -98,11 +98,13 @@ def topological_sort(nodes: list[_T], order: set[tuple[_T, _T]]) -> list[_T]: while pending: n = pending.pop() result.insert(0, n) + outgoing = {m for (x, m) in order if x == n} + # node m with an edge e from n to m for m in outgoing: order.remove((n, m)) if not any(x for x, y in order if y == m): - # m has no other incoming edges then + # m has no other incoming edges pending.append(m) if order: From 8cefbaea35dfa27f3496201a15618440ff5594f9 Mon Sep 17 00:00:00 2001 From: apalala Date: Sun, 10 Dec 2023 09:23:47 -0400 Subject: [PATCH 44/51] remove debugging statements --- tatsu/ngcodegen/objectmodel.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tatsu/ngcodegen/objectmodel.py b/tatsu/ngcodegen/objectmodel.py index 7d2b48f3..fb099c3f 100644 --- a/tatsu/ngcodegen/objectmodel.py +++ b/tatsu/ngcodegen/objectmodel.py @@ -88,10 +88,7 @@ def generate_model(self, grammar: grammars.Grammar): for s in specs } - self.print('#', all_specs) - self.print('#', all_model_names) all_model_names = topological_sort(all_model_names, all_specs) - self.print('#', all_model_names) model_to_rule = { rule_specs[name][0].class_name: rule for name, rule in rule_index.items() From 8e3004c30f02be03056de20bdfb03872c7f3139a Mon Sep 17 00:00:00 2001 From: apalala Date: Sun, 10 Dec 2023 09:40:03 -0400 Subject: [PATCH 45/51] remove debug code --- tatsu/ngcodegen/objectmodel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tatsu/ngcodegen/objectmodel.py b/tatsu/ngcodegen/objectmodel.py index fb099c3f..96b7e13c 100644 --- a/tatsu/ngcodegen/objectmodel.py +++ b/tatsu/ngcodegen/objectmodel.py @@ -111,7 +111,7 @@ def _gen_base_class(self, class_name: str, base: str | None): self.print() self.print('@dataclass(eq=False)') if base: - self.print(f'class {class_name}({base}): # base') + self.print(f'class {class_name}({base}):') else: # FIXME: this cannot happen as base_type is the final base self.print(f'class {class_name}:') From b356d2af5764ad462c51fdbb7a339da36411ed49 Mon Sep 17 00:00:00 2001 From: apalala Date: Sun, 10 Dec 2023 10:44:55 -0400 Subject: [PATCH 46/51] [lint] resolve warnings --- tatsu/ngcodegen/python.py | 3 +-- tatsu/util/misc.py | 8 ++++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/tatsu/ngcodegen/python.py b/tatsu/ngcodegen/python.py index 548160eb..ac4eca6d 100644 --- a/tatsu/ngcodegen/python.py +++ b/tatsu/ngcodegen/python.py @@ -345,8 +345,7 @@ def _gen_buffering(self, grammar: grammars.Grammar, parser_name: str): self.print('super().__init__(text, config=config)') self.print() - - def _gen_parsing(self, grammar: grammars.Gramma, parser_name: strr): + def _gen_parsing(self, grammar: grammars.Grammar, parser_name: str): self.print(f'class {parser_name}Parser(Parser):') with self.indent(): self.print('def __init__(self, /, config: ParserConfig | None = None, **settings):') diff --git a/tatsu/util/misc.py b/tatsu/util/misc.py index 2d73739c..a8396bbf 100644 --- a/tatsu/util/misc.py +++ b/tatsu/util/misc.py @@ -1,11 +1,11 @@ from __future__ import annotations -from typing import Iterable, TypeVar import re +from collections.abc import Iterable +from typing import TypeVar from ._common import RETYPE - _T = TypeVar('_T') _undefined = object() # unique object for when None is not a good default @@ -75,7 +75,7 @@ def findalliter(pattern, string, pos=None, endpos=None, flags=0): yield match_to_find(m) -def findfirst( pattern, string, pos=None, endpos=None, flags=0, default=_undefined): +def findfirst(pattern, string, pos=None, endpos=None, flags=0, default=_undefined): """ Avoids using the inefficient findall(...)[0], or first(findall(...)) """ @@ -89,7 +89,7 @@ def topological_sort(nodes: Iterable[_T], order: Iterable[tuple[_T, _T]]) -> lis # https://en.wikipedia.org/wiki/Topological_sorting order = set(order) - result = [] # Empty list that will contain the sorted elements + result: list[_T] = [] # Empty list that will contain the sorted elements pending = [ # Set of all nodes with no incoming edge n for n in nodes From db369b0c1256ef20be32ed09992dda7b9d642f2a Mon Sep 17 00:00:00 2001 From: apalala Date: Sun, 10 Dec 2023 10:45:13 -0400 Subject: [PATCH 47/51] [ngcodegen][model] do not generate model classes for builtins --- tatsu/ngcodegen/objectmodel.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/tatsu/ngcodegen/objectmodel.py b/tatsu/ngcodegen/objectmodel.py index 96b7e13c..4d3964c1 100644 --- a/tatsu/ngcodegen/objectmodel.py +++ b/tatsu/ngcodegen/objectmodel.py @@ -1,3 +1,4 @@ +import builtins from collections import namedtuple from .. import grammars, objectmodel @@ -21,11 +22,11 @@ from typing import Any from dataclasses import dataclass - + from tatsu.semantics import ModelBuilderSemantics {base_type_import} - - + + class {name}ModelBuilderSemantics(ModelBuilderSemantics): def __init__(self, context=None, types=None): types = [ @@ -39,7 +40,7 @@ def __init__(self, context=None, types=None): BaseClassSpec = namedtuple('BaseClassSpec', ['class_name', 'base']) -def modelgen(model: grammars.Grammar, name: str = '', base_type: type = objectmodel.Node | None) -> str: +def modelgen(model: grammars.Grammar, name: str = '', base_type: type | None = objectmodel.Node) -> str: base_type = base_type or objectmodel.Node generator = PythonModelGenerator(name=name, base_type=base_type) return generator.generate_model(model) @@ -96,6 +97,8 @@ def generate_model(self, grammar: grammars.Grammar): } for model_name in all_model_names: + if model_name in dir(builtins): + continue if rule := model_to_rule.get(model_name): self._gen_rule_class(rule, rule_specs[rule.name]) else: @@ -104,7 +107,7 @@ def generate_model(self, grammar: grammars.Grammar): return self.printed_text() def _model_base_class_name(self): - return f'ModelBase' + return 'ModelBase' def _gen_base_class(self, class_name: str, base: str | None): self.print() From 3d4b066dcaa20cd4e57452b961b2189eaab983b9 Mon Sep 17 00:00:00 2001 From: apalala Date: Sun, 10 Dec 2023 10:58:29 -0400 Subject: [PATCH 48/51] remove reference to Py27 --- tatsu/semantics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tatsu/semantics.py b/tatsu/semantics.py index 36cb6a62..01a75775 100644 --- a/tatsu/semantics.py +++ b/tatsu/semantics.py @@ -60,7 +60,7 @@ def _find_existing_constructor(self, typename): return constructor def _get_constructor(self, typename, base): - typename = str(typename) # cannot be unicode in Python 2.7 + typename = str(typename) if typename in self.constructors: return self.constructors[typename] From 224612c0a70aa8398e6dea5e446efd8dd65c3f27 Mon Sep 17 00:00:00 2001 From: apalala Date: Sun, 10 Dec 2023 11:46:49 -0400 Subject: [PATCH 49/51] [bootstrap] make the generated parser be the bootstrap parser --- tatsu/bootstrap.py | 545 +++++++++++--------------------------- tatsu/ngcodegen/python.py | 2 +- 2 files changed, 160 insertions(+), 387 deletions(-) diff --git a/tatsu/bootstrap.py b/tatsu/bootstrap.py index 6c3be580..ce462d20 100644 --- a/tatsu/bootstrap.py +++ b/tatsu/bootstrap.py @@ -1,15 +1,15 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 -# CAVEAT UTILITOR +# WARNING: CAVEAT UTILITOR # -# This file was automatically generated by TatSu. +# This file was automatically generated by TatSu. # -# https://pypi.python.org/pypi/tatsu/ +# https://pypi.python.org/pypi/tatsu/ # -# Any changes you make to it will be overwritten the next time -# the file is generated. +# Any changes you make to it will be overwritten the next time +# the file is generated. -# ruff: noqa: C405, I001, F401, SIM117 +# ruff: noqa: C405, COM812, I001, F401, SIM117 import sys from pathlib import Path @@ -30,7 +30,7 @@ def __init__(self, text, /, config: ParserConfig | None = None, **settings): config = ParserConfig.new( config, owner=self, - whitespace=re.compile(r"\s+"), + whitespace='\\s+', nameguard=None, ignorecase=False, namechars='', @@ -41,38 +41,39 @@ def __init__(self, text, /, config: ParserConfig | None = None, **settings): start='start', ) config = config.replace(**settings) - super().__init__(text, config=config) + super().__init__(text, config=config) class EBNFBootstrapParser(Parser): def __init__(self, /, config: ParserConfig | None = None, **settings): config = ParserConfig.new( config, owner=self, - whitespace=re.compile(r"\s+"), + whitespace='\\s+', nameguard=None, ignorecase=False, namechars='', parseinfo=True, comments_re='(?sm)[(][*](?:.|\\n)*?[*][)]', eol_comments_re='#[^\\n]*$', - left_recursion=False, keywords=KEYWORDS, start='start', ) config = config.replace(**settings) + super().__init__(config=config) @tatsumasu() def _start_(self): self._grammar_() + @tatsumasu('Grammar') def _grammar_(self): self._constant('TATSU') self.name_last_node('title') - def block1(): + def block0(): with self._choice(): with self._option(): self._directive_() @@ -82,13 +83,13 @@ def block1(): self.add_last_node_to_name('keywords') self._error( 'expecting one of: ' - ' ' # noqa: COM812 + ' ' ) - self._closure(block1) + self._closure(block0) self._rule_() self.add_last_node_to_name('rules') - def block6(): + def block1(): with self._choice(): with self._option(): self._rule_() @@ -98,10 +99,14 @@ def block6(): self.add_last_node_to_name('keywords') self._error( 'expecting one of: ' - ' ' # noqa: COM812 + ' ' ) - self._closure(block6) + self._closure(block1) self._check_eof() + self._define( + ['title'], + ['directives', 'keywords', 'rules'], + ) self._define( ['title'], @@ -125,7 +130,7 @@ def _directive_(self): self._token('eol_comments') self._error( 'expecting one of: ' - "'comments' 'eol_comments'" # noqa: COM812 + "'comments' 'eol_comments'" ) self.name_last_node('name') self._cut() @@ -134,11 +139,7 @@ def _directive_(self): self._cut() self._regex_() self.name_last_node('value') - - self._define( - ['name', 'value'], - [], - ) + self._define(['name', 'value'], []) with self._option(): with self._group(): self._token('whitespace') @@ -159,14 +160,10 @@ def _directive_(self): self._constant('None') self._error( 'expecting one of: ' - "'False' 'None' " # noqa: COM812 + "'False' 'None' " ) self.name_last_node('value') - - self._define( - ['name', 'value'], - [], - ) + self._define(['name', 'value'], []) with self._option(): with self._group(): with self._choice(): @@ -181,7 +178,7 @@ def _directive_(self): self._error( 'expecting one of: ' "'ignorecase' 'left_recursion'" - "'nameguard' 'parseinfo'" # noqa: COM812 + "'nameguard' 'parseinfo'" ) self.name_last_node('name') self._cut() @@ -192,23 +189,15 @@ def _directive_(self): self._cut() self._boolean_() self.name_last_node('value') - - self._define( - ['value'], - [], - ) + self._define(['value'], []) with self._option(): self._constant(True) self.name_last_node('value') self._error( 'expecting one of: ' - "'::'" # noqa: COM812 + "'::'" ) - - self._define( - ['name', 'value'], - [], - ) + self._define(['name', 'value'], []) with self._option(): with self._group(): self._token('grammar') @@ -218,11 +207,7 @@ def _directive_(self): self._cut() self._word_() self.name_last_node('value') - - self._define( - ['name', 'value'], - [], - ) + self._define(['name', 'value'], []) with self._option(): with self._group(): self._token('namechars') @@ -232,24 +217,18 @@ def _directive_(self): self._cut() self._string_() self.name_last_node('value') - - self._define( - ['name', 'value'], - [], - ) + self._define(['name', 'value'], []) self._error( 'expecting one of: ' "'comments' 'eol_comments' 'grammar'" "'ignorecase' 'left_recursion'" "'namechars' 'nameguard' 'parseinfo'" - "'whitespace'" # noqa: COM812 + "'whitespace'" ) self._cut() + self._define(['name', 'value'], []) - self._define( - ['name', 'value'], - [], - ) + self._define(['name', 'value'], []) @tatsumasu() def _keywords_(self): @@ -258,6 +237,7 @@ def block0(): self._keywords_() self._positive_closure(block0) + @tatsumasu() def _keyword_(self): self._token('@@keyword') @@ -277,10 +257,11 @@ def block0(): self._token('=') self._error( 'expecting one of: ' - "':' '='" # noqa: COM812 + "':' '='" ) self._closure(block0) + @tatsumasu() def _paramdef_(self): with self._choice(): @@ -289,11 +270,7 @@ def _paramdef_(self): self._cut() self._params_() self.name_last_node('params') - - self._define( - ['params'], - [], - ) + self._define(['params'], []) with self._option(): self._token('(') self._cut() @@ -309,35 +286,28 @@ def _paramdef_(self): self._cut() self._kwparams_() self.name_last_node('kwparams') - - self._define( - ['kwparams', 'params'], - [], - ) + self._define(['kwparams', 'params'], []) with self._option(): self._params_() self.name_last_node('params') self._error( 'expecting one of: ' - ' ' # noqa: COM812 + ' ' ) self._token(')') - - self._define( - ['kwparams', 'params'], - [], - ) + self._define(['kwparams', 'params'], []) self._error( 'expecting one of: ' - "'(' '::'" # noqa: COM812 + "'(' '::'" ) + @tatsumasu('Rule') def _rule_(self): - def block1(): + def block0(): self._decorator_() - self._closure(block1) + self._closure(block0) self.name_last_node('decorators') self._name_() self.name_last_node('name') @@ -349,11 +319,7 @@ def block1(): self._cut() self._params_() self.name_last_node('params') - - self._define( - ['params'], - [], - ) + self._define(['params'], []) with self._option(): self._token('(') self._cut() @@ -369,49 +335,35 @@ def block1(): self._cut() self._kwparams_() self.name_last_node('kwparams') - - self._define( - ['kwparams', 'params'], - [], - ) + self._define(['kwparams', 'params'], []) with self._option(): self._params_() self.name_last_node('params') self._error( 'expecting one of: ' - ' ' # noqa: COM812 + ' ' ) self._token(')') - - self._define( - ['kwparams', 'params'], - [], - ) + self._define(['kwparams', 'params'], []) self._error( 'expecting one of: ' - "'(' '::'" # noqa: COM812 + "'(' '::'" ) with self._optional(): self._token('<') self._cut() self._known_name_() self.name_last_node('base') - - self._define( - ['base'], - [], - ) + self._define(['base'], []) self._token('=') self._cut() self._expre_() self.name_last_node('exp') self._token(';') self._cut() + self._define(['base', 'decorators', 'exp', 'kwparams', 'name', 'params'], []) - self._define( - ['base', 'decorators', 'exp', 'kwparams', 'name', 'params'], - [], - ) + self._define(['base', 'decorators', 'exp', 'kwparams', 'name', 'params'], []) @tatsumasu() def _decorator_(self): @@ -429,23 +381,25 @@ def _decorator_(self): self._token('nomemo') self._error( 'expecting one of: ' - "'name' 'nomemo' 'override'" # noqa: COM812 + "'name' 'nomemo' 'override'" ) self.name_last_node('@') + @tatsumasu() def _params_(self): self._first_param_() self.add_last_node_to_name('@') - def block1(): + def block0(): self._token(',') self._literal_() self.add_last_node_to_name('@') with self._ifnot(): self._token('=') self._cut() - self._closure(block1) + self._closure(block0) + @tatsumasu() def _first_param_(self): @@ -458,18 +412,20 @@ def _first_param_(self): 'expecting one of: ' '(?!\\d)\\w+(?:::(?!\\d)\\w+)+ ' ' ' - ' ' # noqa: COM812 + ' ' ) + @tatsumasu() def _kwparams_(self): def sep0(): self._token(',') - def block0(): + def block1(): self._pair_() - self._positive_gather(block0, sep0) + self._positive_gather(block1, sep0) + @tatsumasu() def _pair_(self): @@ -480,6 +436,7 @@ def _pair_(self): self._literal_() self.add_last_node_to_name('@') + @tatsumasu() def _expre_(self): with self._choice(): @@ -490,9 +447,10 @@ def _expre_(self): self._error( 'expecting one of: ' "'|'