From c8cefff2432619ebcd69eb8b740c647ab7e09a24 Mon Sep 17 00:00:00 2001 From: apalala Date: Fri, 13 Oct 2023 11:29:39 -0400 Subject: [PATCH] [codegen] do not render a default tokenizer/buffer --- tatsu/bootstrap.py | 18 ++------------ tatsu/codegen/python.py | 16 ------------- tatsu/grammars.py | 37 ----------------------------- tatsu/parser.py | 47 +++++++++++++++++++++++++++++++++---- test/grammar/syntax_test.py | 2 +- test/parsing_test.py | 2 +- 6 files changed, 47 insertions(+), 75 deletions(-) diff --git a/tatsu/bootstrap.py b/tatsu/bootstrap.py index cb93a677..5bcc3682 100644 --- a/tatsu/bootstrap.py +++ b/tatsu/bootstrap.py @@ -9,8 +9,6 @@ # Any changes you make to it will be overwritten the next time # the file is generated. -from __future__ import annotations - import sys from tatsu.buffering import Buffer @@ -26,20 +24,6 @@ } # type: ignore -class EBNFBootstrapBuffer(Buffer): - def __init__(self, text, /, config: ParserConfig | None = None, **settings): - config = ParserConfig.new( - config, - owner=self, - nameguard=None, - ignorecase=False, - namechars='', - parseinfo=True, - ) - config = config.replace(**settings) - super().__init__(text, config=config) - - class EBNFBootstrapParser(Parser): def __init__(self, /, config: ParserConfig | None = None, **settings): config = ParserConfig.new( @@ -49,6 +33,8 @@ def __init__(self, /, config: ParserConfig | None = None, **settings): ignorecase=False, namechars='', parseinfo=True, + comments_re='(?sm)[(][*](?:.|\\n)*?[*][)]', + eol_comments_re='#[^\\n]*$', keywords=KEYWORDS, start='start', ) diff --git a/tatsu/codegen/python.py b/tatsu/codegen/python.py index 9fbe748e..3f35c9b7 100755 --- a/tatsu/codegen/python.py +++ b/tatsu/codegen/python.py @@ -530,22 +530,6 @@ def {name}(self, ast): # noqa KEYWORDS = {{{keywords}}} # type: ignore - class {name}Tokenizer(Buffer): - def __init__(self, text, /, config: ParserConfig | None = None, **settings): - config = ParserConfig.new( - config, - owner=self, - nameguard={nameguard}, - ignorecase={ignorecase}, - namechars={namechars}, - parseinfo={parseinfo}, - comments_re={comments_re}, - eol_comments_re={eol_comments_re}, - ) - config = config.replace(**settings) - super().__init__(text, config=config) - - class {name}Parser(Parser): def __init__(self, /, config: ParserConfig | None = None, **settings): config = ParserConfig.new( diff --git a/tatsu/grammars.py b/tatsu/grammars.py index 4b57cfb7..6e1fc949 100644 --- a/tatsu/grammars.py +++ b/tatsu/grammars.py @@ -15,7 +15,6 @@ from .ast import AST from .contexts import ParseContext from .objectmodel import Node -from .bootstrap import EBNFBootstrapBuffer from .infos import RuleInfo, ParserConfig from .leftrec import Nullable, find_left_recursion from .collections import OrderedSet as oset @@ -49,42 +48,6 @@ def pythonize_name(name): return ''.join('_' + c.lower() if c.isupper() else c for c in name) -class EBNFBuffer(EBNFBootstrapBuffer): - def __init__( - self, text, filename=None, comments_re=None, eol_comments_re=None, **kwargs): - super().__init__( - text, - filename=filename, - memoize_lookaheads=False, - comment_recovery=True, - comments_re=comments_re, - eol_comments_re=eol_comments_re, - **kwargs - ) - - def process_block(self, name, lines, index, **kwargs): - i = 0 - while i < len(lines): - line = lines[i] - if re.match(PRAGMA_RE, line): - directive, arg = line.split('#', 1)[1], '' - if '::' in directive: - directive, arg = directive.split('::', 1) - directive, arg = directive.strip(), arg.strip() - i = self.pragma(name, directive, arg, lines, index, i) - else: - i += 1 - return lines, index - - def pragma(self, source, name, arg, lines, index, i): - # we only recognize the 'include' pragama - if name == 'include': - filename = arg.strip('\'"') - return self.include_file(source, filename, lines, index, i, i + 1) - else: - return i + 1 # will be treated as a directive by the parser - - class ModelContext(ParseContext): def __init__(self, rules, /, start=None, config: ParserConfig|None = None, **settings): config = ParserConfig.new(config, **settings) diff --git a/tatsu/parser.py b/tatsu/parser.py index 898fefeb..f22d2fcf 100644 --- a/tatsu/parser.py +++ b/tatsu/parser.py @@ -1,9 +1,48 @@ from __future__ import annotations -from tatsu.bootstrap import EBNFBootstrapParser -from tatsu.semantics import ASTSemantics -from tatsu.parser_semantics import EBNFGrammarSemantics -from tatsu.grammars import EBNFBuffer +import re + +from .buffering import Buffer +from .grammars import PRAGMA_RE +from .semantics import ASTSemantics +from .parser_semantics import EBNFGrammarSemantics +from .bootstrap import EBNFBootstrapParser + + +class EBNFBuffer(Buffer): + def __init__( + self, text, filename=None, comments_re=None, eol_comments_re=None, **kwargs): + super().__init__( + text, + filename=filename, + memoize_lookaheads=False, + comment_recovery=True, + comments_re=comments_re, + eol_comments_re=eol_comments_re, + **kwargs + ) + + def process_block(self, name, lines, index, **kwargs): + i = 0 + while i < len(lines): + line = lines[i] + if re.match(PRAGMA_RE, line): + directive, arg = line.split('#', 1)[1], '' + if '::' in directive: + directive, arg = directive.split('::', 1) + directive, arg = directive.strip(), arg.strip() + i = self.pragma(name, directive, arg, lines, index, i) + else: + i += 1 + return lines, index + + def pragma(self, source, name, arg, lines, index, i): + # we only recognize the 'include' pragama + if name == 'include': + filename = arg.strip('\'"') + return self.include_file(source, filename, lines, index, i, i + 1) + else: + return i + 1 # will be treated as a directive by the parser class EBNFParser(EBNFBootstrapParser): diff --git a/test/grammar/syntax_test.py b/test/grammar/syntax_test.py index 202f6349..9c969e5b 100644 --- a/test/grammar/syntax_test.py +++ b/test/grammar/syntax_test.py @@ -6,7 +6,7 @@ from tatsu import tool from tatsu.util import trim from tatsu.codegen import codegen -from tatsu.grammars import EBNFBuffer +from tatsu.parser import EBNFBuffer class SyntaxTests(unittest.TestCase): diff --git a/test/parsing_test.py b/test/parsing_test.py index 5c8a5a98..ce808ac0 100644 --- a/test/parsing_test.py +++ b/test/parsing_test.py @@ -5,7 +5,7 @@ import tatsu from tatsu.util import trim, eval_escapes, asjson -from tatsu.grammars import EBNFBuffer +from tatsu.parser import EBNFBuffer class MockIncludeBuffer(EBNFBuffer):