From 82a1165912b80a1550d0c91800f99b132475c29b Mon Sep 17 00:00:00 2001 From: apalala Date: Fri, 13 Oct 2023 10:20:17 -0400 Subject: [PATCH] [grammars] move EBNFBuffer to parser.py --- tatsu/grammars.py | 37 ---------------------------- tatsu/parser.py | 48 +++++++++++++++++++++++++++++++++---- test/grammar/syntax_test.py | 2 +- test/parsing_test.py | 2 +- 4 files changed, 46 insertions(+), 43 deletions(-) diff --git a/tatsu/grammars.py b/tatsu/grammars.py index 4b57cfb7..6e1fc949 100644 --- a/tatsu/grammars.py +++ b/tatsu/grammars.py @@ -15,7 +15,6 @@ from .ast import AST from .contexts import ParseContext from .objectmodel import Node -from .bootstrap import EBNFBootstrapBuffer from .infos import RuleInfo, ParserConfig from .leftrec import Nullable, find_left_recursion from .collections import OrderedSet as oset @@ -49,42 +48,6 @@ def pythonize_name(name): return ''.join('_' + c.lower() if c.isupper() else c for c in name) -class EBNFBuffer(EBNFBootstrapBuffer): - def __init__( - self, text, filename=None, comments_re=None, eol_comments_re=None, **kwargs): - super().__init__( - text, - filename=filename, - memoize_lookaheads=False, - comment_recovery=True, - comments_re=comments_re, - eol_comments_re=eol_comments_re, - **kwargs - ) - - def process_block(self, name, lines, index, **kwargs): - i = 0 - while i < len(lines): - line = lines[i] - if re.match(PRAGMA_RE, line): - directive, arg = line.split('#', 1)[1], '' - if '::' in directive: - directive, arg = directive.split('::', 1) - directive, arg = directive.strip(), arg.strip() - i = self.pragma(name, directive, arg, lines, index, i) - else: - i += 1 - return lines, index - - def pragma(self, source, name, arg, lines, index, i): - # we only recognize the 'include' pragama - if name == 'include': - filename = arg.strip('\'"') - return self.include_file(source, filename, lines, index, i, i + 1) - else: - return i + 1 # will be treated as a directive by the parser - - class ModelContext(ParseContext): def 
__init__(self, rules, /, start=None, config: ParserConfig|None = None, **settings): config = ParserConfig.new(config, **settings) diff --git a/tatsu/parser.py b/tatsu/parser.py index 898fefeb..38c9f5ed 100644 --- a/tatsu/parser.py +++ b/tatsu/parser.py @@ -1,11 +1,51 @@ from __future__ import annotations -from tatsu.bootstrap import EBNFBootstrapParser -from tatsu.semantics import ASTSemantics -from tatsu.parser_semantics import EBNFGrammarSemantics -from tatsu.grammars import EBNFBuffer +import re +from .bootstrap import EBNFBootstrapParser, EBNFBootstrapBuffer +from .grammars import PRAGMA_RE +from .semantics import ASTSemantics +from .parser_semantics import EBNFGrammarSemantics + +COMMENTS_RE = r'(?sm)[(][*](?:.|\n)*?[*][)]' +EOL_COMMENTS_RE = r'#([^\n]*?)$' + + +class EBNFBuffer(EBNFBootstrapBuffer): + def __init__( + self, text, filename=None, **kwargs): + super().__init__( + text, + filename=filename, + memoize_lookaheads=False, + comment_recovery=True, + # comments_re=COMMENTS_RE, + # eol_comments_re=EOL_COMMENTS_RE, + **kwargs + ) + + def process_block(self, name, lines, index, **kwargs): + i = 0 + while i < len(lines): + line = lines[i] + if re.match(PRAGMA_RE, line): + directive, arg = line.split('#', 1)[1], '' + if '::' in directive: + directive, arg = directive.split('::', 1) + directive, arg = directive.strip(), arg.strip() + i = self.pragma(name, directive, arg, lines, index, i) + else: + i += 1 + return lines, index + + def pragma(self, source, name, arg, lines, index, i): + # we only recognize the 'include' pragma + if name == 'include': + filename = arg.strip('\'"') + return self.include_file(source, filename, lines, index, i, i + 1) + else: + return i + 1 # will be treated as a directive by the parser class EBNFParser(EBNFBootstrapParser): def __init__(self, semantics=None, **kwargs): if semantics is None: diff --git a/test/grammar/syntax_test.py b/test/grammar/syntax_test.py index 202f6349..9c969e5b 100644 --- a/test/grammar/syntax_test.py +++ 
b/test/grammar/syntax_test.py @@ -6,7 +6,7 @@ from tatsu import tool from tatsu.util import trim from tatsu.codegen import codegen -from tatsu.grammars import EBNFBuffer +from tatsu.parser import EBNFBuffer class SyntaxTests(unittest.TestCase): diff --git a/test/parsing_test.py b/test/parsing_test.py index 5c8a5a98..ce808ac0 100644 --- a/test/parsing_test.py +++ b/test/parsing_test.py @@ -5,7 +5,7 @@ import tatsu from tatsu.util import trim, eval_escapes, asjson -from tatsu.grammars import EBNFBuffer +from tatsu.parser import EBNFBuffer class MockIncludeBuffer(EBNFBuffer):