From 82a1165912b80a1550d0c91800f99b132475c29b Mon Sep 17 00:00:00 2001
From: apalala <apalala@gmail.com>
Date: Fri, 13 Oct 2023 10:20:17 -0400
Subject: [PATCH] [grammars] move EBNFBuffer to parser.py

---
 tatsu/grammars.py           | 37 ----------------------------
 tatsu/parser.py             | 48 +++++++++++++++++++++++++++++++++----
 test/grammar/syntax_test.py |  2 +-
 test/parsing_test.py        |  2 +-
 4 files changed, 46 insertions(+), 43 deletions(-)

diff --git a/tatsu/grammars.py b/tatsu/grammars.py
index 4b57cfb7..6e1fc949 100644
--- a/tatsu/grammars.py
+++ b/tatsu/grammars.py
@@ -15,7 +15,6 @@
 from .ast import AST
 from .contexts import ParseContext
 from .objectmodel import Node
-from .bootstrap import EBNFBootstrapBuffer
 from .infos import RuleInfo, ParserConfig
 from .leftrec import Nullable, find_left_recursion
 from .collections import OrderedSet as oset
@@ -49,42 +48,6 @@ def pythonize_name(name):
     return ''.join('_' + c.lower() if c.isupper() else c for c in name)
 
 
-class EBNFBuffer(EBNFBootstrapBuffer):
-    def __init__(
-            self, text, filename=None, comments_re=None, eol_comments_re=None, **kwargs):
-        super().__init__(
-            text,
-            filename=filename,
-            memoize_lookaheads=False,
-            comment_recovery=True,
-            comments_re=comments_re,
-            eol_comments_re=eol_comments_re,
-            **kwargs
-        )
-
-    def process_block(self, name, lines, index, **kwargs):
-        i = 0
-        while i < len(lines):
-            line = lines[i]
-            if re.match(PRAGMA_RE, line):
-                directive, arg = line.split('#', 1)[1], ''
-                if '::' in directive:
-                    directive, arg = directive.split('::', 1)
-                directive, arg = directive.strip(), arg.strip()
-                i = self.pragma(name, directive, arg, lines, index, i)
-            else:
-                i += 1
-        return lines, index
-
-    def pragma(self, source, name, arg, lines, index, i):
-        # we only recognize the 'include' pragama
-        if name == 'include':
-            filename = arg.strip('\'"')
-            return self.include_file(source, filename, lines, index, i, i + 1)
-        else:
-            return i + 1  # will be treated as a directive by the parser
-
-
 class ModelContext(ParseContext):
     def __init__(self, rules, /, start=None, config: ParserConfig|None = None, **settings):
         config = ParserConfig.new(config, **settings)
diff --git a/tatsu/parser.py b/tatsu/parser.py
index 898fefeb..38c9f5ed 100644
--- a/tatsu/parser.py
+++ b/tatsu/parser.py
@@ -1,11 +1,51 @@
 from __future__ import annotations
 
-from tatsu.bootstrap import EBNFBootstrapParser
-from tatsu.semantics import ASTSemantics
-from tatsu.parser_semantics import EBNFGrammarSemantics
-from tatsu.grammars import EBNFBuffer
+import re
 
+from .bootstrap import EBNFBootstrapParser, EBNFBootstrapBuffer
+from .grammars import PRAGMA_RE
+from .semantics import ASTSemantics
+from .parser_semantics import EBNFGrammarSemantics
 
+
+COMMENTS_RE = r'(?sm)[(][*](?:.|\n)*?[*][)]'  # (* ... *) block comments
+EOL_COMMENTS_RE = r'#([^\n]*?)$'  # from '#' to the end of the line
+
+
+class EBNFBuffer(EBNFBootstrapBuffer):
+    def __init__(
+            self, text, filename=None, **kwargs):
+        super().__init__(
+            text,
+            filename=filename,
+            memoize_lookaheads=False,
+            comment_recovery=True,
+            # comments_re=COMMENTS_RE,
+            # eol_comments_re=EOL_COMMENTS_RE,
+            **kwargs
+        )
+
+    def process_block(self, name, lines, index, **kwargs):
+        i = 0
+        while i < len(lines):
+            line = lines[i]
+            if re.match(PRAGMA_RE, line):
+                directive, arg = line.split('#', 1)[1], ''
+                if '::' in directive:
+                    directive, arg = directive.split('::', 1)
+                directive, arg = directive.strip(), arg.strip()
+                i = self.pragma(name, directive, arg, lines, index, i)
+            else:
+                i += 1
+        return lines, index
+
+    def pragma(self, source, name, arg, lines, index, i):
+        # we only recognize the 'include' pragma
+        if name == 'include':
+            filename = arg.strip('\'"')
+            return self.include_file(source, filename, lines, index, i, i + 1)
+        else:
+            return i + 1  # will be treated as a directive by the parser
 class EBNFParser(EBNFBootstrapParser):
     def __init__(self, semantics=None, **kwargs):
         if semantics is None:
diff --git a/test/grammar/syntax_test.py b/test/grammar/syntax_test.py
index 202f6349..9c969e5b 100644
--- a/test/grammar/syntax_test.py
+++ b/test/grammar/syntax_test.py
@@ -6,7 +6,7 @@
 from tatsu import tool
 from tatsu.util import trim
 from tatsu.codegen import codegen
-from tatsu.grammars import EBNFBuffer
+from tatsu.parser import EBNFBuffer
 
 
 class SyntaxTests(unittest.TestCase):
diff --git a/test/parsing_test.py b/test/parsing_test.py
index 5c8a5a98..ce808ac0 100644
--- a/test/parsing_test.py
+++ b/test/parsing_test.py
@@ -5,7 +5,7 @@
 
 import tatsu
 from tatsu.util import trim, eval_escapes, asjson
-from tatsu.grammars import EBNFBuffer
+from tatsu.parser import EBNFBuffer
 
 
 class MockIncludeBuffer(EBNFBuffer):