remove defaults for regular expressions for comments (#314)
fixes #312

* [codegen] render REs for comments into the tokenizer/buffer and parser
* [codegen] do not render a default tokenizer/buffer
* [config] do not set defaults for comment REs
* [parser] upgrade to the ParserConfig protocol
* [test] add unit test for no default comment regexes
apalala authored Oct 13, 2023
1 parent 0a34786 commit ca3f7e6
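A note on the downstream effect (the example below is a hand-written sketch, not code from this commit; the grammar and input are made up): with the defaults gone, a grammar that wants comments skipped must declare the patterns itself, either with the @@comments / @@eol_comments directives or by passing comments_re / eol_comments_re as settings.

import tatsu

grammar = r'''
    @@eol_comments :: /#[^\n]*$/

    start = 'a' $ ;
'''

text = '''
    # with the directive declared, this comment line is skipped
    a
'''

ast = tatsu.parse(grammar, text)  # without the directive this now raises FailedToken
print(ast)

Dropping the directive reproduces the FailedToken behaviour exercised by the new unit test in test/grammar/syntax_test.py below.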
Showing 13 changed files with 92 additions and 103 deletions.
2 changes: 1 addition & 1 deletion mypy.ini
@@ -1,4 +1,4 @@
[mypy]
python_version = 3.9
python_version = 3.12
ignore_missing_imports = True
exclude = parsers|docs|build|tmp
19 changes: 2 additions & 17 deletions tatsu/bootstrap.py
@@ -9,11 +9,8 @@
# Any changes you make to it will be overwritten the next time
# the file is generated.

from __future__ import annotations

import sys

from tatsu.buffering import Buffer
from tatsu.parsing import Parser
from tatsu.parsing import tatsumasu
from tatsu.parsing import leftrec, nomemo, isname # noqa
@@ -26,20 +23,6 @@
} # type: ignore


class EBNFBootstrapBuffer(Buffer):
def __init__(self, text, /, config: ParserConfig | None = None, **settings):
config = ParserConfig.new(
config,
owner=self,
nameguard=None,
ignorecase=False,
namechars='',
parseinfo=True,
)
config = config.replace(**settings)
super().__init__(text, config=config)


class EBNFBootstrapParser(Parser):
def __init__(self, /, config: ParserConfig | None = None, **settings):
config = ParserConfig.new(
@@ -49,6 +32,8 @@ def __init__(self, /, config: ParserConfig | None = None, **settings):
ignorecase=False,
namechars='',
parseinfo=True,
comments_re='(?sm)[(][*](?:.|\\n)*?[*][)]',
eol_comments_re='#[^\\n]*$',
keywords=KEYWORDS,
start='start',
)
23 changes: 6 additions & 17 deletions tatsu/codegen/python.py
@@ -471,6 +471,8 @@ def render_fields(self, fields):
left_recursion = self.node.config.left_recursion
parseinfo = self.node.config.parseinfo
namechars = repr(self.node.config.namechars or '')
comments_re = repr(self.node.config.comments_re)
eol_comments_re = repr(self.node.config.eol_comments_re)

rules = '\n'.join([
self.get_renderer(rule).render() for rule in self.node.rules
@@ -494,6 +496,8 @@ def render_fields(self, fields):
parseinfo=parseinfo,
keywords=keywords,
namechars=namechars,
comments_re=comments_re,
eol_comments_re=eol_comments_re,
)

abstract_rule_template = '''
@@ -513,11 +517,8 @@ def {name}(self, ast): # noqa
# Any changes you make to it will be overwritten the next time
# the file is generated.
from __future__ import annotations
import sys
from tatsu.buffering import Buffer
from tatsu.parsing import Parser
from tatsu.parsing import tatsumasu
from tatsu.parsing import leftrec, nomemo, isname # noqa
@@ -528,20 +529,6 @@ def {name}(self, ast): # noqa
KEYWORDS = {{{keywords}}} # type: ignore
class {name}Buffer(Buffer):
def __init__(self, text, /, config: ParserConfig | None = None, **settings):
config = ParserConfig.new(
config,
owner=self,
nameguard={nameguard},
ignorecase={ignorecase},
namechars={namechars},
parseinfo={parseinfo},
)
config = config.replace(**settings)
super().__init__(text, config=config)
class {name}Parser(Parser):
def __init__(self, /, config: ParserConfig | None = None, **settings):
config = ParserConfig.new(
@@ -551,6 +538,8 @@ def __init__(self, /, config: ParserConfig | None = None, **settings):
ignorecase={ignorecase},
namechars={namechars},
parseinfo={parseinfo},
comments_re={comments_re},
eol_comments_re={eol_comments_re},
keywords=KEYWORDS,
start={start!r},
)
37 changes: 0 additions & 37 deletions tatsu/grammars.py
@@ -15,7 +15,6 @@
from .ast import AST
from .contexts import ParseContext
from .objectmodel import Node
from .bootstrap import EBNFBootstrapBuffer
from .infos import RuleInfo, ParserConfig
from .leftrec import Nullable, find_left_recursion
from .collections import OrderedSet as oset
@@ -49,42 +48,6 @@ def pythonize_name(name):
return ''.join('_' + c.lower() if c.isupper() else c for c in name)


class EBNFBuffer(EBNFBootstrapBuffer):
def __init__(
self, text, filename=None, comments_re=None, eol_comments_re=None, **kwargs):
super().__init__(
text,
filename=filename,
memoize_lookaheads=False,
comment_recovery=True,
comments_re=comments_re,
eol_comments_re=eol_comments_re,
**kwargs
)

def process_block(self, name, lines, index, **kwargs):
i = 0
while i < len(lines):
line = lines[i]
if re.match(PRAGMA_RE, line):
directive, arg = line.split('#', 1)[1], ''
if '::' in directive:
directive, arg = directive.split('::', 1)
directive, arg = directive.strip(), arg.strip()
i = self.pragma(name, directive, arg, lines, index, i)
else:
i += 1
return lines, index

def pragma(self, source, name, arg, lines, index, i):
# we only recognize the 'include' pragama
if name == 'include':
filename = arg.strip('\'"')
return self.include_file(source, filename, lines, index, i, i + 1)
else:
return i + 1 # will be treated as a directive by the parser


class ModelContext(ParseContext):
def __init__(self, rules, /, start=None, config: ParserConfig|None = None, **settings):
config = ParserConfig.new(config, **settings)
8 changes: 2 additions & 6 deletions tatsu/infos.py
@@ -15,10 +15,6 @@
from .tokenizing import Tokenizer


COMMENTS_RE = r'\(\*((?:.|\n)*?)\*\)'
EOL_COMMENTS_RE = r'#([^\n]*?)$'


@dataclasses.dataclass
class ParserConfig:
owner: Any = None
@@ -30,8 +26,8 @@ class ParserConfig:
start_rule: str|None = None # FIXME
rule_name: str|None = None # Backward compatibility

comments_re: str|None = COMMENTS_RE
eol_comments_re: str|None = EOL_COMMENTS_RE
comments_re: str|None = None
eol_comments_re: str|None = None

tokenizercls: Type[Tokenizer]|None = None # FIXME
semantics: Type|None = None
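With the module-level COMMENTS_RE and EOL_COMMENTS_RE constants removed, a ParserConfig built without explicit settings now carries no comment patterns at all. A small sketch of the resulting behaviour (hand-written, using only the ParserConfig.new() / replace() calls visible in this diff):

from tatsu.infos import ParserConfig

config = ParserConfig.new(None)
assert config.comments_re is None
assert config.eol_comments_re is None

# opting back in to what the removed defaults used to provide
config = config.replace(
    comments_re=r'\(\*((?:.|\n)*?)\*\)',
    eol_comments_re=r'#([^\n]*?)$',
)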
66 changes: 54 additions & 12 deletions tatsu/parser.py
@@ -1,25 +1,67 @@
from __future__ import annotations
import re
from typing import Any

from tatsu.bootstrap import EBNFBootstrapParser
from tatsu.semantics import ASTSemantics
from tatsu.parser_semantics import EBNFGrammarSemantics
from tatsu.grammars import EBNFBuffer
from .infos import ParserConfig
from .buffering import Buffer
from .grammars import PRAGMA_RE
from .semantics import ASTSemantics
from .parser_semantics import EBNFGrammarSemantics
from .bootstrap import EBNFBootstrapParser


class EBNFBuffer(Buffer):
def __init__(self, text, /, filename=None, config: ParserConfig|None = None, **settings: Any):
config = ParserConfig.new(
config=config,
owner=self,
filename=filename,
**settings)
super().__init__(text, config=config)

def process_block(self, name, lines, index, **kwargs):
i = 0
while i < len(lines):
line = lines[i]
if re.match(PRAGMA_RE, line):
directive, arg = line.split('#', 1)[1], ''
if '::' in directive:
directive, arg = directive.split('::', 1)
directive, arg = directive.strip(), arg.strip()
i = self.pragma(name, directive, arg, lines, index, i)
else:
i += 1
return lines, index

def pragma(self, source, name, arg, lines, index, i):
# we only recognize the 'include' pragama
if name == 'include':
filename = arg.strip('\'"')
return self.include_file(source, filename, lines, index, i, i + 1)
else:
return i + 1 # will be treated as a directive by the parser


class EBNFParser(EBNFBootstrapParser):
def __init__(self, semantics=None, **kwargs):
def __init__(self, name: str | None = None, config: ParserConfig|None = None, semantics=None, **settings: Any):
if semantics is None:
semantics = ASTSemantics()
super().__init__(semantics=semantics, **kwargs)
config = ParserConfig.new(
config=config,
name=name,
semantics=semantics,
**settings)
super().__init__(config)


class GrammarGenerator(EBNFBootstrapParser):
def __init__(self, grammar_name=None, semantics=None, parseinfo=True, **kwargs):
def __init__(self, name: str | None = None, config: ParserConfig|None = None, semantics=None, **settings: Any):
if semantics is None:
semantics = EBNFGrammarSemantics(grammar_name)
super().__init__(
semantics = EBNFGrammarSemantics(name)
config = ParserConfig.new(
config=config,
name=name,
semantics=semantics,
parseinfo=parseinfo,
tokenizercls=EBNFBuffer,
**kwargs
**settings,
)
super().__init__(config)
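Both entry points now fold their arguments into a ParserConfig instead of forwarding loose keyword arguments. A usage sketch (hypothetical caller code, not from the commit; the name 'MyGrammar' and the one-line grammar are made up):

from tatsu.parser import GrammarGenerator

generator = GrammarGenerator('MyGrammar', parseinfo=True)  # settings are folded into ParserConfig.new()
model = generator.parse(r'start = /\d+/ $ ;')              # returns the grammar model built by EBNFGrammarSemantics
print(model.parse('42'))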
2 changes: 1 addition & 1 deletion test/grammar/alerts_test.py
@@ -5,7 +5,7 @@

def test_alert_interpolation():
input = '42 69'
grammar = '''
grammar = r'''
start = a:number b: number i:^`"seen: {a}, {b}"` $ ;
number = /\d+/ ;
'''
6 changes: 3 additions & 3 deletions test/grammar/constants_test.py
@@ -5,7 +5,7 @@

def test_constant_interpolation():
input = '42 69'
grammar = '''
grammar = r'''
start = a:number b: number i:`"seen: {a}, {b}"` $ ;
number = /\d+/ ;
'''
@@ -14,7 +14,7 @@ def test_constant_interpolation():

def test_constant_interpolation_free():
input = '42 69'
grammar = '''
grammar = r'''
start = a:number b: number i:`seen: {a}, {b}` $ ;
number = /\d+/ ;
'''
@@ -23,7 +23,7 @@ def test_constant_interpolation_free():

def test_constant_interpolation_multiline():
input = '42 69'
grammar = '''
grammar = r'''
start = a:number b: number
i:```
seen:
4 changes: 2 additions & 2 deletions test/grammar/defines_test.py
@@ -6,7 +6,7 @@


def test_name_in_option():
grammar = '''
grammar = r'''
start = expr_range ;
expr_range =
@@ -41,7 +41,7 @@ def test_name_in_option():


def test_by_option():
grammar = '''
grammar = r'''
start = expr_range ;
expr_range =
2 changes: 1 addition & 1 deletion test/grammar/keyword_test.py
@@ -127,7 +127,7 @@ def test_sparse_keywords(self):
self.assertTrue('"%s" is a reserved word' % k in str(e))

def test_ignorecase_keywords(self):
grammar = '''
grammar = r'''
@@ignorecase :: True
@@keyword :: if
18 changes: 16 additions & 2 deletions test/grammar/syntax_test.py
@@ -1,12 +1,13 @@
# -*- coding: utf-8 -*-
import unittest
import pytest

from tatsu.exceptions import FailedParse
from tatsu.exceptions import FailedParse, FailedToken
from tatsu.tool import compile
from tatsu import tool
from tatsu.util import trim
from tatsu.codegen import codegen
from tatsu.grammars import EBNFBuffer
from tatsu.parser import EBNFBuffer


class SyntaxTests(unittest.TestCase):
@@ -363,3 +364,16 @@ def test_parse_void():
ast = tool.parse(grammar, '')
print(ast)
assert ast is None


def test_no_default_comments():
grammar = '''
start = 'a' $;
'''

text = '''
# no comments are valid
a
'''
with pytest.raises(FailedToken):
tool.parse(grammar, text)
6 changes: 3 additions & 3 deletions test/parser_equivalence_test.py
@@ -10,7 +10,7 @@
"""
OUTPUT = {'number_of_dice': '1', 'sides': '3'}

GRAMMAR = """
GRAMMAR = r"""
start = expression $;
int = /-?\d+/ ;
@@ -82,7 +82,7 @@ def test_error_messages():

# @pytest.mark.skip('work in progress')
def test_name_checked():
grammar = '''
grammar = r'''
@@grammar :: Test
@@ignorecase :: True
@@keyword :: if
@@ -123,7 +123,7 @@ def test_first_rule():


def test_dynamic_compiled_ast():
grammar = '''
grammar = r'''
test::Test = 'TEST' ['A' a:number] ['B' b:number] ;
number::int = /\d+/ ;
'''
2 changes: 1 addition & 1 deletion test/parsing_test.py
@@ -5,7 +5,7 @@

import tatsu
from tatsu.util import trim, eval_escapes, asjson
from tatsu.grammars import EBNFBuffer
from tatsu.parser import EBNFBuffer


class MockIncludeBuffer(EBNFBuffer):
