Skip to content

Commit

Permalink
Merge remote-tracking branch 'refs/remotes/origin/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
apalala committed Nov 26, 2023
2 parents da420eb + a02f845 commit 319911b
Show file tree
Hide file tree
Showing 9 changed files with 59 additions and 11 deletions.
9 changes: 8 additions & 1 deletion docs/directives.rst
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,15 @@ Enabling ``@@parseinfo`` will allow precise reporting over the input source-code
``@@whitespace :: <regexp>``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Provides a regular expression for the whitespace to be ignored by the parser. It defaults to ``/(?s)\s+/``:
Provides a regular expression for the whitespace to be ignored by the parser. If no definition is
provided, ``r'(?m)\s+'`` is used as the default. For example, to ignore only tabs and spaces:

.. code::

    @@whitespace :: /[\t ]+/

To disable any parsing of whitespace, use ``None`` for the definition:

.. code::

    @@whitespace :: None
14 changes: 14 additions & 0 deletions docs/syntax.rst
Original file line number Diff line number Diff line change
Expand Up @@ -699,6 +699,20 @@ overwrite the setting in the grammar::

@@whitespace :: /[\t ]+/

If neither the ``whitespace`` parameter nor the ``@@whitespace`` directive is
specified, |TatSu| uses ``r'(?m)\s+'`` as the default. Use ``None`` to disable
whitespace skipping entirely (*no whitespace definition*):


.. code:: python

    parser = MyParser(text, whitespace=None)

or:

.. code::

    @@whitespace :: None

Case Sensitivity
~~~~~~~~~~~~~~~~

Expand Down
2 changes: 1 addition & 1 deletion tatsu/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '5.10.4b1'
__version__ = '5.10.4'
6 changes: 3 additions & 3 deletions tatsu/bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,17 @@
# Any changes you make to it will be overwritten the next time
# the file is generated.

# ruff: noqa: I001, SIM117
# ruff: noqa: I001, F401, SIM117

import sys
from pathlib import Path

from tatsu.buffering import Buffer
from tatsu.parsing import Parser
from tatsu.parsing import tatsumasu
from tatsu.parsing import leftrec, nomemo, isname # noqa: F401
from tatsu.parsing import leftrec, nomemo, isname
from tatsu.infos import ParserConfig
from tatsu.util import re, generic_main # noqa: F401
from tatsu.util import re, generic_main


KEYWORDS: set[str] = set()
Expand Down
16 changes: 14 additions & 2 deletions tatsu/buffering.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,14 @@
from typing import Any

from .exceptions import ParseError
from .infos import CommentInfo, LineIndexInfo, LineInfo, ParserConfig, PosLine
from .infos import (
CommentInfo,
LineIndexInfo,
LineInfo,
ParserConfig,
PosLine,
UndefinedStr,
)
from .tokenizing import Tokenizer
from .util import (
RETYPE,
Expand Down Expand Up @@ -76,12 +83,17 @@ def whitespace(self):

@staticmethod
def build_whitespace_re(whitespace):
if whitespace is None:
if type(whitespace) is UndefinedStr:
return WHITESPACE_RE
if whitespace in {None, ''}:
return None
elif isinstance(whitespace, RETYPE):
return whitespace
elif whitespace:
if not isinstance(whitespace, str):
# FIXME:
# this feature is undocumented
# only regular expressions should be supported
# a list or a set?
whitespace = f"[{''.join(c for c in whitespace)}]+"
return re.compile(f'(?m){whitespace}')
Expand Down
6 changes: 3 additions & 3 deletions tatsu/codegen/python.py
Original file line number Diff line number Diff line change
Expand Up @@ -509,17 +509,17 @@ def {name}(self, ast):
# Any changes you make to it will be overwritten the next time
# the file is generated.
# ruff: noqa: I001, SIM117
# ruff: noqa: I001, F401, SIM117
import sys
from pathlib import Path
from tatsu.buffering import Buffer
from tatsu.parsing import Parser
from tatsu.parsing import tatsumasu
from tatsu.parsing import leftrec, nomemo, isname # noqa: F401
from tatsu.parsing import leftrec, nomemo, isname
from tatsu.infos import ParserConfig
from tatsu.util import re, generic_main # noqa: F401
from tatsu.util import re, generic_main
KEYWORDS: set[str] = set({keywords})
Expand Down
9 changes: 8 additions & 1 deletion tatsu/infos.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,13 @@
from .util.unicode_characters import C_DERIVE


# Marker subclass of ``str`` that adds no behaviour of its own. It exists so
# that an unset value can be told apart from ``None`` and from an ordinary
# string (including ``''``, which it compares equal to) by checking the
# exact type, e.g. ``type(value) is UndefinedStr``.
class UndefinedStr(str):
pass


# Shared instance used as the "nothing was configured" default value
# (``ParserConfig.whitespace`` defaults to it).
_undefined_str = UndefinedStr()


@dataclasses.dataclass
class ParserConfig:
owner: Any = None
Expand Down Expand Up @@ -48,7 +55,7 @@ class ParserConfig:
ignorecase: bool | None = False
namechars: str = ''
nameguard: bool | None = None # implied by namechars
whitespace: str | None = None
whitespace: str | None = _undefined_str

parseinfo: bool = False

Expand Down
1 change: 1 addition & 0 deletions tatsu/parser_semantics.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ def grammar(self, ast, *args):
keywords = list(flatten(ast.keywords)) or []

if directives.get('whitespace') in {'None', 'False'}:
# NOTE: use '' because None will _not_ override defaults in configuration
directives['whitespace'] = ''

name = (
Expand Down
7 changes: 7 additions & 0 deletions test/grammar/directive_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,13 @@ def test_whitespace_none_directive(self):
else:
self.fail('parsed through non-whitespace')

# Checks the default whitespace handling: the grammar declares no
# ``@@whitespace`` directive, yet the space-separated input must parse
# without raising. There is no explicit assertion; the test fails only if
# ``tatsu.parse`` raises.
def test_default_whitespace(self):
grammar = r"""
start = {'x'}+ $;
"""

tatsu.parse(grammar, "x x x")

def test_eol_comments_re_directive(self):
grammar = """
@@eol_comments :: /#.*?$/
Expand Down

0 comments on commit 319911b

Please sign in to comment.