Skip to content

Commit

Permalink
[buffering] optimize use of regular expressions (#328)
Browse files Browse the repository at this point in the history
* [buffering] optimizations in regex handling
  • Loading branch information
apalala authored Nov 20, 2023
1 parent 06509b6 commit e9f20be
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 24 deletions.
56 changes: 35 additions & 21 deletions tatsu/buffering.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,19 +243,25 @@ def _index_comments(self, comments, selector):
previous.extend(comments)

def _eat_regex(self, regex):
if regex is not None:
return list(takewhile(identity, map(self.matchre, repeat(regex))))
return None
if not regex:
return
while self._matchre_fast(regex):
pass

def _eat_regex_list(self, regex):
    """Collect consecutive ``matchre`` results for *regex*.

    Calls ``self.matchre(regex)`` repeatedly and stops at the first result
    that ``identity`` reports as falsy. Returns the results gathered up to
    that point, or an empty list when *regex* itself is falsy.
    """
    collected = []
    if regex:
        while identity(found := self.matchre(regex)):
            collected.append(found)
    return collected

def eat_whitespace(self):
    """Apply ``_eat_regex`` to this buffer's ``whitespace_re`` pattern.

    Returns whatever ``_eat_regex`` returns.
    """
    pattern = self.whitespace_re
    return self._eat_regex(pattern)

def eat_comments(self):
comments = self._eat_regex(self.config.comments_re)
comments = self._eat_regex_list(self.config.comments_re)
self._index_comments(comments, lambda x: x.inline)

def eat_eol_comments(self):
comments = self._eat_regex(self.config.eol_comments_re)
comments = self._eat_regex_list(self.config.eol_comments_re)
self._index_comments(comments, lambda x: x.eol)

def next_token(self):
Expand Down Expand Up @@ -293,7 +299,7 @@ def is_space(self):
def is_name_char(self, c):
    """Tell whether *c* counts as a name character.

    A character qualifies when it is alphanumeric or is contained in
    ``self._namechar_set``; ``None`` never qualifies.
    """
    if c is None:
        return False
    return c.isalnum() or c in self._namechar_set

def match(self, token):
def match(self, token: str) -> str | None:
if token is None:
return self.atend()

Expand All @@ -303,23 +309,31 @@ def match(self, token):
else:
is_match = self.text[p: p + len(token)] == token

if is_match:
self.move(len(token))
partial_match = (
self.nameguard
and token
and token[0].isalpha()
and all(self.is_name_char(t) for t in token)
and self.is_name_char(self.current)
)
if not partial_match:
return token
self.goto(p)
return None
if not is_match:
return None

self.move(len(token))
partial_match = (
self.nameguard
and token
and token[0].isalpha()
and self.is_name_char(self.current)
and all(self.is_name_char(t) for t in token)
)
if partial_match:
self.goto(p)
return None

return token

def _matchre_fast(self, pattern):
    """Advance past one match of *pattern* at the current position.

    The matched text is not returned; only whether the buffer made
    progress is reported, so callers can loop on the result
    (``while self._matchre_fast(regex): pass``).

    Args:
        pattern: the regular expression handed to ``self._scanre``.

    Returns:
        ``True`` when a non-empty match was consumed, ``False`` when the
        pattern did not match or matched the empty string.
    """
    if not (match := self._scanre(pattern)):
        return False

    length = len(match.group())
    self.move(length)
    # An empty match moves nothing; reporting it as progress would make a
    # ``while self._matchre_fast(...)`` loop spin forever.
    return length > 0

def matchre(self, pattern):
match = self._scanre(pattern)
if match is None:
if not (match := self._scanre(pattern)):
return None

matched = match.group()
Expand Down
4 changes: 1 addition & 3 deletions tatsu/util/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,8 @@ def match_to_find(m: re.Match):
g = m.groups(default=m.string[0:0])
if len(g) == 1:
return g[0]
elif g:
return g
else:
return m.group()
return g or m.group()


def findalliter(pattern, string, pos=None, endpos=None, flags=0):
Expand Down

0 comments on commit e9f20be

Please sign in to comment.