Skip to content

Commit

Permalink
fix: use locks to prevent concurrent parsing
Browse files Browse the repository at this point in the history
The parser instances cannot be shared between threads as PyParsing is
not thread-safe, see pyparsing/pyparsing#89.

Fixes WEBLATE-1MFM
  • Loading branch information
nijel committed Dec 17, 2024
1 parent 07c8f81 commit b5b4fd6
Show file tree
Hide file tree
Showing 7 changed files with 58 additions and 21 deletions.
2 changes: 2 additions & 0 deletions docs/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ Not yet released.

**Bug fixes**

* Avoid query parser crash in multi-threaded environments.

**Compatibility**

**Upgrading**
Expand Down
9 changes: 7 additions & 2 deletions weblate/auth/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from collections import defaultdict
from functools import cache as functools_cache
from itertools import chain
from typing import TYPE_CHECKING, TypedDict, cast
from typing import TYPE_CHECKING, Literal, TypedDict, cast

import sentry_sdk
from appconf import AppConf
Expand Down Expand Up @@ -294,7 +294,12 @@ def all_admins(self, project):
def order(self):
return self.order_by("username")

def search(self, query: str, parser: str = "user", **context):
def search(
self,
query: str,
parser: Literal["plain", "user", "superuser"] = "user",
**context,
):
"""High level wrapper for searching."""
if parser == "plain":
result = self.filter(
Expand Down
8 changes: 6 additions & 2 deletions weblate/checks/flags.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@

from weblate.checks.models import CHECKS
from weblate.checks.parser import (
FLAGS_PARSER,
FLAGS_PARSER_LOCK,
SYNTAXCHARS,
FlagsParser,
length_validation,
multi_value_flag,
single_value_flag,
Expand Down Expand Up @@ -91,7 +92,10 @@ def _parse_flags_text(flags: str) -> Iterator[str | tuple[Any, ...]]:
state = 0
name: str
value: list[Any] = []
tokens = list(FlagsParser.parse_string(flags, parseAll=True))

with FLAGS_PARSER_LOCK:
tokens = list(FLAGS_PARSER.parse_string(flags, parse_all=True))

for pos, token in enumerate(tokens):
if state == 0 and token == ",":
pass
Expand Down
31 changes: 21 additions & 10 deletions weblate/checks/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@
#
# SPDX-License-Identifier: GPL-3.0-or-later

from __future__ import annotations

import re
import threading

from pyparsing import Optional, QuotedString, Regex, ZeroOrMore
from pyparsing import Optional, ParserElement, QuotedString, Regex, ZeroOrMore


def single_value_flag(func, validation=None):
Expand Down Expand Up @@ -32,7 +35,9 @@ def validate_length(val) -> None:
return validate_length


def multi_value_flag(func, minimum=1, maximum=None, modulo=None):
def multi_value_flag(
func, minimum: int = 1, maximum: int | None = None, modulo: int | None = None
):
def parse_values(val):
if modulo and len(val) % modulo != 0:
msg = "Number of parameter is not even"
Expand All @@ -49,7 +54,7 @@ def parse_values(val):


class RawQuotedString(QuotedString):
def __init__(self, quote_char, esc_char="\\") -> None:
def __init__(self, quote_char: str, esc_char: str = "\\") -> None:
super().__init__(quote_char, esc_char=esc_char, convert_whitespace_escapes=True)
# unlike the QuotedString this replaces only escaped quotes and not all chars
self.unquote_scan_re = re.compile(
Expand All @@ -60,14 +65,20 @@ def __init__(self, quote_char, esc_char="\\") -> None:

SYNTAXCHARS = {",", ":", '"', "'", "\\"}

FlagName = Regex(r"""[^,:"' \r\n\t]([^,:"']*[^,:"' \r\n\t])?""")

RegexString = "r" + RawQuotedString('"')
def get_flags_parser() -> ParserElement:
flag_name = Regex(r"""[^,:"' \r\n\t]([^,:"']*[^,:"' \r\n\t])?""")

regex_string = "r" + RawQuotedString('"')

flag_param = Optional(
regex_string | flag_name | RawQuotedString("'") | RawQuotedString('"')
)

flag = flag_name + ZeroOrMore(":" + flag_param)

FlagParam = Optional(
RegexString | FlagName | RawQuotedString("'") | RawQuotedString('"')
)
return Optional(flag) + ZeroOrMore("," + Optional(flag))

Flag = FlagName + ZeroOrMore(":" + FlagParam)

FlagsParser = Optional(Flag) + ZeroOrMore("," + Optional(Flag))
FLAGS_PARSER: ParserElement = get_flags_parser()
FLAGS_PARSER_LOCK = threading.Lock()
6 changes: 5 additions & 1 deletion weblate/utils/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
# SPDX-License-Identifier: GPL-3.0-or-later
from __future__ import annotations

from typing import Literal

from crispy_forms.layout import Div, Field
from crispy_forms.utils import TEMPLATE_PACK
from django import forms
Expand All @@ -23,7 +25,9 @@


class QueryField(forms.CharField):
def __init__(self, parser: str = "unit", **kwargs) -> None:
def __init__(
self, parser: Literal["unit", "user", "superuser"] = "unit", **kwargs
) -> None:
if "label" not in kwargs:
kwargs["label"] = gettext_lazy("Query")
if "required" not in kwargs:
Expand Down
19 changes: 14 additions & 5 deletions weblate/utils/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@

from __future__ import annotations

import threading
from datetime import datetime, timedelta
from functools import lru_cache, reduce
from itertools import chain
from operator import and_, or_
from typing import TYPE_CHECKING, Any, cast, overload
from typing import TYPE_CHECKING, Any, Literal, cast, overload

from dateutil.parser import ParserError
from dateutil.parser import parse as dateutil_parse
Expand Down Expand Up @@ -253,6 +254,8 @@ def human_date_parse(
) -> datetime | tuple[datetime, datetime]:
tzinfo = timezone.get_current_timezone()

result: datetime | None

try:
# Here we inject 5:55:55 time and if that was not changed
# during parsing, we assume it was not specified while
Expand Down Expand Up @@ -664,11 +667,12 @@ def is_field(self, text: str, context: dict) -> Q:
return super().is_field(text, context)


PARSERS = {
PARSERS: dict[Literal["unit", "user", "superuser"], ParserElement] = {
"unit": build_parser(UnitTermExpr),
"user": build_parser(UserTermExpr),
"superuser": build_parser(SuperuserUserTermExpr),
}
PARSER_LOCK = threading.Lock()


def parser_to_query(obj, context: dict) -> Q:
Expand Down Expand Up @@ -696,13 +700,18 @@ def parser_to_query(obj, context: dict) -> Q:


@lru_cache(maxsize=512)
def parse_string(text: str, parser: str) -> ParseResults:
def parse_string(
text: str, parser: Literal["unit", "user", "superuser"]
) -> ParseResults:
if "\x00" in text:
msg = "Invalid query string."
raise ValueError(msg)
return PARSERS[parser].parse_string(text, parse_all=True)
with PARSER_LOCK:
return PARSERS[parser].parse_string(text, parse_all=True)


def parse_query(text: str, parser: str = "unit", **context) -> Q:
def parse_query(
text: str, parser: Literal["unit", "user", "superuser"] = "unit", **context
) -> Q:
parsed = parse_string(text, parser)
return parser_to_query(parsed, context)
4 changes: 3 additions & 1 deletion weblate/utils/tests/test_search.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# Copyright © Michal Čihař <[email protected]>
#
# SPDX-License-Identifier: GPL-3.0-or-later
from __future__ import annotations

from datetime import UTC, datetime, timedelta
from typing import Literal

from django.db.models import F, Q
from django.test import TestCase
Expand All @@ -24,7 +26,7 @@

class SearchTestCase(TestCase):
object_class = Unit
parser = "unit"
parser: Literal["unit", "user", "superuser"] = "unit"

def assert_query(self, string, expected, exists=False, **context) -> None:
result = parse_query(string, parser=self.parser, **context)
Expand Down

0 comments on commit b5b4fd6

Please sign in to comment.