Skip to content

Commit

Permalink
fix(checks): consolidate format string matching to use re.Match
Browse files Browse the repository at this point in the history
Relying on findall and finditer resulted in off by one for some of the
matches causing false positives in some checks.

The code now consistely uses finiter which return re.Match objects and
not a tuple matches as findall.

Fixes WeblateOrg#12746
  • Loading branch information
nijel committed Oct 14, 2024
1 parent bf07a01 commit d2208ae
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 22 deletions.
1 change: 1 addition & 0 deletions docs/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Not yet released.

* Displaying :ref:`workflow-customization` setting in some cases.
* Users can add component in any language already existing in a project.
* :ref:`check-unnamed-format` better handles some strings such as :ref:`check-python-brace-format`.

**Compatibility**

Expand Down
108 changes: 86 additions & 22 deletions weblate/checks/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from weblate.checks.base import SourceCheck, TargetCheck

if TYPE_CHECKING:
from collections.abc import Iterable
from collections.abc import Callable, Iterable

from django_stubs_ext import StrOrPromise

Expand Down Expand Up @@ -258,19 +258,79 @@ def name_format_is_position_based(string: str) -> bool: # noqa: FURB118
return not string


FLAG_RULES = {
"python-format": (PYTHON_PRINTF_MATCH, python_format_is_position_based),
"php-format": (PHP_PRINTF_MATCH, c_format_is_position_based),
"c-format": (C_PRINTF_MATCH, c_format_is_position_based),
"object-pascal-format": (PASCAL_FORMAT_MATCH, pascal_format_is_position_based),
"perl-format": (C_PRINTF_MATCH, c_format_is_position_based),
"perl-brace-format": (PERL_BRACE_MATCH, name_format_is_position_based),
"javascript-format": (C_PRINTF_MATCH, c_format_is_position_based),
"lua-format": (C_PRINTF_MATCH, c_format_is_position_based),
"python-brace-format": (PYTHON_BRACE_MATCH, name_format_is_position_based),
"scheme-format": (SCHEME_PRINTF_MATCH, scheme_format_is_position_based),
"c-sharp-format": (C_SHARP_MATCH, name_format_is_position_based),
"java-printf-format": (JAVA_MATCH, c_format_is_position_based),
def extract_string_simple(match: re.Match) -> str:
return match.group(1)


def extract_string_python_brace(match: re.Match) -> str:
# 1 and 2 are escaped braces and 3 is the actual match of format string
return match.group(1) or match.group(2) or match.group(3)


FLAG_RULES: dict[
str,
tuple[
re.Pattern,
Callable[[str], bool],
Callable[[re.Match], str],
],
] = {
"python-format": (
PYTHON_PRINTF_MATCH,
python_format_is_position_based,
extract_string_simple,
),
"php-format": (
PHP_PRINTF_MATCH,
c_format_is_position_based,
extract_string_simple,
),
"c-format": (
C_PRINTF_MATCH,
c_format_is_position_based,
extract_string_simple,
),
"object-pascal-format": (
PASCAL_FORMAT_MATCH,
pascal_format_is_position_based,
extract_string_simple,
),
"perl-format": (C_PRINTF_MATCH, c_format_is_position_based, extract_string_simple),
"perl-brace-format": (
PERL_BRACE_MATCH,
name_format_is_position_based,
extract_string_simple,
),
"javascript-format": (
C_PRINTF_MATCH,
c_format_is_position_based,
extract_string_simple,
),
"lua-format": (
C_PRINTF_MATCH,
c_format_is_position_based,
extract_string_simple,
),
"python-brace-format": (
PYTHON_BRACE_MATCH,
name_format_is_position_based,
extract_string_python_brace,
),
"scheme-format": (
SCHEME_PRINTF_MATCH,
scheme_format_is_position_based,
extract_string_simple,
),
"c-sharp-format": (
C_SHARP_MATCH,
name_format_is_position_based,
extract_string_simple,
),
"java-printf-format": (
JAVA_MATCH,
c_format_is_position_based,
extract_string_simple,
),
}


Expand Down Expand Up @@ -351,12 +411,12 @@ def normalize(self, matches: list[str]) -> list[str]:
return [m for m in matches if m not in self.normalize_remove]

def extract_string(self, match: re.Match) -> str:
return match[0]
return extract_string_simple(match)

def extract_matches(self, string: str) -> list[str]:
return [
self.cleanup_string(self.extract_string(x))
for x in self.regexp.findall(string)
self.cleanup_string(self.extract_string(match))
for match in self.regexp.finditer(string)
]

def check_format(
Expand Down Expand Up @@ -485,11 +545,16 @@ class BasePrintfCheck(BaseFormatCheck):

def __init__(self) -> None:
super().__init__()
self.regexp, self._is_position_based = FLAG_RULES[self.enable_string]
self.regexp, self._is_position_based, self._extract_string = FLAG_RULES[
self.enable_string
]

def is_position_based(self, string: str):
return self._is_position_based(string)

def extract_string(self, match: re.Match) -> str:
return self._extract_string(match)

def format_string(self, string: str) -> str:
return f"%{string}"

Expand Down Expand Up @@ -594,8 +659,7 @@ class PythonBraceFormatCheck(BaseFormatCheck):
normalize_remove: set[str] = {"{", "}"}

def extract_string(self, match: re.Match) -> str:
# 0 and 1 are escaped braces and 2 is the actual match
return match[0] or match[1] or match[2]
return extract_string_python_brace(match)

def is_position_based(self, string: str):
return name_format_is_position_based(string)
Expand Down Expand Up @@ -761,9 +825,9 @@ def check_source_unit(self, sources: list[str], unit: Unit) -> bool:
if not rules:
return False
found = set()
for regexp, is_position_based in rules:
for regexp, is_position_based, extract_string in rules:
for match in regexp.finditer(sources[0]):
if is_position_based(match[1]):
if is_position_based(extract_string(match)):
found.add((match.start(0), match.end(0)))
if len(found) >= 2:
return True
Expand Down
15 changes: 15 additions & 0 deletions weblate/checks/tests/test_format_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1626,3 +1626,18 @@ def test_good_multi_format(self) -> None:
["Test %s"], MockUnit(flags="c-format,python-format")
)
)

def test_good_brace_format(self) -> None:
self.assertFalse(
self.check.check_source(
["Recognition {progress}% ({current_job}/{total_jobs})"],
MockUnit(flags="python-brace-format"),
)
)

def test_bad_brace_format(self) -> None:
self.assertTrue(
self.check.check_source(
["Recognition {}% ({}/{})"], MockUnit(flags="python-brace-format")
)
)

0 comments on commit d2208ae

Please sign in to comment.