Skip to content

Commit

Permalink
fix(checks): consolidate format string matching to use re.Match
Browse files Browse the repository at this point in the history
Relying on findall and finditer resulted in off by one for some of the
matches causing false positives in some checks.

The code now consistely uses finiter which return re.Match objects and
not a tuple matches as findall.

Fixes WeblateOrg#12746
  • Loading branch information
nijel committed Oct 14, 2024
1 parent bf07a01 commit ef1d023
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 23 deletions.
1 change: 1 addition & 0 deletions docs/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Not yet released.

* Displaying :ref:`workflow-customization` setting in some cases.
* Users can add component in any language already existing in a project.
* :ref:`check-unnamed-format` better handles some strings, such as :ref:`check-python-brace-format`.

**Compatibility**

Expand Down
108 changes: 86 additions & 22 deletions weblate/checks/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from weblate.checks.base import SourceCheck, TargetCheck

if TYPE_CHECKING:
from collections.abc import Iterable
from collections.abc import Callable, Iterable

from django_stubs_ext import StrOrPromise

Expand Down Expand Up @@ -258,19 +258,79 @@ def name_format_is_position_based(string: str) -> bool: # noqa: FURB118
return not string


FLAG_RULES = {
"python-format": (PYTHON_PRINTF_MATCH, python_format_is_position_based),
"php-format": (PHP_PRINTF_MATCH, c_format_is_position_based),
"c-format": (C_PRINTF_MATCH, c_format_is_position_based),
"object-pascal-format": (PASCAL_FORMAT_MATCH, pascal_format_is_position_based),
"perl-format": (C_PRINTF_MATCH, c_format_is_position_based),
"perl-brace-format": (PERL_BRACE_MATCH, name_format_is_position_based),
"javascript-format": (C_PRINTF_MATCH, c_format_is_position_based),
"lua-format": (C_PRINTF_MATCH, c_format_is_position_based),
"python-brace-format": (PYTHON_BRACE_MATCH, name_format_is_position_based),
"scheme-format": (SCHEME_PRINTF_MATCH, scheme_format_is_position_based),
"c-sharp-format": (C_SHARP_MATCH, name_format_is_position_based),
"java-printf-format": (JAVA_MATCH, c_format_is_position_based),
def extract_string_simple(match: re.Match) -> str:
return match.group(1)


def extract_string_python_brace(match: re.Match) -> str:
# 1 and 2 are escaped braces and 3 is the actual match of format string
return match.group(1) or match.group(2) or match.group(3)


FLAG_RULES: dict[
str,
tuple[
re.Pattern,
Callable[[str], bool],
Callable[[re.Match], str],
],
] = {
"python-format": (
PYTHON_PRINTF_MATCH,
python_format_is_position_based,
extract_string_simple,
),
"php-format": (
PHP_PRINTF_MATCH,
c_format_is_position_based,
extract_string_simple,
),
"c-format": (
C_PRINTF_MATCH,
c_format_is_position_based,
extract_string_simple,
),
"object-pascal-format": (
PASCAL_FORMAT_MATCH,
pascal_format_is_position_based,
extract_string_simple,
),
"perl-format": (C_PRINTF_MATCH, c_format_is_position_based, extract_string_simple),
"perl-brace-format": (
PERL_BRACE_MATCH,
name_format_is_position_based,
extract_string_simple,
),
"javascript-format": (
C_PRINTF_MATCH,
c_format_is_position_based,
extract_string_simple,
),
"lua-format": (
C_PRINTF_MATCH,
c_format_is_position_based,
extract_string_simple,
),
"python-brace-format": (
PYTHON_BRACE_MATCH,
name_format_is_position_based,
extract_string_python_brace,
),
"scheme-format": (
SCHEME_PRINTF_MATCH,
scheme_format_is_position_based,
extract_string_simple,
),
"c-sharp-format": (
C_SHARP_MATCH,
name_format_is_position_based,
extract_string_simple,
),
"java-printf-format": (
JAVA_MATCH,
c_format_is_position_based,
extract_string_simple,
),
}


Expand Down Expand Up @@ -351,12 +411,12 @@ def normalize(self, matches: list[str]) -> list[str]:
return [m for m in matches if m not in self.normalize_remove]

def extract_string(self, match: re.Match) -> str:
return match[0]
return extract_string_simple(match)

def extract_matches(self, string: str) -> list[str]:
return [
self.cleanup_string(self.extract_string(x))
for x in self.regexp.findall(string)
self.cleanup_string(self.extract_string(match))
for match in self.regexp.finditer(string)
]

def check_format(
Expand Down Expand Up @@ -485,11 +545,16 @@ class BasePrintfCheck(BaseFormatCheck):

def __init__(self) -> None:
super().__init__()
self.regexp, self._is_position_based = FLAG_RULES[self.enable_string]
self.regexp, self._is_position_based, self._extract_string = FLAG_RULES[
self.enable_string
]

def is_position_based(self, string: str):
return self._is_position_based(string)

def extract_string(self, match: re.Match) -> str:
return self._extract_string(match)

def format_string(self, string: str) -> str:
return f"%{string}"

Expand Down Expand Up @@ -594,8 +659,7 @@ class PythonBraceFormatCheck(BaseFormatCheck):
normalize_remove: set[str] = {"{", "}"}

def extract_string(self, match: re.Match) -> str:
# 0 and 1 are escaped braces and 2 is the actual match
return match[0] or match[1] or match[2]
return extract_string_python_brace(match)

def is_position_based(self, string: str):
return name_format_is_position_based(string)
Expand Down Expand Up @@ -761,9 +825,9 @@ def check_source_unit(self, sources: list[str], unit: Unit) -> bool:
if not rules:
return False
found = set()
for regexp, is_position_based in rules:
for regexp, is_position_based, extract_string in rules:
for match in regexp.finditer(sources[0]):
if is_position_based(match[1]):
if is_position_based(extract_string(match)):
found.add((match.start(0), match.end(0)))
if len(found) >= 2:
return True
Expand Down
2 changes: 1 addition & 1 deletion weblate/checks/same.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def strip_format(msg, flags):
These are quite often not changed by translators.
"""
for format_flag, (regex, _is_position_based) in FLAG_RULES.items():
for format_flag, (regex, _is_position_based, _extract_string) in FLAG_RULES.items():
if format_flag in flags:
return regex.sub("", msg)

Expand Down
15 changes: 15 additions & 0 deletions weblate/checks/tests/test_format_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1626,3 +1626,18 @@ def test_good_multi_format(self) -> None:
["Test %s"], MockUnit(flags="c-format,python-format")
)
)

def test_good_brace_format(self) -> None:
self.assertFalse(
self.check.check_source(
["Recognition {progress}% ({current_job}/{total_jobs})"],
MockUnit(flags="python-brace-format"),
)
)

def test_bad_brace_format(self) -> None:
self.assertTrue(
self.check.check_source(
["Recognition {}% ({}/{})"], MockUnit(flags="python-brace-format")
)
)

0 comments on commit ef1d023

Please sign in to comment.