diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py
index 65ff661a0..7a9ce385f 100644
--- a/babel/messages/frontend.py
+++ b/babel/messages/frontend.py
@@ -19,10 +19,11 @@
 import shutil
 import sys
 import tempfile
+import warnings
 from collections import OrderedDict
 from configparser import RawConfigParser
 from io import StringIO
-from typing import Iterable
+from typing import BinaryIO, Iterable, Literal
 
 from babel import Locale, localedata
 from babel import __version__ as VERSION
@@ -53,6 +54,12 @@ class SetupError(BaseError):
     pass
 
 
+class ConfigurationError(BaseError):
+    """
+    Raised for errors in configuration files.
+    """
+
+
 def listify_value(arg, split=None):
     """
     Make a list out of an argument.
@@ -534,8 +541,21 @@ def _get_mappings(self):
         mappings = []
 
         if self.mapping_file:
-            with open(self.mapping_file) as fileobj:
-                method_map, options_map = parse_mapping(fileobj)
+            if self.mapping_file.endswith(".toml"):
+                with open(self.mapping_file, "rb") as fileobj:
+                    file_style = (
+                        "pyproject.toml"
+                        if os.path.basename(self.mapping_file) == "pyproject.toml"
+                        else "standalone"
+                    )
+                    method_map, options_map = _parse_mapping_toml(
+                        fileobj,
+                        filename=self.mapping_file,
+                        style=file_style,
+                    )
+            else:
+                with open(self.mapping_file) as fileobj:
+                    method_map, options_map = parse_mapping_cfg(fileobj, filename=self.mapping_file)
             for path in self.input_paths:
                 mappings.append((path, method_map, options_map))
 
@@ -543,7 +563,7 @@ def _get_mappings(self):
             message_extractors = self.distribution.message_extractors
             for path, mapping in message_extractors.items():
                 if isinstance(mapping, str):
-                    method_map, options_map = parse_mapping(StringIO(mapping))
+                    method_map, options_map = parse_mapping_cfg(StringIO(mapping))
                 else:
                     method_map, options_map = [], {}
                     for pattern, method, options in mapping:
@@ -980,53 +1000,19 @@ def main():
 
 
 def parse_mapping(fileobj, filename=None):
-    """Parse an extraction method mapping from a file-like object.
+    warnings.warn(
+        "parse_mapping is deprecated, use parse_mapping_cfg instead",
+        DeprecationWarning,
+        stacklevel=2,
+    )
+    return parse_mapping_cfg(fileobj, filename)
 
-    >>> buf = StringIO('''
-    ... [extractors]
-    ... custom = mypackage.module:myfunc
-    ...
-    ... # Python source files
-    ... [python: **.py]
-    ...
-    ... # Genshi templates
-    ... [genshi: **/templates/**.html]
-    ... include_attrs =
-    ... [genshi: **/templates/**.txt]
-    ... template_class = genshi.template:TextTemplate
-    ... encoding = latin-1
-    ...
-    ... # Some custom extractor
-    ... [custom: **/custom/*.*]
-    ... ''')
-
-    >>> method_map, options_map = parse_mapping(buf)
-    >>> len(method_map)
-    4
-
-    >>> method_map[0]
-    ('**.py', 'python')
-    >>> options_map['**.py']
-    {}
-    >>> method_map[1]
-    ('**/templates/**.html', 'genshi')
-    >>> options_map['**/templates/**.html']['include_attrs']
-    ''
-    >>> method_map[2]
-    ('**/templates/**.txt', 'genshi')
-    >>> options_map['**/templates/**.txt']['template_class']
-    'genshi.template:TextTemplate'
-    >>> options_map['**/templates/**.txt']['encoding']
-    'latin-1'
-
-    >>> method_map[3]
-    ('**/custom/*.*', 'mypackage.module:myfunc')
-    >>> options_map['**/custom/*.*']
-    {}
+
+def parse_mapping_cfg(fileobj, filename=None):
+    """Parse an extraction method mapping from a file-like object.
 
:param fileobj: a readable file-like object containing the configuration text to parse - :see: `extract_from_directory` """ extractors = {} method_map = [] @@ -1053,6 +1039,94 @@ def parse_mapping(fileobj, filename=None): return method_map, options_map +def _parse_config_object(config: dict, *, filename="(unknown)"): + extractors = {} + method_map = [] + options_map = {} + + extractors_read = config.get("extractors", {}) + if not isinstance(extractors_read, dict): + raise ConfigurationError(f"{filename}: extractors: Expected a dictionary, got {type(extractors_read)!r}") + for method, callable_spec in extractors_read.items(): + if not isinstance(method, str): + # Impossible via TOML, but could happen with a custom object. + raise ConfigurationError(f"{filename}: extractors: Extraction method must be a string, got {method!r}") + if not isinstance(callable_spec, str): + raise ConfigurationError(f"{filename}: extractors: Callable specification must be a string, got {callable_spec!r}") + extractors[method] = callable_spec + + if "mapping" in config: + raise ConfigurationError(f"{filename}: 'mapping' is not a valid key, did you mean 'mappings'?") + + mappings_read = config.get("mappings", []) + if not isinstance(mappings_read, list): + raise ConfigurationError(f"{filename}: mappings: Expected a list, got {type(mappings_read)!r}") + for idx, entry in enumerate(mappings_read): + if not isinstance(entry, dict): + raise ConfigurationError(f"{filename}: mappings[{idx}]: Expected a dictionary, got {type(entry)!r}") + entry = entry.copy() + + method = entry.pop("method", None) + if not isinstance(method, str): + raise ConfigurationError(f"{filename}: mappings[{idx}]: 'method' must be a string, got {method!r}") + method = extractors.get(method, method) # Map the extractor name to the callable now + + pattern = entry.pop("pattern", None) + if not isinstance(pattern, (list, str)): + raise ConfigurationError(f"{filename}: mappings[{idx}]: 'pattern' must be a list or a string, got 
{pattern!r}") + if not isinstance(pattern, list): + pattern = [pattern] + + for pat in pattern: + if not isinstance(pat, str): + raise ConfigurationError(f"{filename}: mappings[{idx}]: 'pattern' elements must be strings, got {pat!r}") + method_map.append((pat, method)) + options_map[pat] = entry + + return method_map, options_map + + +def _parse_mapping_toml( + fileobj: BinaryIO, + filename: str = "(unknown)", + style: Literal["standalone", "pyproject.toml"] = "standalone", +): + """Parse an extraction method mapping from a binary file-like object. + + .. warning: As of this version of Babel, this is a private API subject to changes. + + :param fileobj: a readable binary file-like object containing the configuration TOML to parse + :param filename: the name of the file being parsed, for error messages + :param style: whether the file is in the style of a `pyproject.toml` file, i.e. whether to look for `tool.babel`. + """ + try: + import tomllib + except ImportError: + try: + import tomli as tomllib + except ImportError as ie: # pragma: no cover + raise ImportError("tomli or tomllib is required to parse TOML files") from ie + + try: + parsed_data = tomllib.load(fileobj) + except tomllib.TOMLDecodeError as e: + raise ConfigurationError(f"{filename}: Error parsing TOML file: {e}") from e + + if style == "pyproject.toml": + try: + babel_data = parsed_data["tool"]["babel"] + except (TypeError, KeyError) as e: + raise ConfigurationError(f"{filename}: No 'tool.babel' section found in file") from e + elif style == "standalone": + babel_data = parsed_data + if "babel" in babel_data: + raise ConfigurationError(f"{filename}: 'babel' should not be present in a stand-alone configuration file") + else: # pragma: no cover + raise ValueError(f"Unknown TOML style {style!r}") + + return _parse_config_object(babel_data, filename=filename) + + def _parse_spec(s: str) -> tuple[int | None, tuple[int | tuple[int, str], ...]]: inds = [] number = None diff --git 
a/tests/messages/test_frontend.py b/tests/messages/test_frontend.py index 45638b3b1..7a6b08c44 100644 --- a/tests/messages/test_frontend.py +++ b/tests/messages/test_frontend.py @@ -11,12 +11,14 @@ # history and logs, available at http://babel.edgewall.org/log/. import logging import os +import re import shlex import shutil import sys import time import unittest from datetime import datetime, timedelta +from functools import partial from io import BytesIO, StringIO from typing import List @@ -1388,25 +1390,86 @@ def test_update_init_missing(self): assert len(catalog) == 4 # Catalog was updated -def test_parse_mapping(): - buf = StringIO( - '[extractors]\n' - 'custom = mypackage.module:myfunc\n' - '\n' - '# Python source files\n' - '[python: **.py]\n' - '\n' - '# Genshi templates\n' - '[genshi: **/templates/**.html]\n' - 'include_attrs =\n' - '[genshi: **/templates/**.txt]\n' - 'template_class = genshi.template:TextTemplate\n' - 'encoding = latin-1\n' - '\n' - '# Some custom extractor\n' - '[custom: **/custom/*.*]\n') - - method_map, options_map = frontend.parse_mapping(buf) +mapping_cfg = """ +[extractors] +custom = mypackage.module:myfunc + +# Python source files +[python: **.py] + +# Genshi templates +[genshi: **/templates/**.html] +include_attrs = + +[genshi: **/templates/**.txt] +template_class = genshi.template:TextTemplate +encoding = latin-1 + +# Some custom extractor +[custom: **/custom/*.*] +""" + +mapping_toml = """ +[extractors] +custom = "mypackage.module:myfunc" + +# Python source files +[[mappings]] +method = "python" +pattern = "**.py" + +# Genshi templates +[[mappings]] +method = "genshi" +pattern = "**/templates/**.html" +include_attrs = "" + +[[mappings]] +method = "genshi" +pattern = "**/templates/**.txt" +template_class = "genshi.template:TextTemplate" +encoding = "latin-1" + +# Some custom extractor +[[mappings]] +method = "custom" +pattern = "**/custom/*.*" +""" + + +@pytest.mark.parametrize( + ("data", "parser", "preprocess", "is_toml"), + [ 
+        (
+            mapping_cfg,
+            frontend.parse_mapping_cfg,
+            None,
+            False,
+        ),
+        (
+            mapping_toml,
+            frontend._parse_mapping_toml,
+            None,
+            True,
+        ),
+        (
+            mapping_toml,
+            partial(frontend._parse_mapping_toml, style="pyproject.toml"),
+            lambda s: re.sub(r"^(\[+)", r"\1tool.babel.", s, flags=re.MULTILINE),
+            True,
+        ),
+    ],
+    ids=("cfg", "toml", "pyproject-toml"),
+)
+def test_parse_mapping(data: str, parser, preprocess, is_toml):
+    if preprocess:
+        data = preprocess(data)
+    if is_toml:
+        buf = BytesIO(data.encode())
+    else:
+        buf = StringIO(data)
+
+    method_map, options_map = parser(buf)
     assert len(method_map) == 4
 
     assert method_map[0] == ('**.py', 'python')
diff --git a/tests/messages/test_toml_config.py b/tests/messages/test_toml_config.py
new file mode 100644
index 000000000..6a3c15700
--- /dev/null
+++ b/tests/messages/test_toml_config.py
@@ -0,0 +1,39 @@
+import pathlib
+from io import BytesIO
+
+import pytest
+
+from babel.messages import frontend
+
+toml_test_cases_path = pathlib.Path(__file__).parent / "toml-test-cases"
+assert toml_test_cases_path.is_dir(), "toml-test-cases directory not found"
+
+
+def test_toml_mapping_multiple_patterns():
+    """
+    Test that patterns may be specified as a list in TOML,
+    and are expanded to multiple entries in the method map.
+    """
+    method_map, options_map = frontend._parse_mapping_toml(BytesIO(b"""
+[[mappings]]
+method = "python"
+pattern = ["xyz/**.py", "foo/**.py"]
+"""))
+    assert len(method_map) == 2
+    assert method_map[0] == ('xyz/**.py', 'python')
+    assert method_map[1] == ('foo/**.py', 'python')
+
+
+# Sorted so the collection order is deterministic; glob order depends on the filesystem.
+@pytest.mark.parametrize("test_case", sorted(toml_test_cases_path.glob("bad.*.toml")), ids=lambda p: p.name)
+def test_bad_toml_test_case(test_case: pathlib.Path):
+    """
+    Test that bad TOML files raise a ConfigurationError.
+    """
+    with pytest.raises(frontend.ConfigurationError):
+        with test_case.open("rb") as f:
+            frontend._parse_mapping_toml(
+                f,
+                filename=test_case.name,
+                style="pyproject.toml" if "pyproject" in test_case.name else "standalone",
+            )
diff --git a/tests/messages/toml-test-cases/bad.extractor.toml b/tests/messages/toml-test-cases/bad.extractor.toml
new file mode 100644
index 000000000..9992684d4
--- /dev/null
+++ b/tests/messages/toml-test-cases/bad.extractor.toml
@@ -0,0 +1,2 @@
+[extractors]
+custom = { module = "mypackage.module", func = "myfunc" }
diff --git a/tests/messages/toml-test-cases/bad.extractors-not-a-dict.toml b/tests/messages/toml-test-cases/bad.extractors-not-a-dict.toml
new file mode 100644
index 000000000..92d767824
--- /dev/null
+++ b/tests/messages/toml-test-cases/bad.extractors-not-a-dict.toml
@@ -0,0 +1 @@
+[[extractors]]
diff --git a/tests/messages/toml-test-cases/bad.just-a-mapping.toml b/tests/messages/toml-test-cases/bad.just-a-mapping.toml
new file mode 100644
index 000000000..40006f59a
--- /dev/null
+++ b/tests/messages/toml-test-cases/bad.just-a-mapping.toml
@@ -0,0 +1,3 @@
+[mapping]
+method = "jinja2"
+pattern = "**.html"
diff --git a/tests/messages/toml-test-cases/bad.mapping-not-a-dict.toml b/tests/messages/toml-test-cases/bad.mapping-not-a-dict.toml
new file mode 100644
index 000000000..f22367fa1
--- /dev/null
+++ b/tests/messages/toml-test-cases/bad.mapping-not-a-dict.toml
@@ -0,0 +1 @@
+mappings = [8]
diff --git a/tests/messages/toml-test-cases/bad.mappings-not-a-list.toml b/tests/messages/toml-test-cases/bad.mappings-not-a-list.toml
new file mode 100644
index 000000000..f30874179
--- /dev/null
+++ b/tests/messages/toml-test-cases/bad.mappings-not-a-list.toml
@@ -0,0 +1 @@
+mappings = "python"
diff --git a/tests/messages/toml-test-cases/bad.missing-extraction-method.toml b/tests/messages/toml-test-cases/bad.missing-extraction-method.toml
new file mode 100644
index 000000000..69a2be237
--- /dev/null
+++ b/tests/messages/toml-test-cases/bad.missing-extraction-method.toml
@@ -0,0 +1,2 @@
+[[mappings]]
+pattern = ["xyz/**.py", "foo/**.py"]
diff --git a/tests/messages/toml-test-cases/bad.multiple-mappings-not-a-list.toml b/tests/messages/toml-test-cases/bad.multiple-mappings-not-a-list.toml
new file mode 100644
index 000000000..48c151328
--- /dev/null
+++ b/tests/messages/toml-test-cases/bad.multiple-mappings-not-a-list.toml
@@ -0,0 +1,10 @@
+[mappings]
+method = "genshi"
+pattern = "**/templates/**.html"
+include_attrs = ""
+
+[mappings]
+method = "genshi"
+pattern = "**/templates/**.txt"
+template_class = "genshi.template:TextTemplate"
+encoding = "latin-1"
diff --git a/tests/messages/toml-test-cases/bad.non-string-extraction-method.toml b/tests/messages/toml-test-cases/bad.non-string-extraction-method.toml
new file mode 100644
index 000000000..19c3ece46
--- /dev/null
+++ b/tests/messages/toml-test-cases/bad.non-string-extraction-method.toml
@@ -0,0 +1,2 @@
+[[mappings]]
+method = 42
diff --git a/tests/messages/toml-test-cases/bad.pattern-type-2.toml b/tests/messages/toml-test-cases/bad.pattern-type-2.toml
new file mode 100644
index 000000000..d6c1382a7
--- /dev/null
+++ b/tests/messages/toml-test-cases/bad.pattern-type-2.toml
@@ -0,0 +1,3 @@
+[[mappings]]
+method = "big snake"
+pattern = [42]
diff --git a/tests/messages/toml-test-cases/bad.pattern-type.toml b/tests/messages/toml-test-cases/bad.pattern-type.toml
new file mode 100644
index 000000000..598b722de
--- /dev/null
+++ b/tests/messages/toml-test-cases/bad.pattern-type.toml
@@ -0,0 +1,3 @@
+[[mappings]]
+method = "big snake"
+pattern = 2048
diff --git a/tests/messages/toml-test-cases/bad.pyproject-without-tool-babel.toml b/tests/messages/toml-test-cases/bad.pyproject-without-tool-babel.toml
new file mode 100644
index 000000000..598b722de
--- /dev/null
+++ b/tests/messages/toml-test-cases/bad.pyproject-without-tool-babel.toml
@@ -0,0 +1,3 @@
+[[mappings]]
+method = "big snake"
+pattern = 2048
diff --git a/tests/messages/toml-test-cases/bad.standalone-with-babel-prefix.toml b/tests/messages/toml-test-cases/bad.standalone-with-babel-prefix.toml
new file mode 100644
index 000000000..cbc1d8dd2
--- /dev/null
+++ b/tests/messages/toml-test-cases/bad.standalone-with-babel-prefix.toml
@@ -0,0 +1,2 @@
+[babel.extractors]
+custom = "mypackage.module:myfunc"