diff --git a/CHANGELOG.md b/CHANGELOG.md index e9cbff8..5679cee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [UNRELEASED] + +* Fixed a problem with dependency detection when the package name differed from the key in pyodide-lock.json (#36). + ## [0.5.0] - 2025-07-18 * Updated to Shinylive web assets 0.5.0. diff --git a/shinylive/_deps.py b/shinylive/_deps.py index 80f5c6f..1fe91e5 100644 --- a/shinylive/_deps.py +++ b/shinylive/_deps.py @@ -36,7 +36,7 @@ } # Packages that should always be included in a Shinylive deployment. -BASE_PYODIDE_PACKAGES = {"distutils", "micropip", "ssl"} +BASE_PYODIDE_PACKAGE_NAMES = {"distutils", "micropip", "ssl"} AssetType = Literal["base", "python", "r"] @@ -56,6 +56,11 @@ class PyodidePackageInfo(TypedDict): # The package information structure used by Pyodide's pyodide-lock.json. +# Note that the key in `packages` may be something like "jsonschema-specifications", +# but the actual name of the package may be different, like "jsonschema_specifications". +# (The "name" entry in the PyodidePackageInfo object is the actual package name.) +# And also further note that the module names in the "imports" list are not necessarily +# the same as either: the "opencv-python" package has a module name "cv2". 
class PyodideLockFile(TypedDict): info: dict[str, str] packages: dict[str, PyodidePackageInfo] @@ -92,9 +97,15 @@ def _dep_names_to_pyodide_pkg_infos( dep_names: Iterable[str], ) -> list[PyodidePackageInfo]: pyodide_lock = _pyodide_lock_data() - pkg_infos: list[PyodidePackageInfo] = [ - copy.deepcopy(pyodide_lock["packages"][dep_name]) for dep_name in dep_names - ] + pkg_infos: list[PyodidePackageInfo] = [] + + for dep_name in dep_names: + dep_key = dep_name_to_dep_key(dep_name) + if dep_key is None: + continue + pkg_info = copy.deepcopy(pyodide_lock["packages"][dep_key]) + pkg_infos.append(pkg_info) + return pkg_infos @@ -378,7 +389,7 @@ def base_package_deps() -> list[PyodidePackageInfo]: Return list of python packages that should be included in all python Shinylive deployments. The returned data structure is a list of PyodidePackageInfo objects. """ - dep_names = _find_recursive_deps(BASE_PYODIDE_PACKAGES) + dep_names = _find_recursive_deps(BASE_PYODIDE_PACKAGE_NAMES) pkg_infos = _dep_names_to_pyodide_pkg_infos(dep_names) return pkg_infos @@ -388,7 +399,7 @@ def base_package_deps() -> list[PyodidePackageInfo]: # Internal functions # ============================================================================= def _find_recursive_deps( - pkgs: Iterable[str], + dep_names: Iterable[str], verbose_print: Callable[..., None] = lambda *args: None, ) -> list[str]: """ @@ -397,45 +408,77 @@ def _find_recursive_deps( packages passed in. """ pyodide_lock = _pyodide_lock_data() - deps = list(pkgs) + + # The keys in pyodide_lock are not the same as the package names. For example, the + # key "jsonschema-specifications" points to an object where the "name" entry is + # "jsonschema_specifications". The dependencies are listed with names, not keys. + + dep_names = list(dep_names) i = 0 - while i < len(deps): - dep = deps[i] - if dep not in pyodide_lock["packages"]: - # TODO: Need to distinguish between built-in packages and external ones in - # requirements.txt. 
- verbose_print( - f" {dep} not in pyodide-lock.json. Assuming it is in base Pyodide or in requirements.txt." - ) - deps.remove(dep) + while i < len(dep_names): + dep_name = dep_names[i] + dep_key: str | None = dep_name_to_dep_key(dep_name) + + if dep_key not in pyodide_lock["packages"]: + if dep_name not in BASE_PYODIDE_PACKAGE_NAMES: + # TODO: Need to distinguish between built-in packages and external ones in + # requirements.txt. + verbose_print( + f" {dep_name} not in pyodide-lock.json. Assuming it is in base Pyodide or in requirements.txt." + ) + dep_names.remove(dep_name) continue - dep_deps = set(pyodide_lock["packages"][dep]["depends"]) - new_deps = dep_deps.difference(deps) - deps.extend(new_deps) + dep_depnames = set(pyodide_lock["packages"][dep_key]["depends"]) + new_depnames = dep_depnames.difference(dep_names) + dep_names.extend(new_depnames) i += 1 - return deps + return dep_names -def _dep_name_to_dep_file(dep_name: str) -> str: +def dep_name_to_dep_key(name: str) -> str | None: """ - Given the name of a dependency, like "pandas", return the name of the .whl file, - like "pandas-1.4.2-cp310-cp310-emscripten_3_1_14_wasm32.whl". + Convert a package name to a key that can be used to look up the package in + pyodide-lock.json. + + The keys in pyodide-lock.json are not the same as the package names. For example, + the key "jsonschema-specifications" points to an object where the "name" entry is + "jsonschema_specifications". + + Note that the names are lowercased because the package names should be treated as + case-insensitive. 
https://github.com/pyodide/pyodide/issues/1614 """ - pyodide_lock = _pyodide_lock_data() - return pyodide_lock["packages"][dep_name]["file_name"] + # Special case for base pyodide packages + if name in BASE_PYODIDE_PACKAGE_NAMES: + return name + + name = name.lower() + if name not in _dep_name_to_dep_key_mappings(): + return None + + return _dep_name_to_dep_key_mappings()[name] -def _dep_names_to_dep_files(dep_names: list[str]) -> list[str]: +@functools.lru_cache +def _dep_name_to_dep_key_mappings() -> dict[str, str]: """ - Given a list of dependency names, like ["pandas"], return a list with the names of - corresponding .whl files (from data in pyodide-lock.json), like - ["pandas-1.4.2-cp310-cp310-emscripten_3_1_14_wasm32.whl"]. + Return a dictionary that maps package names to keys. This is needed because + sometimes the package name and package key are different. For example, the package + name is "jsonschema_specifications", but the package key is + "jsonschema-specifications". + + Note that the names are lowercased because the package names should be treated as + case-insensitive. https://github.com/pyodide/pyodide/issues/1614 """ + name_to_key: dict[str, str] = {} + pyodide_lock = _pyodide_lock_data() - dep_files = [pyodide_lock["packages"][x]["file_name"] for x in dep_names] - return dep_files + for key, pkg_info in pyodide_lock["packages"].items(): + name = pkg_info["name"].lower() + name_to_key[name] = key + + return name_to_key def _find_import_app_contents(app_contents: list[FileContentJson]) -> set[str]: @@ -452,7 +495,7 @@ def _find_import_app_contents(app_contents: list[FileContentJson]) -> set[str]: # Note that at this point, the imports are module names, like "cv2", but these can # sometimes differ from the package names, like "opencv-python". We need to map from # module names to package names. 
- packages = [module_to_package(x) for x in imports] + packages = [module_to_package_key(x) for x in imports] packages = [x for x in packages if x is not None] return set(packages) @@ -468,7 +511,7 @@ def _find_requirements_app_contents(app_contents: list[FileContentJson]) -> set[ """ packages: set[str] = set() for file_content in app_contents: - if not file_content["name"] != "requirements.txt": + if file_content["name"] != "requirements.txt": continue packages = packages.union( @@ -478,12 +521,12 @@ def _find_requirements_app_contents(app_contents: list[FileContentJson]) -> set[ return packages -def module_to_package(module: str) -> str | None: +def module_to_package_key(module: str) -> str | None: """ Given a module name, like "cv2", return the corresponding package name, like "opencv-python". If not found, return None. """ - module_to_package = _module_to_package_mappings() + module_to_package = _module_to_package_key_mappings() if module in module_to_package: return module_to_package[module] else: @@ -491,7 +534,7 @@ def module_to_package(module: str) -> str | None: @functools.lru_cache -def _module_to_package_mappings() -> dict[str, str]: +def _module_to_package_key_mappings() -> dict[str, str]: """ Return a dictionary that maps module names to package names. This is needed because sometimes the module name and package name are different. For example, the module @@ -499,10 +542,10 @@ def _module_to_package_mappings() -> dict[str, str]: """ pyodide_lock = _pyodide_lock_data() module_to_package: dict[str, str] = {} - for pkg_name, pkg_info in pyodide_lock["packages"].items(): + for pkg_key, pkg_info in pyodide_lock["packages"].items(): modules = pkg_info["imports"] for module in modules: - module_to_package[module] = pkg_name + module_to_package[module] = pkg_key return module_to_package @@ -597,7 +640,10 @@ def _find_packages_in_requirements(req_txt: str) -> list[str]: else: # If we got here, it's a package specification. 
# Remove any trailing version info: "my-package (>= 1.0.0)" -> "my-package" - pkg_name = re.sub(r"([a-zA-Z0-9._-]+)(.*)", r"\\1", line).strip() + pkg_name = re.sub(r"([a-zA-Z0-9._-]+)(.*)", r"\1", line).strip() + # Replace underscores with hyphens: "typing_extensions" -> "typing-extensions" + pkg_name = pkg_name.replace("_", "-") + reqs.append(pkg_name) return reqs diff --git a/shinylive/_version/__init__.py b/shinylive/_version/__init__.py index 9c75e65..94aacb9 100644 --- a/shinylive/_version/__init__.py +++ b/shinylive/_version/__init__.py @@ -1,5 +1,5 @@ # The version of this Python package. -SHINYLIVE_PACKAGE_VERSION = "0.5.0" +SHINYLIVE_PACKAGE_VERSION = "0.5.0.9000" # This is the version of the Shinylive assets to use. SHINYLIVE_ASSETS_VERSION = "0.5.0" diff --git a/tests/test_assets.py b/tests/test_assets.py index e410012..65f8b15 100644 --- a/tests/test_assets.py +++ b/tests/test_assets.py @@ -1,4 +1,4 @@ -"""Tests for Shinlyive assets.""" +"""Tests for Shinylive assets.""" import os diff --git a/tests/test_deps.py b/tests/test_deps.py new file mode 100644 index 0000000..b88f48f --- /dev/null +++ b/tests/test_deps.py @@ -0,0 +1,96 @@ +"""Tests for Shinylive dependency detection.""" + +import os + +import pytest + + +def test_requirements_txt(): + from shinylive._deps import _find_packages_in_requirements + + requirements_txt = """ + typing_extensions + jsonschema-specifications (<1.0) + # comment + """ + + # This should convert '_' to '-', and remove the version constraints. + assert _find_packages_in_requirements(requirements_txt) == [ + "typing-extensions", + "jsonschema-specifications", + ] + + # Should preserve case here (in other steps it will be lowercased). 
+ assert _find_packages_in_requirements("Jinja2") == ["Jinja2"] + assert _find_packages_in_requirements("jinja2") == ["jinja2"] + + +# ====================================================================================== +# Don't run remaining tests in CI, unless we're triggered by a release event. This is +# because they require the assets to be installed. In the future, it would make sense to +# run this test when we're on an rc branch. +# ====================================================================================== +if os.environ.get("CI") == "true" and os.environ.get("GITHUB_EVENT_NAME") != "release": + pytest.skip( + reason="Don't run this test in CI, unless we're on a release branch.", + allow_module_level=True, + ) + + +def test_module_to_package_key(): + from shinylive._deps import module_to_package_key + + assert module_to_package_key("cv2") == "opencv-python" + assert module_to_package_key("black") == "black" + assert module_to_package_key("jinja2") == "jinja2" + + # Should be case sensitive for module names. + assert module_to_package_key("Jinja2") is None + + assert module_to_package_key("foobar") is None + + +def test_dep_name_to_dep_key(): + from shinylive._deps import dep_name_to_dep_key + + assert dep_name_to_dep_key("black") == "black" + assert dep_name_to_dep_key("typing-extensions") == "typing-extensions" + assert ( + dep_name_to_dep_key("jsonschema_specifications-tests") + == "jsonschema-specifications-tests" + ) + + # Should not convert `_` to `-` + assert dep_name_to_dep_key("typing_extensions") is None + + # Should be case insensitive to input. + assert dep_name_to_dep_key("Jinja2") == "jinja2" + assert dep_name_to_dep_key("JiNJa2") == "jinja2" + + assert dep_name_to_dep_key("cv2") is None + + # Special case for a base pyodide package. It is not in pyodide-lock.json but should 
+ assert dep_name_to_dep_key("distutils") == "distutils" + + +def test_find_recursive_deps(): + from shinylive._deps import _find_recursive_deps + + # It is possible that these dependencies will change in future versions of Pyodide, + # but the reason we're testing jsonschema specifically is because it includes + # jsonschema_specifications, which is the package name (and not the key). + assert sorted(_find_recursive_deps(["jsonschema"])) == [ + "attrs", + "jsonschema", + "jsonschema_specifications", + "pyrsistent", + "referencing", + "rpds-py", + "six", + ] + + assert sorted(_find_recursive_deps(["opencv-python"])) == [ + "numpy", + "opencv-python", + ]