From cdb311460e6b8ce437ac713f8d2b5b707cdd1a66 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Fri, 15 Sep 2023 10:32:11 -0700 Subject: [PATCH 1/2] runner.conda: Plug isolation leaks related to Python MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By default Python will search for modules in the user site directory, e.g. ~/.local/lib/python3.10/site-packages/. This is an isolation leak that can cause conflicts with our Conda runtime since its not containerized, and we observed such an issue during a workshop. Plug that leak by both disabling the searching of a user site directory entirely (the proximate issue) and pointing the whole Python user base directory to an alternate location (a preventative measure against other user base directory usages). Similarly, PYTHONPATH and (probably more rarely) PYTHONHOME also have the potential to cause similar issues, so we now ensure they're unset when entering our Conda runtime. I reviewed other environment variables used by Python¹ and they seem reasonable to leave as-is (at least at this point). There are a few which could cause issues, but I expect they'd be limited to usage for debugging/troubleshooting/interactive use. Resolves . ¹ --- nextstrain/cli/runner/conda.py | 66 +++++++++++++++++----------------- 1 file changed, 34 insertions(+), 32 deletions(-) diff --git a/nextstrain/cli/runner/conda.py b/nextstrain/cli/runner/conda.py index 1cd0840c..d802a657 100644 --- a/nextstrain/cli/runner/conda.py +++ b/nextstrain/cli/runner/conda.py @@ -108,6 +108,37 @@ NEXTSTRAIN_BASE = os.environ.get("NEXTSTRAIN_CONDA_BASE_PACKAGE") \ or "nextstrain-base" +PYTHONUSERBASE = RUNTIME_ROOT / "python-user-base" + +# Construct a PATH with our runtime prefix which provides some, but not total, +# isolation from the rest of the system. +PATH = os.pathsep.join(map(str, [ + # Programs installed by this runtime. + PREFIX_BIN, + + # Python's idea of a default path for the system, which currently under + # CPython is either "/bin:/usr/bin" on POSIX systems or ".;C:\\bin" on + # Windows. This will ensure basic system commands like `ls` are + # available, although it will also "leak" any user-installed programs + # there. + os.defpath, +])) + +EXEC_ENV = { + "PATH": PATH, + + # Avoid letting user-set custom Python installs and module search paths + # from outside the runtime leak inside. + "PYTHONHOME": None, + "PYTHONPATH": None, + + # Avoid letting the user site directory leak into the runtime, c.f. + # and + # . + "PYTHONUSERBASE": str(PYTHONUSERBASE), + "PYTHONNOUSERSITE": "1", +} + def register_arguments(parser) -> None: """ @@ -136,36 +167,7 @@ def run(opts, argv, working_volume = None, extra_env: Env = {}, cpus: int = None # DLL searching). # -trs, 13 Jan 2023 - extra_env = { - **extra_env, - "PATH": path_with_prefix(), - } - - return exec_or_return(argv, extra_env) - - -def path_with_prefix() -> str: - """ - Constructs a ``PATH`` with our runtime prefix. - - The returned ``PATH`` consists of the: - - 1. Runtime prefix - 2. :py:attr:`os.defpath` - - which provides some, but not total, isolation from the rest of the system. - """ - return os.pathsep.join(map(str, [ - # Programs installed by this runtime. - PREFIX_BIN, - - # Python's idea of a default path for the system, which currently under - # CPython is either "/bin:/usr/bin" on POSIX systems or ".;C:\\bin" on - # Windows. This will ensure basic system commands like `ls` are - # available, although it will also "leak" any user-installed programs - # there. - os.defpath, - ])) + return exec_or_return(argv, {**extra_env, **EXEC_ENV}) def setup(dry_run: bool = False, force: bool = False) -> RunnerSetupStatus: @@ -395,7 +397,7 @@ def which_finds_our(cmd) -> bool: # ".exe" extension on Windows, which is why we don't just naively test # for existence ourselves. File extensions are also why we don't test # equality below instead check containment in PREFIX_BIN. - found = shutil.which(cmd, path = path_with_prefix()) + found = shutil.which(cmd, path = PATH) if not found: return False @@ -412,7 +414,7 @@ def which_finds_our(cmd) -> bool: def runnable(*argv) -> bool: try: - capture_output(argv, extra_env = {"PATH": path_with_prefix()}) + capture_output(argv, extra_env = EXEC_ENV) return True except (OSError, subprocess.CalledProcessError): return False From a9e6c34b1801dc10369c30aeded4657c4919081a Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Tue, 19 Sep 2023 16:02:41 -0700 Subject: [PATCH 2/2] CHANGES: Document Conda runtime leakage fixes --- CHANGES.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index adac5abb..75b1d2d2 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -13,6 +13,15 @@ development source code and as such may not be routinely kept up to date. # __NEXT__ +## Bug fixes + +* We've plugged some isolation leaks in the Conda runtime where the + [Python user site directory](https://docs.python.org/3/library/site.html), + e.g. `~/.local/lib/pythonX.Y/site-packages`, as well as the + [`PYTHONPATH` and `PYTHONHOME` environment variables](https://docs.python.org/3/using/cmdline.html#environment-variables) + could influence and break the runtime. + ([#311](https://github.com/nextstrain/cli/pull/311)) + # 7.3.0.post1 (19 September 2023) @@ -22,6 +31,7 @@ _See also changes in 7.3.0 which was an unreleased version._ * Update CI to test against the SingularityCE 3.x series only ([#314](https://github.com/nextstrain/cli/pull/314)) + # 7.3.0 (19 September 2023) _Unreleased due to [test failures](https://github.com/nextstrain/cli/actions/runs/6238951959). Changes first released as part of 7.3.0.post1._