diff --git a/tests/integration/archive/tar/__input__/symlinks.tar b/tests/integration/archive/tar/__input__/symlinks.tar new file mode 100644 index 0000000000..8ef485d3ab --- /dev/null +++ b/tests/integration/archive/tar/__input__/symlinks.tar @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:152c7a03b3b3276f8a7527bfde01f26b0c4f2ae3f705a6deac0183d8bac2ba1c +size 10240 diff --git a/tests/integration/archive/tar/__output__/absolute.tar_extract/absolute/absolute-symlink b/tests/integration/archive/tar/__output__/absolute.tar_extract/absolute/absolute-symlink index 5b0423aa0e..23dcdb8ce7 120000 --- a/tests/integration/archive/tar/__output__/absolute.tar_extract/absolute/absolute-symlink +++ b/tests/integration/archive/tar/__output__/absolute.tar_extract/absolute/absolute-symlink @@ -1 +1 @@ -absolute/absolute-file \ No newline at end of file +absolute-file \ No newline at end of file diff --git a/tests/integration/archive/tar/__output__/absolute.tar_extract/absolute/link_to_etc_shadow b/tests/integration/archive/tar/__output__/absolute.tar_extract/absolute/link_to_etc_shadow index 5d50cf53f5..c3342eb2fa 120000 --- a/tests/integration/archive/tar/__output__/absolute.tar_extract/absolute/link_to_etc_shadow +++ b/tests/integration/archive/tar/__output__/absolute.tar_extract/absolute/link_to_etc_shadow @@ -1 +1 @@ -etc/shadow \ No newline at end of file +../etc/shadow \ No newline at end of file diff --git a/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/bin/busybox b/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/bin/busybox new file mode 100644 index 0000000000..6dfcf97655 --- /dev/null +++ b/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/bin/busybox @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58eaf5a78d580f5dbd49d31a5b733094169b31bfdf49055b74bcac2877d8f58c +size 7 diff --git a/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/cp b/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/cp new file mode 120000 index 0000000000..71259713ee --- /dev/null +++ b/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/cp @@ -0,0 +1 @@ +../bin/busybox \ No newline at end of file diff --git a/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/find b/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/find new file mode 120000 index 0000000000..71259713ee --- /dev/null +++ b/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/find @@ -0,0 +1 @@ +../bin/busybox \ No newline at end of file diff --git a/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/ln b/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/ln new file mode 120000 index 0000000000..71259713ee --- /dev/null +++ b/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/ln @@ -0,0 +1 @@ +../bin/busybox \ No newline at end of file diff --git a/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/ls b/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/ls new file mode 120000 index 0000000000..71259713ee --- /dev/null +++ b/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/ls @@ -0,0 +1 @@ +../bin/busybox \ No newline at end of file diff --git a/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/mv b/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/mv new file mode 120000 index 0000000000..f948f1a72d --- /dev/null +++ b/tests/integration/archive/tar/__output__/symlinks.tar_extract/test/sbin/mv @@ -0,0 +1 @@ +../../bin/busybox \ No newline at end of file diff --git a/tests/test_file_utils.py b/tests/test_file_utils.py index 9a20da7420..1fc94138c2 100644 --- a/tests/test_file_utils.py +++ b/tests/test_file_utils.py @@ -25,7 +25,7 @@ round_down, round_up, ) -from unblob.report import PathTraversalProblem +from unblob.report import LinkExtractionProblem, PathTraversalProblem @pytest.mark.parametrize( @@ -503,6 +503,30 @@ def test_create_symlink(self, sandbox: FileSystem): assert os.readlink(output_path) == "target file" assert sandbox.problems == [] + def test_create_symlink_target_inside_sandbox(self, sandbox: FileSystem): + # ./sbin/shell -> ../bin/sh + sandbox.mkdir(Path("bin")) + sandbox.write_bytes(Path("bin/sh"), b"posix shell") + sandbox.mkdir(Path("sbin")) + sandbox.create_symlink(Path("../bin/sh"), Path("sbin/shell")) + + output_path = sandbox.root / "sbin/shell" + assert output_path.read_bytes() == b"posix shell" + assert output_path.exists() + assert os.readlink(output_path) == "../bin/sh" + assert sandbox.problems == [] + + def test_create_symlink_target_outside_sandbox(self, sandbox: FileSystem): + # /shell -> ../bin/sh + sandbox.mkdir(Path("bin")) + sandbox.write_bytes(Path("bin/sh"), b"posix shell") + sandbox.create_symlink(Path("../bin/sh"), Path("/shell")) + + assert any(p for p in sandbox.problems if isinstance(p, LinkExtractionProblem)) + output_path = sandbox.root / "shell" + assert not output_path.exists() + assert not output_path.is_symlink() + def test_create_symlink_absolute_paths(self, sandbox: FileSystem): sandbox.write_bytes(Path("target file"), b"test content") sandbox.create_symlink(Path("/target file"), Path("/symlink")) diff --git a/unblob/handlers/archive/_safe_tarfile.py b/unblob/handlers/archive/_safe_tarfile.py index 0ecc2e081e..86203f454b 100644 --- a/unblob/handlers/archive/_safe_tarfile.py +++ b/unblob/handlers/archive/_safe_tarfile.py @@ -63,7 +63,7 @@ def extract(self, tarinfo: tarfile.TarInfo, extract_root: Path): # noqa: C901 "Absolute path.", "Converted to extraction relative path.", ) - tarinfo.name = f"./{tarinfo.name}" + tarinfo.name = str(Path(tarinfo.name).relative_to("/")) # prevent traversal attempts through file name if not is_safe_path(basedir=extract_root, path=extract_root / tarinfo.name): @@ -77,16 +77,36 @@ def extract(self, tarinfo: tarfile.TarInfo, extract_root: Path): # noqa: C901 # prevent traversal attempts through links if tarinfo.islnk() or tarinfo.issym(): if Path(tarinfo.linkname).is_absolute(): + + def calculate_linkname(): + root = extract_root.resolve() + path = (extract_root / tarinfo.name).resolve() + + if path.parts[: len(root.parts)] != root.parts: + return None + + depth = max(0, len(path.parts) - len(root.parts) - 1) + return ("/".join([".."] * depth) or ".") + tarinfo.linkname + + relative_linkname = calculate_linkname() + if relative_linkname is None: + self.record_problem( + tarinfo, + "Absolute path conversion to extraction relative failed - would escape root.", + "Skipped.", + ) + return + + assert not Path(relative_linkname).is_absolute() self.record_problem( tarinfo, "Absolute path as link target.", "Converted to extraction relative path.", ) - tarinfo.linkname = f"./{tarinfo.linkname}" - if not is_safe_path( - basedir=extract_root, - path=extract_root / tarinfo.linkname, - ): + tarinfo.linkname = relative_linkname + + resolved_path = (extract_root / tarinfo.name).parent / tarinfo.linkname + if not is_safe_path(basedir=extract_root, path=resolved_path): self.record_problem( tarinfo, "Traversal attempt through link path.",