Skip to content

Commit

Permalink
fix(handlers): handle dangling symlinks in MultiFile handlers.
Browse files Browse the repository at this point in the history
MultiFile handlers would collect every file in a directory whose name
matches a specific glob pattern, without checking whether those files
actually exist.

For example, a directory could contain dangling symlinks whose names
match the glob pattern. This would lead to a FileNotFoundError
being raised by the multi-file handlers.
  • Loading branch information
qkaiser committed Feb 16, 2024
1 parent 841aa29 commit 25c43e3
Show file tree
Hide file tree
Showing 10 changed files with 22 additions and 4 deletions.
Git LFS file not shown
Git LFS file not shown
8 changes: 6 additions & 2 deletions unblob/handlers/archive/sevenzip.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,12 @@ class MultiVolumeSevenZipHandler(DirectoryHandler):
PATTERN = Glob("*.7z.001")

def calculate_multifile(self, file: Path) -> Optional[MultiFile]:
paths = sorted(
[p for p in file.parent.glob(f"{file.stem}.*") if p.resolve().exists()]
)
if not paths:
return None

with file.open("rb") as f:
header_data = f.read(HEADER_SIZE)

Expand All @@ -117,8 +123,6 @@ def calculate_multifile(self, file: Path) -> Optional[MultiFile]:
size = calculate_sevenzip_size(header)
logger.debug("Sevenzip header", header=header, size=size, _verbosity=3)

paths = sorted(file.parent.glob(f"{file.stem}.*"))

files_size = sum(path.stat().st_size for path in paths)
logger.debug(
"Multi-volume files", paths=paths, files_size=files_size, _verbosity=2
Expand Down
6 changes: 4 additions & 2 deletions unblob/handlers/compression/gzip.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,12 +166,14 @@ def is_valid_gzip(self, path: Path) -> bool:
return True

def calculate_multifile(self, file: Path) -> Optional[MultiFile]:
paths = sorted(file.parent.glob(f"{file.stem}.*"))
paths = sorted(
[p for p in file.parent.glob(f"{file.stem}.*") if p.resolve().exists()]
)

# we 'discard' paths that are not the first in the ordered list,
# otherwise we will end up with colliding reports, one for every
# path in the list.
if file != paths[0]:
if not paths or file != paths[0]:
return None

if self.is_valid_gzip(file):
Expand Down

0 comments on commit 25c43e3

Please sign in to comment.