Skip to content

Commit

Permalink
Merge pull request #787 from AndrewFasano/fix_786
Browse files Browse the repository at this point in the history
fix(gzip): empty files are invalid and should not be parsed
  • Loading branch information
qkaiser authored Feb 28, 2024
2 parents 61f6925 + b3739c2 commit bbfca4e
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 7 deletions.
12 changes: 12 additions & 0 deletions tests/test_file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,18 @@ def fake_file() -> File:
return File.from_bytes(b"0123456789abcdefghijklmnopqrst")


class TestFile:
    """Check that a File cannot be created from empty input."""

    def test_file_from_empty_bytes(self):
        # Zero-length content must be rejected instead of being mapped.
        with pytest.raises(InvalidInputFormat):
            File.from_bytes(bytes())

    def test_file_from_empty_file(self, tmp_path):
        # A freshly touched file exists on disk but holds zero bytes.
        empty_path = tmp_path / "file"
        empty_path.touch()
        with pytest.raises(InvalidInputFormat):
            File.from_path(empty_path)


class TestStructParser:
def test_parse_correct_endianness(self):
test_content = b"\x01\x02\x03\x04"
Expand Down
15 changes: 10 additions & 5 deletions unblob/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,17 @@ class SeekError(ValueError):
"""Specific ValueError for File.seek."""


class InvalidInputFormat(Exception):
    """Signal input that cannot be processed, e.g. empty content given to File."""


class File(mmap.mmap):
access: int

@classmethod
def from_bytes(cls, content: bytes):
if not content:
raise InvalidInputFormat("Can't create File from empty bytes.")
m = cls(-1, len(content))
m.write(content)
m.seek(0)
Expand All @@ -55,7 +61,10 @@ def from_bytes(cls, content: bytes):
def from_path(cls, path: Path, access=mmap.ACCESS_READ):
    """Memory-map the file at *path* and return it as an instance of ``cls``.

    Args:
        path: File to map. It is opened in ``"r+b"`` mode when ``access`` is
            ``mmap.ACCESS_WRITE`` and in ``"rb"`` mode otherwise.
        access: mmap access mode; it is also stored on the returned object
            as its ``access`` attribute.

    Returns:
        The mapped file, covering the whole file (length ``0`` asks mmap to
        map up to the current file size).

    Raises:
        InvalidInputFormat: if mmap rejects the file with ``ValueError``,
            which is what happens for an empty file.
    """
    mode = "r+b" if access == mmap.ACCESS_WRITE else "rb"
    with path.open(mode) as base_file:
        # Map exactly once, inside the guard: an unguarded mapping before the
        # try would let ValueError escape untranslated for empty files and
        # would leave a second, unused map behind for valid ones.
        try:
            m = cls(base_file.fileno(), 0, access=access)
        except ValueError as exc:
            raise InvalidInputFormat(
                f"Can't create File from {path}: {exc}"
            ) from exc
        m.access = access
        return m

Expand Down Expand Up @@ -115,10 +124,6 @@ def tell(self):
return self._file.tell() - self._offset


class InvalidInputFormat(Exception):
    """Signal input that cannot be processed, e.g. empty content given to File."""


class Endian(enum.Enum):
LITTLE = "<"
BIG = ">"
Expand Down
6 changes: 5 additions & 1 deletion unblob/handlers/archive/sevenzip.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,11 @@ class MultiVolumeSevenZipHandler(DirectoryHandler):

def calculate_multifile(self, file: Path) -> Optional[MultiFile]:
paths = sorted(
[p for p in file.parent.glob(f"{file.stem}.*") if p.resolve().exists()]
[
p
for p in file.parent.glob(f"{file.stem}.*")
if p.resolve().exists() and p.stat().st_size > 0
]
)
if not paths:
return None
Expand Down
6 changes: 5 additions & 1 deletion unblob/handlers/compression/gzip.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,11 @@ def is_valid_gzip(self, path: Path) -> bool:

def calculate_multifile(self, file: Path) -> Optional[MultiFile]:
paths = sorted(
[p for p in file.parent.glob(f"{file.stem}.*") if p.resolve().exists()]
[
p
for p in file.parent.glob(f"{file.stem}.*")
if p.resolve().exists() and p.stat().st_size > 0
]
)

# we 'discard' paths that are not the first in the ordered list,
Expand Down

0 comments on commit bbfca4e

Please sign in to comment.