diff --git a/tests/test_file_utils.py b/tests/test_file_utils.py
index 1fc94138c2..0eb50fe8f5 100644
--- a/tests/test_file_utils.py
+++ b/tests/test_file_utils.py
@@ -81,6 +81,18 @@ def fake_file() -> File:
     return File.from_bytes(b"0123456789abcdefghijklmnopqrst")
 
 
+class TestFile:
+    def test_file_from_empty_bytes(self):
+        with pytest.raises(InvalidInputFormat):
+            File.from_bytes(b"")
+
+    def test_file_from_empty_file(self, tmp_path):
+        file_path = tmp_path / "file"
+        file_path.touch()
+        with pytest.raises(InvalidInputFormat):
+            File.from_path(file_path)
+
+
 class TestStructParser:
     def test_parse_correct_endianness(self):
         test_content = b"\x01\x02\x03\x04"
diff --git a/unblob/file_utils.py b/unblob/file_utils.py
index 21e887b487..790c2075f4 100644
--- a/unblob/file_utils.py
+++ b/unblob/file_utils.py
@@ -40,11 +40,17 @@ class SeekError(ValueError):
     """Specific ValueError for File.seek."""
 
 
+class InvalidInputFormat(Exception):
+    pass
+
+
 class File(mmap.mmap):
     access: int
 
     @classmethod
     def from_bytes(cls, content: bytes):
+        if not content:
+            raise InvalidInputFormat("Can't create File from empty bytes.")
         m = cls(-1, len(content))
         m.write(content)
         m.seek(0)
@@ -55,7 +61,10 @@ def from_bytes(cls, content: bytes):
     def from_path(cls, path: Path, access=mmap.ACCESS_READ):
         mode = "r+b" if access == mmap.ACCESS_WRITE else "rb"
         with path.open(mode) as base_file:
-            m = cls(base_file.fileno(), 0, access=access)
+            try:
+                m = cls(base_file.fileno(), 0, access=access)
+            except ValueError as exc:
+                raise InvalidInputFormat from exc
             m.access = access
             return m
 
@@ -115,10 +124,6 @@ def tell(self):
         return self._file.tell() - self._offset
 
 
-class InvalidInputFormat(Exception):
-    pass
-
-
 class Endian(enum.Enum):
     LITTLE = "<"
     BIG = ">"
diff --git a/unblob/handlers/archive/sevenzip.py b/unblob/handlers/archive/sevenzip.py
index f81019f737..7254629f6a 100644
--- a/unblob/handlers/archive/sevenzip.py
+++ b/unblob/handlers/archive/sevenzip.py
@@ -107,7 +107,11 @@ class MultiVolumeSevenZipHandler(DirectoryHandler):
 
     def calculate_multifile(self, file: Path) -> Optional[MultiFile]:
         paths = sorted(
-            [p for p in file.parent.glob(f"{file.stem}.*") if p.resolve().exists()]
+            [
+                p
+                for p in file.parent.glob(f"{file.stem}.*")
+                if p.resolve().exists() and p.stat().st_size > 0
+            ]
         )
         if not paths:
             return None
diff --git a/unblob/handlers/compression/gzip.py b/unblob/handlers/compression/gzip.py
index eb6f57a4a8..a1dcd1b53b 100644
--- a/unblob/handlers/compression/gzip.py
+++ b/unblob/handlers/compression/gzip.py
@@ -167,7 +167,11 @@ def is_valid_gzip(self, path: Path) -> bool:
 
     def calculate_multifile(self, file: Path) -> Optional[MultiFile]:
         paths = sorted(
-            [p for p in file.parent.glob(f"{file.stem}.*") if p.resolve().exists()]
+            [
+                p
+                for p in file.parent.glob(f"{file.stem}.*")
+                if p.resolve().exists() and p.stat().st_size > 0
+            ]
         )
 
         # we 'discard' paths that are not the first in the ordered list,