diff --git a/tests/integration/archive/zip/zip64/__input__/zip64-without-cd.zip b/tests/integration/archive/zip/zip64/__input__/zip64-without-cd.zip new file mode 100644 index 0000000000..a536305240 --- /dev/null +++ b/tests/integration/archive/zip/zip64/__input__/zip64-without-cd.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4ea164d7b08ba3ee6dbea6808e7d5438ffbf317420e28d0c5e42b7090f42851 +size 126 diff --git a/tests/integration/archive/zip/zip64/__output__/zip64-without-cd.zip_extract/- b/tests/integration/archive/zip/zip64/__output__/zip64-without-cd.zip_extract/- new file mode 100644 index 0000000000..fb25b54492 --- /dev/null +++ b/tests/integration/archive/zip/zip64/__output__/zip64-without-cd.zip_extract/- @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:303980bcb9e9e6cdec515230791af8b0ab1aaa244b58a8d99152673aa22197d0 +size 6 diff --git a/unblob/handlers/archive/zip.py b/unblob/handlers/archive/zip.py index 0a63ec09fb..44f945e944 100644 --- a/unblob/handlers/archive/zip.py +++ b/unblob/handlers/archive/zip.py @@ -114,9 +114,9 @@ def is_zip64_eocd(end_of_central_directory): or end_of_central_directory.offset_of_cd == 0xFFFFFFFF ) - @staticmethod - def is_zip64_cd_file(file_header): + def has_zip64_tag(self, file): # see https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT section 4.3.9.2 + file_header = self.cparser_le.partial_cd_file_header_t(file) return ( file_header.file_size == 0xFFFFFFFF or file_header.compress_size == 0xFFFFFFFF @@ -144,23 +144,23 @@ def _parse_zip64(self, file: File, start_offset: int, offset: int): "Missing ZIP64 EOCD header record header in ZIP chunk." ) return zip64_eocd - raise InvalidInputFormat( - "Missing ZIP64 EOCD locator record header in ZIP chunk." - ) + return None + + def get_zip64_eocd(self, file, start_offset, offset, end_of_central_directory): + # some values in the CD can be FFFF, indicating its a zip64 + # if the offset of the CD is 0xFFFFFFFF, its definitely one + # otherwise we check every other header indicating zip64 + if self.is_zip64_eocd(end_of_central_directory): + return self._parse_zip64(file, start_offset, offset) - def is_zip64(self, file, start_offset, offset, end_of_central_directory): absolute_offset_of_cd = start_offset + end_of_central_directory.offset_of_cd if 0 < absolute_offset_of_cd < offset: file.seek(absolute_offset_of_cd, io.SEEK_SET) - file_header = self.cparser_le.partial_cd_file_header_t(file) - if self.is_zip64_cd_file(file_header): - return True + if self.has_zip64_tag(file): + return self._parse_zip64(file, start_offset, offset) - # some values in the CD can be FFFF, indicating its a zip64 - # if the offset of the CD is 0xFFFFFFFF, its definitely one - # otherwise we check every other header indicating zip64 - return self.is_zip64_eocd(end_of_central_directory) + return None def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]: has_encrypted_files = False @@ -173,9 +173,11 @@ def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk] file.seek(offset, io.SEEK_SET) end_of_central_directory = self.parse_header(file) - if self.is_zip64(file, start_offset, offset, end_of_central_directory): - file.seek(offset, io.SEEK_SET) - end_of_central_directory = self._parse_zip64(file, start_offset, offset) + zip64_eocd = self.get_zip64_eocd( + file, start_offset, offset, end_of_central_directory + ) + if zip64_eocd is not None: + end_of_central_directory = zip64_eocd break # the EOCD offset is equal to the offset of CD + size of CD @@ -188,10 +190,7 @@ def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk] if offset == end_of_central_directory_offset: break else: - if offset is None: - raise InvalidInputFormat("Missing EOCD record header in ZIP chunk.") - # if we can't find a valid 32bit ZIP EOCD, we fall back to ZIP64 - end_of_central_directory = self._parse_zip64(file, start_offset, offset) + raise InvalidInputFormat("Missing EOCD record header in ZIP chunk.") has_encrypted_files = self.has_encrypted_files( file, start_offset, end_of_central_directory