Skip to content

Commit

Permalink
fix(handler): improve zip64 detection
Browse files Browse the repository at this point in the history
We improve zip64 detection by attempting to parse the zip64 records early.
If the zip file contains a zip64 header, we are sure it's a zip64.
If it doesn't contain a zip64 header, we are sure it's not a zip64, and we fall back to zip32.

To create the test files, we used these commands:
```
cat somefile.txt | zip > zip64.zip
zip -F zip64.zip --out zip64-without-cd.zip
```
  • Loading branch information
Antoine Pecoraro authored and kissgyorgy committed Jul 30, 2024
1 parent 3d541a4 commit 2ffcb1b
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 20 deletions.
Git LFS file not shown
Git LFS file not shown
39 changes: 19 additions & 20 deletions unblob/handlers/archive/zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,9 @@ def is_zip64_eocd(end_of_central_directory):
or end_of_central_directory.offset_of_cd == 0xFFFFFFFF
)

@staticmethod
def is_zip64_cd_file(file_header):
def has_zip64_tag(self, file):
# see https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT section 4.3.9.2
file_header = self.cparser_le.partial_cd_file_header_t(file)
return (
file_header.file_size == 0xFFFFFFFF
or file_header.compress_size == 0xFFFFFFFF
Expand Down Expand Up @@ -144,23 +144,23 @@ def _parse_zip64(self, file: File, start_offset: int, offset: int):
"Missing ZIP64 EOCD header record header in ZIP chunk."
)
return zip64_eocd
raise InvalidInputFormat(
"Missing ZIP64 EOCD locator record header in ZIP chunk."
)
return None

def get_zip64_eocd(self, file, start_offset, offset, end_of_central_directory):
# some values in the CD can be FFFF, indicating its a zip64
# if the offset of the CD is 0xFFFFFFFF, its definitely one
# otherwise we check every other header indicating zip64
if self.is_zip64_eocd(end_of_central_directory):
return self._parse_zip64(file, start_offset, offset)

def is_zip64(self, file, start_offset, offset, end_of_central_directory):
absolute_offset_of_cd = start_offset + end_of_central_directory.offset_of_cd

if 0 < absolute_offset_of_cd < offset:
file.seek(absolute_offset_of_cd, io.SEEK_SET)
file_header = self.cparser_le.partial_cd_file_header_t(file)
if self.is_zip64_cd_file(file_header):
return True
if self.has_zip64_tag(file):
return self._parse_zip64(file, start_offset, offset)

# some values in the CD can be FFFF, indicating its a zip64
# if the offset of the CD is 0xFFFFFFFF, its definitely one
# otherwise we check every other header indicating zip64
return self.is_zip64_eocd(end_of_central_directory)
return None

def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]:
has_encrypted_files = False
Expand All @@ -173,9 +173,11 @@ def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]
file.seek(offset, io.SEEK_SET)
end_of_central_directory = self.parse_header(file)

if self.is_zip64(file, start_offset, offset, end_of_central_directory):
file.seek(offset, io.SEEK_SET)
end_of_central_directory = self._parse_zip64(file, start_offset, offset)
zip64_eocd = self.get_zip64_eocd(
file, start_offset, offset, end_of_central_directory
)
if zip64_eocd is not None:
end_of_central_directory = zip64_eocd
break

# the EOCD offset is equal to the offset of CD + size of CD
Expand All @@ -188,10 +190,7 @@ def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]
if offset == end_of_central_directory_offset:
break
else:
if offset is None:
raise InvalidInputFormat("Missing EOCD record header in ZIP chunk.")
# if we can't find a valid 32bit ZIP EOCD, we fall back to ZIP64
end_of_central_directory = self._parse_zip64(file, start_offset, offset)
raise InvalidInputFormat("Missing EOCD record header in ZIP chunk.")

has_encrypted_files = self.has_encrypted_files(
file, start_offset, end_of_central_directory
Expand Down

0 comments on commit 2ffcb1b

Please sign in to comment.