Skip to content

Commit

Permalink
Merge pull request #901 from onekey-sec/900-invalid-valid-zip64
Browse files Browse the repository at this point in the history
fix(handler): improve zip64 detection
  • Loading branch information
kissgyorgy authored Jul 30, 2024
2 parents 3d541a4 + 2ffcb1b commit ce2f1ee
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 20 deletions.
Git LFS file not shown
Git LFS file not shown
39 changes: 19 additions & 20 deletions unblob/handlers/archive/zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,9 @@ def is_zip64_eocd(end_of_central_directory):
or end_of_central_directory.offset_of_cd == 0xFFFFFFFF
)

@staticmethod
def is_zip64_cd_file(file_header):
def has_zip64_tag(self, file):
# see https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT section 4.3.9.2
file_header = self.cparser_le.partial_cd_file_header_t(file)
return (
file_header.file_size == 0xFFFFFFFF
or file_header.compress_size == 0xFFFFFFFF
Expand Down Expand Up @@ -144,23 +144,23 @@ def _parse_zip64(self, file: File, start_offset: int, offset: int):
"Missing ZIP64 EOCD header record header in ZIP chunk."
)
return zip64_eocd
raise InvalidInputFormat(
"Missing ZIP64 EOCD locator record header in ZIP chunk."
)
return None

def get_zip64_eocd(self, file, start_offset, offset, end_of_central_directory):
# some values in the CD can be FFFF, indicating it's a zip64
# if the offset of the CD is 0xFFFFFFFF, it's definitely one
# otherwise we check every other header indicating zip64
if self.is_zip64_eocd(end_of_central_directory):
return self._parse_zip64(file, start_offset, offset)

def is_zip64(self, file, start_offset, offset, end_of_central_directory):
absolute_offset_of_cd = start_offset + end_of_central_directory.offset_of_cd

if 0 < absolute_offset_of_cd < offset:
file.seek(absolute_offset_of_cd, io.SEEK_SET)
file_header = self.cparser_le.partial_cd_file_header_t(file)
if self.is_zip64_cd_file(file_header):
return True
if self.has_zip64_tag(file):
return self._parse_zip64(file, start_offset, offset)

# some values in the CD can be FFFF, indicating it's a zip64
# if the offset of the CD is 0xFFFFFFFF, it's definitely one
# otherwise we check every other header indicating zip64
return self.is_zip64_eocd(end_of_central_directory)
return None

def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]:
has_encrypted_files = False
Expand All @@ -173,9 +173,11 @@ def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]
file.seek(offset, io.SEEK_SET)
end_of_central_directory = self.parse_header(file)

if self.is_zip64(file, start_offset, offset, end_of_central_directory):
file.seek(offset, io.SEEK_SET)
end_of_central_directory = self._parse_zip64(file, start_offset, offset)
zip64_eocd = self.get_zip64_eocd(
file, start_offset, offset, end_of_central_directory
)
if zip64_eocd is not None:
end_of_central_directory = zip64_eocd
break

# the EOCD offset is equal to the offset of CD + size of CD
Expand All @@ -188,10 +190,7 @@ def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]
if offset == end_of_central_directory_offset:
break
else:
if offset is None:
raise InvalidInputFormat("Missing EOCD record header in ZIP chunk.")
# if we can't find a valid 32bit ZIP EOCD, we fall back to ZIP64
end_of_central_directory = self._parse_zip64(file, start_offset, offset)
raise InvalidInputFormat("Missing EOCD record header in ZIP chunk.")

has_encrypted_files = self.has_encrypted_files(
file, start_offset, end_of_central_directory
Expand Down

0 comments on commit ce2f1ee

Please sign in to comment.