diff --git a/tests/integration/archive/zip/zip64/__input__/colors.zip b/tests/integration/archive/zip/zip64/__input__/colors.zip new file mode 100644 index 0000000000..3422aaff0c --- /dev/null +++ b/tests/integration/archive/zip/zip64/__input__/colors.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a59995571929d2047445ef9ead25ebe1560a6b52a8204ed342c72f8eb624bec9 +size 457 diff --git a/tests/integration/archive/zip/zip64/__input__/colors_garbage.zip b/tests/integration/archive/zip/zip64/__input__/colors_garbage.zip new file mode 100644 index 0000000000..2d32ab2e49 --- /dev/null +++ b/tests/integration/archive/zip/zip64/__input__/colors_garbage.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cca8132ff6bf4a712056e15507643bb32ddd1283cf84d85157c9c2a299cf6749 +size 1481 diff --git a/tests/integration/archive/zip/zip64/__input__/zero_edited.zip b/tests/integration/archive/zip/zip64/__input__/zero_edited.zip new file mode 100644 index 0000000000..dc0711c730 --- /dev/null +++ b/tests/integration/archive/zip/zip64/__input__/zero_edited.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7321376ddcdde9617b95332c6c3b3e1e612fa352e18e8eb99faff7580e456ddc +size 6252481 diff --git a/tests/integration/archive/zip/zip64/__output__/colors.zip_extract/blue.txt b/tests/integration/archive/zip/zip64/__output__/colors.zip_extract/blue.txt new file mode 100644 index 0000000000..1fc5aa04b5 --- /dev/null +++ b/tests/integration/archive/zip/zip64/__output__/colors.zip_extract/blue.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0bee6616b5e5eae6799cb4525a884a82e7161614f11122bbdf4383b2ac05998 +size 5 diff --git a/tests/integration/archive/zip/zip64/__output__/colors.zip_extract/red.txt b/tests/integration/archive/zip/zip64/__output__/colors.zip_extract/red.txt new file mode 100644 index 0000000000..d9f993ceb7 --- /dev/null +++ 
b/tests/integration/archive/zip/zip64/__output__/colors.zip_extract/red.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ace33171ce0acb6891e3cc311d75a97aa429d77c05cba600d49ed9652ed49de +size 4 diff --git a/tests/integration/archive/zip/zip64/__output__/colors_garbage.zip_extract/0-1024.unknown b/tests/integration/archive/zip/zip64/__output__/colors_garbage.zip_extract/0-1024.unknown new file mode 100644 index 0000000000..f9ffca6f65 --- /dev/null +++ b/tests/integration/archive/zip/zip64/__output__/colors_garbage.zip_extract/0-1024.unknown @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fdac534f308dcc7708b640a33ace78c5cffb61a90d425b07409f0b66056bee0 +size 1024 diff --git a/tests/integration/archive/zip/zip64/__output__/colors_garbage.zip_extract/1024-1481.zip b/tests/integration/archive/zip/zip64/__output__/colors_garbage.zip_extract/1024-1481.zip new file mode 100644 index 0000000000..3422aaff0c --- /dev/null +++ b/tests/integration/archive/zip/zip64/__output__/colors_garbage.zip_extract/1024-1481.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a59995571929d2047445ef9ead25ebe1560a6b52a8204ed342c72f8eb624bec9 +size 457 diff --git a/tests/integration/archive/zip/zip64/__output__/colors_garbage.zip_extract/1024-1481.zip_extract/blue.txt b/tests/integration/archive/zip/zip64/__output__/colors_garbage.zip_extract/1024-1481.zip_extract/blue.txt new file mode 100644 index 0000000000..1fc5aa04b5 --- /dev/null +++ b/tests/integration/archive/zip/zip64/__output__/colors_garbage.zip_extract/1024-1481.zip_extract/blue.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0bee6616b5e5eae6799cb4525a884a82e7161614f11122bbdf4383b2ac05998 +size 5 diff --git a/tests/integration/archive/zip/zip64/__output__/colors_garbage.zip_extract/1024-1481.zip_extract/red.txt b/tests/integration/archive/zip/zip64/__output__/colors_garbage.zip_extract/1024-1481.zip_extract/red.txt new file mode 
100644 index 0000000000..d9f993ceb7 --- /dev/null +++ b/tests/integration/archive/zip/zip64/__output__/colors_garbage.zip_extract/1024-1481.zip_extract/red.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ace33171ce0acb6891e3cc311d75a97aa429d77c05cba600d49ed9652ed49de +size 4 diff --git a/tests/integration/archive/zip/zip64/__output__/zero_edited.zip_extract/zero b/tests/integration/archive/zip/zip64/__output__/zero_edited.zip_extract/zero new file mode 100644 index 0000000000..2e17b1d0a5 --- /dev/null +++ b/tests/integration/archive/zip/zip64/__output__/zero_edited.zip_extract/zero @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1d13c5021fad1bbc1fd95d8597efd5822fa179b00df8dd3fa4d5eb9f73c78b2 +size 6252215 diff --git a/unblob/handlers/archive/zip.py b/unblob/handlers/archive/zip.py index c48a396ecb..8d740a5e05 100644 --- a/unblob/handlers/archive/zip.py +++ b/unblob/handlers/archive/zip.py @@ -36,9 +36,9 @@ class ZIPHandler(StructHandler): uint16 internal_file_attr; uint32 external_file_attr; uint32 relative_offset_local_header; - char file_name[file_name_length]; - char extra_field[extra_field_length]; - } cd_file_header_t; + // char file_name[file_name_length]; + // char extra_field[extra_field_length]; + } partial_cd_file_header_t; typedef struct end_of_central_directory { @@ -94,8 +94,12 @@ def has_encrypted_files( ) -> bool: file.seek(start_offset + end_of_central_directory.offset_of_cd, io.SEEK_SET) for _ in range(end_of_central_directory.total_entries): - cd_header = self.cparser_le.cd_file_header_t(file) - if cd_header.flags & self.ENCRYPTED_FLAG: + file_header = self.cparser_le.partial_cd_file_header_t(file) + file.seek( + file_header.file_name_length + file_header.extra_field_length, + io.SEEK_CUR, + ) + if file_header.flags & self.ENCRYPTED_FLAG: return True return False @@ -111,6 +115,14 @@ def is_zip64_eocd(end_of_central_directory: Instance): or end_of_central_directory.offset_of_cd == 0xFFFFFFFF ) 
+ @staticmethod + def is_zip64_cd_file(file_header: Instance): + # see https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT section 4.3.9.2 + return ( + file_header.file_size == 0xFFFFFFFF + or file_header.compress_size == 0xFFFFFFFF + ) + def _parse_zip64(self, file: File, start_offset: int, offset: int) -> Instance: file.seek(start_offset, io.SEEK_SET) for eocd_locator_offset in iterate_patterns( @@ -137,6 +149,20 @@ def _parse_zip64(self, file: File, start_offset: int, offset: int) -> Instance: "Missing ZIP64 EOCD locator record header in ZIP chunk." ) + def is_zip64(self, file, start_offset, offset, end_of_central_directory): + absolute_offset_of_cd = start_offset + end_of_central_directory.offset_of_cd + + if 0 < absolute_offset_of_cd < offset: + file.seek(absolute_offset_of_cd, io.SEEK_SET) + file_header = self.cparser_le.partial_cd_file_header_t(file) + if self.is_zip64_cd_file(file_header): + return True + + # some values in the CD can be FFFF, indicating it's a zip64 + # if the offset of the CD is 0xFFFFFFFF, it's definitely one + # otherwise we check every other header indicating zip64 + return self.is_zip64_eocd(end_of_central_directory) + def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]: has_encrypted_files = False file.seek(start_offset, io.SEEK_SET) @@ -147,7 +173,8 @@ def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk] file.seek(offset, io.SEEK_SET) end_of_central_directory = self.parse_header(file) - if self.is_zip64_eocd(end_of_central_directory): + if self.is_zip64(file, start_offset, offset, end_of_central_directory): + file.seek(offset, io.SEEK_SET) end_of_central_directory = self._parse_zip64(file, start_offset, offset) break