From 2031205beef8dde04e2ea1bd4e91dde9157efa77 Mon Sep 17 00:00:00 2001 From: Chris Collazo Date: Fri, 20 Sep 2024 14:09:53 -0400 Subject: [PATCH 1/4] Added ziparchives_inmem, which can extract from archives which have been read in as byte strings. This allows extracting from recursive archives in-memory. --- src/inner_test.zip | Bin 0 -> 1662 bytes src/test_ziparchives_inmem.nim | 16 + src/zippy/ziparchives_inmem.nim | 636 ++++++++++++++++++++++++++++++++ 3 files changed, 652 insertions(+) create mode 100644 src/inner_test.zip create mode 100644 src/test_ziparchives_inmem.nim create mode 100644 src/zippy/ziparchives_inmem.nim diff --git a/src/inner_test.zip b/src/inner_test.zip new file mode 100644 index 0000000000000000000000000000000000000000..57406695462f12828c0774fd2a711256d363f23a GIT binary patch literal 1662 zcmai!L1+_E5Qb;FiP-d{2=<_*s8kUtRBLOYRG}ioh7utn)sxs=vr@8>Y|{`etRMkB zR8gqai>UOZB8uXnBuOd5Bxufc@b$%8-N&dmSb%Z}NJ6!GVT z-#>9V{cG}?Ma0ogTgdf_rJ%R!oj;yyC;P+8?=Hu5rGu=*icKVsEF-WW8UW&FZK}!( zjwGV5H>d=D!HHCZP`Ov%Y|yH`tO{oQNHql2?~ltzn9+!RqRJN?(5$wDcB{AUz<>DD z9X`LrS!zg8H~%J!Iw@Sv-KuqbvciS!^O9HB8f%uRu{|J3(Hcuvx^8P+W391fUmBYM zR=U*KFtc(;up>)1C9kftW65>gk&SN>js?6R8KL|-g%PGVt{-hUnu4Px=Zs&N{AcNB z*vZk-(qS-W-KFaZ5INY2ezw8&r;CwWhah|V?%A-G?H8F>R@NVe>`%|;QEmMI%Sv8F z56OJ19uhYvHRvJHxlMO@IPt8pW|bPd6IiLa3H!QyPq4A9v1V5qI}WV$sIlRf`-_5| z=zAoVzFK(5`G1z4#sH7bo&0tKc60C{4f0+wa`Y?Q5f4+dvWUpZT%4I3Lc;O=$x7_)?-2y^)i~ql-~11#uLjL zYu2f;yMUFdo3Ib&o(eXWHP&oPW3#|YlTFy&+g=DZmgA<*aIZM0R$G_d>hA3n4L|Bv zn7WCR^_$o*IKr6+U*q0m-{@-qyfv?Yh-dCP&0EB2R(lY*HTPe1SN%X&Ro+(g7oo*e Ang9R* literal 0 HcmV?d00001 diff --git a/src/test_ziparchives_inmem.nim b/src/test_ziparchives_inmem.nim new file mode 100644 index 0000000..adcf2ef --- /dev/null +++ b/src/test_ziparchives_inmem.nim @@ -0,0 +1,16 @@ +import zippy/ziparchives, zippy/ziparchives_inmem + +proc test_case() = + var archive = open_zip_archive("inner_test.zip") + defer: archive.close() + + for fname in archive.walk_files: + let bytes = archive.extract_file(fname) + var inner_archive = open_zip_archive_bytes(bytes) + defer: inner_archive.close() + + for ifname in inner_archive.walk_files: + let ifbytes = inner_archive.extract_file(ifname) + writeFile(ifname, ifbytes) + +test_case() \ No newline at end of file diff --git a/src/zippy/ziparchives_inmem.nim b/src/zippy/ziparchives_inmem.nim new file mode 100644 index 0000000..361a424 --- /dev/null +++ b/src/zippy/ziparchives_inmem.nim @@ -0,0 +1,636 @@ +import common, crc, internal, std/os, std/strutils, std/tables, + std/times, std/unicode, ziparchives_v1, zippy, std/sequtils + +export common, ziparchives_v1 + +const + fileHeaderLen = 30 + fileHeaderSignature = 0x04034b50.uint32 + centralDirectoryFileHeaderSignature = 0x02014b50.uint32 + endOfCentralDirectorySignature = 0x06054b50.uint32 + zip64EndOfCentralDirectorySignature = 0x06064b50.uint32 + zip64EndOfCentralDirectoryLocatorSignature = 0x07064b50.uint32 + zip64ExtraFieldId = 1.uint16 + +type + ZipArchiveRecordKind = enum + FileRecord, DirectoryRecord + + ZipArchiveRecord = object + kind: ZipArchiveRecordKind + fileHeaderOffset: int + path: string + uncompressedCrc32: uint32 + compressedSize: int + uncompressedSize: int + filePermissions: set[FilePermission] + + ZipArchiveBytesReader* = ref object + file_bytes: string + records: OrderedTable[string, ZipArchiveRecord] + +iterator walkFiles*(reader: ZipArchiveBytesReader): string = + ## Walks over all files in the archive and returns the file name + ## (including the path). + for _, record in reader.records: + if record.kind == FileRecord: + yield record.path + +proc extractFile*( + reader: ZipArchiveBytesReader, path: string +): string {.raises: [ZippyError].} = + + template failNoFileRecord() = + raise newException(ZippyError, "No file record found for " & path) + + let + src = cast[ptr UncheckedArray[uint8]](reader.file_bytes[0].addr) + record = + try: + reader.records[path] + except KeyError: + failNoFileRecord() + + var pos = record.fileHeaderOffset + + if pos + fileHeaderLen > reader.file_bytes.len: + failArchiveEOF() + + if read32(src, pos) != fileHeaderSignature: + raise newException(ZippyError, "Invalid file header") + + let + # minVersionToExtract = read16(src, pos + 4) + # generalPurposeFlag = read16(src, pos + 6) + compressionMethod = read16(src, pos + 8) + # lastModifiedTime = read16(src, pos + 10) + # lastModifiedDate = read16(src, pos + 12) + # uncompressedCrc32 = read32(src, pos + 14) + # compressedSize = read32(src, pos + 18) + # uncompressedSize = read32(src, pos + 22) + fileNameLen = read16(src, pos + 26).int + extraFieldLen = read16(src, pos + 28).int + + pos += fileHeaderLen + fileNameLen + extraFieldLen + + if pos + record.compressedSize > reader.file_bytes.len: + failArchiveEOF() + + case record.kind: + of FileRecord: + if compressionMethod == 0: # No compression + if record.compressedSize > 0: + result.setLen(record.compressedSize) + copyMem(result[0].addr, src[pos].addr, record.compressedSize) + elif compressionMethod == 8: # Deflate + result = uncompress(src[pos].addr, record.compressedSize, dfDeflate) + else: + raise newException(ZippyError, "Unsupported archive, compression method") + of DirectoryRecord: + failNoFileRecord() + + if crc32(result) != record.uncompressedCrc32: + raise newException(ZippyError, "Verifying crc32 failed") + +proc close*(reader: ZipArchiveBytesReader) {.raises: [OSError].} = + # reader.memFile.close() + discard + +proc parseMsDosDateTime(time, date: uint16): Time = + let + seconds = (time and 0b0000000000011111).int * 2 + minutes = ((time shr 5) and 0b0000000000111111).int + hours = ((time shr 11) and 0b0000000000011111).int + days = (date and 0b0000000000011111).int + months = ((date shr 5) and 0b0000000000001111).int + years = ((date shr 9) and 0b0000000001111111).int + if seconds <= 59 and minutes <= 59 and hours <= 23: + result = initDateTime( + days.MonthdayRange, + months.Month, + years + 1980, + hours.HourRange, + minutes.MinuteRange, + seconds.SecondRange, + local() + ).toTime() + +proc utf8ify(fileName: string): string = + const cp437AfterAscii = [ + # 0x80 - 0x8f + 0x00c7.uint32, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7, + 0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5, + # 0x90 - 0x9f + 0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9, + 0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5, 0x20a7, 0x0192, + # 0xa0 - 0xaf + 0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba, + 0x00bf, 0x2310, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb, + # 0xb0 - 0xbf + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, + 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510, + # 0xc0 - 0xcf + 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, + 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567, + # 0xd0 - 0xdf + 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b, + 0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, + # 0xd0 - 0xdf + 0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3, 0x00b5, 0x03c4, + 0x03a6, 0x0398, 0x03a9, 0x03b4, 0x221e, 0x03c6, 0x03b5, 0x2229, + # 0xf0 - 0xff + 0x2261, 0x00b1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248, + 0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2, 0x25a0, 0x00a0 + ] + + if validateUtf8(fileName) == -1: + return fileName + + # If the file name is not valid utf-8, assume it is CP437 / OEM / DOS + var runes: seq[Rune] + for c in fileName: + if c > 0x7f.char: + runes.add Rune(cp437AfterAscii[c.int - 0x80]) + else: + runes.add Rune(c) + $runes + +proc findEndOfCentralDirectory(reader: ZipArchiveBytesReader): int = + let src = cast[ptr UncheckedArray[uint8]](reader.file_bytes[0].addr) + + result = reader.file_bytes.len - 22 # Work backwards in the file starting here + while true: + if result < 0: + failArchiveEOF() + if read32(src, result) == endOfCentralDirectorySignature: + return + else: + dec result + +proc findStartOfCentralDirectory( + reader: ZipArchiveBytesReader, + start, numRecordEntries: int +): int = + let src = cast[ptr UncheckedArray[uint8]](reader.file_bytes[0].addr) + + result = start # Work backwards in the file starting here + var numRecordsFound: int + while true: + if result < 0: + failArchiveEOF() + if read32(src, result) == centralDirectoryFileHeaderSignature: + inc numRecordsFound + if numRecordsFound == numRecordEntries: + return + dec result + +proc openZipArchiveBytes*( + bytes: string +): ZipArchiveBytesReader {.raises: [IOError, OSError, ZippyError].} = + result = ZipArchiveBytesReader() + result.file_bytes = bytes + + try: + let src = cast[ptr UncheckedArray[uint8]](result.file_bytes[0].addr) + + let eocd = result.findEndOfCentralDirectory() + if eocd + 22 > result.file_bytes.len: + failArchiveEOF() + + var zip64 = false + if eocd - 20 >= 0: + if read32(src, eocd - 20) == zip64EndOfCentralDirectoryLocatorSignature: + zip64 = true + + var + diskNumber, startDisk, numRecordsOnDisk, numCentralDirectoryRecords: int + centralDirectorySize, centralDirectoryStart: int + if zip64: + let + zip64EndOfCentralDirectoryDiskNumber = read32(src, eocd - 20 + 4) + zip64EndOfCentralDirectoryStart = read64(src, eocd - 20 + 8).int + numDisks = read32(src, eocd - 20 + 16) + + if zip64EndOfCentralDirectoryDiskNumber != 0: + raise newException(ZippyError, "Unsupported archive, disk number") + + if numDisks != 1: + raise newException(ZippyError, "Unsupported archive, num disks") + + var pos = zip64EndOfCentralDirectoryStart + if pos + 64 > result.file_bytes.len: + failArchiveEOF() + + if read32(src, pos) != zip64EndOfCentralDirectorySignature: + raise newException(ZippyError, "Invalid central directory file header") + + # let + # endOfCentralDirectorySize = read64(src, pos + 4).int + # versionMadeBy = read16(src, pos + 12) + # minVersionToExtract = read16(src, pos + 14) + diskNumber = read32(src, pos + 16).int + startDisk = read32(src, pos + 20).int + numRecordsOnDisk = read64(src, pos + 24).int + numCentralDirectoryRecords = read64(src, pos + 32).int + centralDirectorySize = read64(src, pos + 40).int + centralDirectoryStart = read64(src, pos + 48).int + # anotherDisk = read64(src, pos + 56).int + else: + diskNumber = read16(src, eocd + 4).int + startDisk = read16(src, eocd + 6).int + numRecordsOnDisk = read16(src, eocd + 8).int + numCentralDirectoryRecords = read16(src, eocd + 10).int + centralDirectorySize = read32(src, eocd + 12).int + centralDirectoryStart = read32(src, eocd + 16).int + # commentLen = read16(src, eocd + 20).int + + if diskNumber != 0: + raise newException(ZippyError, "Unsupported archive, disk number") + + if startDisk != 0: + raise newException(ZippyError, "Unsupported archive, start disk") + + if numRecordsOnDisk != numCentralDirectoryRecords: + raise newException(ZippyError, "Unsupported archive, record number") + + # A zip archive may be concatenated to the end of another file (like an + # exe). This handles that by determining where the zip archive is from + # the start of the file. + let + socd = + try: + # Try to find the start relative to the end of the file, supporting + # zip archives being concatenated to the end. If this fails for any + # reason, fall back to the default behavior. + result.findStartOfCentralDirectory(eocd, numCentralDirectoryRecords) + except ZippyError: + centralDirectoryStart + socdOffset = socd - centralDirectoryStart + + var pos = socdOffset + centralDirectoryStart + + if eocd + 22 > result.file_bytes.len: + failArchiveEOF() + + for _ in 0 ..< numCentralDirectoryRecords: + if pos + 46 > result.file_bytes.len: + failArchiveEOF() + + if read32(src, pos) != centralDirectoryFileHeaderSignature: + raise newException(ZippyError, "Invalid central directory file header") + + let + # versionMadeBy = read16(src, pos + 4) + # minVersionToExtract = read16(src, pos + 6) + generalPurposeFlag = read16(src, pos + 8) + compressionMethod = read16(src, pos + 10) + # lastModifiedTime = read16(src, pos + 12) + # lastModifiedDate = read16(src, pos + 14) + uncompressedCrc32 = read32(src, pos + 16) + fileNameLen = read16(src, pos + 28).int + extraFieldLen = read16(src, pos + 30).int + fileCommentLen = read16(src, pos + 32).int + fileDiskNumber = read16(src, pos + 34).int + # internalFileAttr = read16(src, pos + 36) + externalFileAttr = read32(src, pos + 38) + + if compressionMethod notin [0.uint16, 8]: + raise newException(ZippyError, "Unsupported archive, compression method") + + if fileDiskNumber != 0: + raise newException(ZippyError, "Invalid file disk number") + + var + compressedSize = read32(src, pos + 20).int + uncompressedSize = read32(src, pos + 24).int + fileHeaderOffset = read32(src, pos + 42).int + + pos += 46 + + if pos + fileNameLen > result.file_bytes.len: + failArchiveEOF() + + var fileName = newString(fileNameLen) + copyMem(fileName[0].addr, src[pos].addr, fileNameLen) + + if fileName in result.records: + raise newException(ZippyError, "Unsupported archive, duplicate entry") + + pos += fileNameLen + + block: # Handle zip64 values as needed + var extraFieldsOffset = pos + + while extraFieldsOffset < pos + extraFieldLen: + if pos + 4 > result.file_bytes.len: + failArchiveEOF() + + let + fieldId = read16(src, pos + 0) + fieldLen = read16(src, pos + 2).int + + extraFieldsOffset += 4 + + if fieldId != zip64ExtraFieldId: + extraFieldsOffset += fieldLen + else: + # These are the zip64 sizes + var zip64ExtrasOffset = extraFieldsOffset + + if uncompressedSize == 0xffffffff: + if zip64ExtrasOffset + 8 > extraFieldsOffset + fieldLen: + failArchiveEOF() + uncompressedSize = read64(src, zip64ExtrasOffset).int + zip64ExtrasOffset += 8 + + if compressedSize == 0xffffffff: + if zip64ExtrasOffset + 8 > extraFieldsOffset + fieldLen: + failArchiveEOF() + compressedSize = read64(src, zip64ExtrasOffset).int + zip64ExtrasOffset += 8 + + if fileHeaderOffset == 0xffffffff: + if zip64ExtrasOffset + 8 > extraFieldsOffset + fieldLen: + failArchiveEOF() + fileHeaderOffset = read64(src, zip64ExtrasOffset).int + zip64ExtrasOffset += 8 + break + + pos += extraFieldLen + fileCommentLen + + if pos > socdOffset + centralDirectoryStart + centralDirectorySize: + raise newException(ZippyError, "Invalid central directory size") + + let utf8FileName = + if (generalPurposeFlag and 0b100000000000) != 0: + # Language encoding flag (EFS) set, assume utf-8 + fileName + else: + fileName.utf8ify() + + let + dosDirectoryFlag = (externalFileAttr and 0x10) != 0 + unixDirectoryFlag = (externalFileAttr and (S_IFDIR.uint32 shl 16)) != 0 + recordKind = + if dosDirectoryFlag or unixDirectoryFlag or utf8FileName.endsWith("/"): + DirectoryRecord + else: + FileRecord + + result.records[utf8FileName] = ZipArchiveRecord( + kind: recordKind, + fileHeaderOffset: fileHeaderOffset.int + socdOffset, + path: utf8FileName, + compressedSize: compressedSize, + uncompressedSize: uncompressedSize, + uncompressedCrc32: uncompressedCrc32, + filePermissions: parseFilePermissions(externalFileAttr shr 16) + ) + except IOError as e: + result.close() + raise e + except OSError as e: + result.close() + raise e + except ZippyError as e: + result.close() + raise e + +proc extractAll*( + bytes, dest: string +) {.raises: [IOError, OSError, ZippyError].} = + ## Extracts the files stored in archive to the destination directory. + ## The path to the destination directory must exist. + ## The destination directory itself must not exist (it is not overwitten). + if dest == "" or dirExists(dest): + raise newException(ZippyError, "Destination " & dest & " already exists") + + var (head, tail) = splitPath(dest) + if tail == "": # For / at end of path + (head, tail) = splitPath(head) + if head != "" and not dirExists(head): + raise newException(ZippyError, "Path to " & dest & " does not exist") + + let + reader = openZipArchiveBytes(bytes) + src = cast[ptr UncheckedArray[uint8]](reader.file_bytes[0].addr) + + # Verify some things before attempting to write the files + for _, record in reader.records: + record.path.verifyPathIsSafeToExtract() + + try: + # Create the directories and write the extracted files + for _, record in reader.records: + case record.kind: + of DirectoryRecord: + createDir(dest / record.path) + of FileRecord: + createDir(dest / splitFile(record.path).dir) + writeFile(dest / record.path, reader.extractFile(record.path)) + setFilePermissions(dest / record.path, record.filePermissions) + + # Set last modification time as a second pass otherwise directories get + # updated last modification times as files are added on Mac. + for _, record in reader.records: + let + lastModifiedTime = read16(src, record.fileHeaderOffset + 10) + lastModifiedDate = read16(src, record.fileHeaderOffset + 12) + lastModified = parseMsDosDateTime(lastModifiedTime, lastModifiedDate) + setLastModificationTime(dest / record.path, lastModified) + + # If something bad happens delete the destination directory to avoid leaving + # an incomplete extract. + except IOError as e: + removeDir(dest) + raise e + except OSError as e: + removeDir(dest) + raise e + except ZippyError as e: + removeDir(dest) + raise e + finally: + reader.close() + +when (NimMajor, NimMinor, NimPatch) >= (1, 6, 0): + # For some reason `sink Table | OrderedTable` does not work, so work around: + template createZipArchiveImpl( + entries: var Table[string, string] | var OrderedTable[string, string] + ) = + + proc add16(dst: var string, v: int16 | uint16) = + dst.setLen(dst.len + 2) + var tmp = v + copyMem(dst[^2].addr, tmp.addr, 2) + + proc add32(dst: var string, v: int32 | uint32) = + dst.setLen(dst.len + 4) + var tmp = v + copyMem(dst[^4].addr, tmp.addr, 4) + + proc add64(dst: var string, v: int | int64 | uint | uint64) = + dst.setLen(dst.len + 8) + var tmp = v + copyMem(dst[^8].addr, tmp.addr, 8) + + proc msdos(time: Time): (uint16, uint16) = + let + dt = time.local() + seconds = (dt.second div 2).uint16 + minutes = dt.minute.uint16 + hours = dt.hour.uint16 + days = dt.monthday.uint16 + months = dt.month.uint16 + years = (max(0, dt.year - 1980)).uint16 + + var time = seconds + time = (minutes shl 5) or time + time = (hours shl 11) or time + + var date = days + date = (months shl 5) or date + date = (years shl 9) or date + + (time, date) + + let (lastModifiedTime, lastModifiedDate) = msdos(getTime()) + + type ArchiveEntry = object + fileHeaderOffset: int + uncompressedLen: int + compressedLen: int + compressionMethod: uint16 + uncompressedCrc32: uint32 + + var + keys = toSeq(entries.keys) # The entries table is modified so use toSeq + records: seq[(string, ArchiveEntry)] + while keys.len > 0: + let fileName = keys.pop() + if fileName == "": + raise newException(ZippyError, "Invalid empty file name") + if fileName[0] == '/': + raise newException(ZippyError, "File paths must be relative") + if fileName.len > uint16.high.int: + raise newException(ZippyError, "File name len > uint16.high") + + var + uncompressedLen: int + uncompressedCrc32: uint32 + compressed: string + compressionMethod: uint16 + block: # Free `contents` after this block + var contents: string + discard entries.pop(fileName, contents) + uncompressedLen = contents.len + uncompressedCrc32 = crc32(contents) + if contents == "": + discard + else: + compressed = compress(contents, BestSpeed, dfDeflate) + compressionMethod = 8 + + records.add((fileName, ArchiveEntry( + fileHeaderOffset: result.len, + uncompressedLen: uncompressedLen, + compressedLen: compressed.len, + compressionMethod: compressionMethod, + uncompressedCrc32: uncompressedCrc32 + ))) + + result.add32(fileHeaderSignature) + result.add16(45) # Min version to extract + result.add16(1.uint16 shl 11) # General purpose flags + result.add16(compressionMethod) + result.add16(lastModifiedTime) + result.add16(lastModifiedDate) + result.add32(uncompressedCrc32) # CRC-32 of uncompressed data + result.add32(uint32.high) # Compressed size (or 0xffffffff for ZIP64) + result.add32(uint32.high) # Uncompressed size (or 0xffffffff for ZIP64) + result.add16(cast[uint16](fileName.len)) # File name length + result.add16(20) # Extra field length + + result.add(fileName) + + result.add16(zip64ExtraFieldId) + result.add16(16) + result.add64(uncompressedLen) + result.add64(compressed.len) + + # result.add(compressed) + if compressed != "": + result.setLen(result.len + compressed.len) + copyMem( + result[result.len - compressed.len].addr, + compressed.cstring, + compressed.len + ) + + let centralDirectoryStart = result.len + + for i in 0 ..< records.len: + let entry = records[i][1] + result.add32(centralDirectoryFileHeaderSignature) + result.add16(45) # Version made by + result.add16(45) # Min version to extract + result.add16(1.uint16 shl 11) # General purpose flags + result.add16(entry.compressionMethod) + result.add16(lastModifiedTime) + result.add16(lastModifiedDate) + result.add32(entry.uncompressedCrc32) + result.add32(uint32.high) # Compressed size (or 0xffffffff for ZIP64) + result.add32(uint32.high) # Uncompressed size (or 0xffffffff for ZIP64) + result.add16(cast[uint16](records[i][0].len)) # File name length + result.add16(28) # Extra field length + result.add16(0) # File comment length + result.add16(0) # Disk number where file starts + result.add16(0) # Internal file attributes + result.add32(0) # External file attributes + result.add32(uint32.high) # Relative offset of local file header (or 0xffffffff for ZIP64) + + result.add(records[i][0]) + + result.add16(zip64ExtraFieldId) + result.add16(24) + result.add64(entry.uncompressedLen) + result.add64(entry.compressedLen) + result.add64(entry.fileHeaderOffset) + + let centralDirectoryEnd = result.len + + result.add32(zip64EndOfCentralDirectorySignature) + result.add64(44) + result.add16(45) + result.add16(45) + result.add32(0) + result.add32(0) + result.add64(records.len) + result.add64(records.len) + result.add64(centralDirectoryEnd - centralDirectoryStart) + result.add64(centralDirectoryStart) + + result.add32(zip64EndOfCentralDirectoryLocatorSignature) + result.add32(0) + result.add64(centralDirectoryEnd) + result.add32(1) + + result.add32(endOfCentralDirectorySignature) + result.add16(0) # Number of this disk + result.add16(0) # Disk where central directory starts + result.add16(uint16.high) # Number of central directory records on this disk (or 0xffff for ZIP64) + result.add16(uint16.high) # Total number of central directory records (or 0xffff for ZIP64) + result.add32(uint32.high) # Size of central directory (bytes) (or 0xffffffff for ZIP64) + result.add32(uint32.high) # Offset of start of central directory, relative to start of archive (or 0xffffffff for ZIP64) + result.add16(0) + + proc createZipArchive*( + entries: sink Table[string, string] + ): string {.raises: [ZippyError].} = + createZipArchiveImpl(entries) + + proc createZipArchive*( + entries: sink OrderedTable[string, string] + ): string {.raises: [ZippyError].} = + createZipArchiveImpl(entries) + From e1f8dc696e4eca689518b6f840f0a629395beba7 Mon Sep 17 00:00:00 2001 From: Chris Collazo Date: Mon, 23 Sep 2024 10:24:19 -0400 Subject: [PATCH 2/4] Folded in-memory archiver opener as a reader mode. Added utility procs to obtain reader pointer and length. Added wrapper procs to open an archive as either a file or a byte string. Removed module ziparchives_inmem. --- src/test_ziparchives_inmem.nim | 2 +- src/zippy/ziparchives.nim | 71 +++- src/zippy/ziparchives_inmem.nim | 636 -------------------------------- 3 files changed, 52 insertions(+), 657 deletions(-) delete mode 100644 src/zippy/ziparchives_inmem.nim diff --git a/src/test_ziparchives_inmem.nim b/src/test_ziparchives_inmem.nim index adcf2ef..121eb6c 100644 --- a/src/test_ziparchives_inmem.nim +++ b/src/test_ziparchives_inmem.nim @@ -1,4 +1,4 @@ -import zippy/ziparchives, zippy/ziparchives_inmem +import zippy/ziparchives proc test_case() = var archive = open_zip_archive("inner_test.zip") diff --git a/src/zippy/ziparchives.nim b/src/zippy/ziparchives.nim index ccc58a9..633f106 100644 --- a/src/zippy/ziparchives.nim +++ b/src/zippy/ziparchives.nim @@ -16,6 +16,9 @@ type ZipArchiveRecordKind = enum FileRecord, DirectoryRecord + ZipArchiveReaderMode = enum + MemfileMode, StringMode + ZipArchiveRecord = object kind: ZipArchiveRecordKind fileHeaderOffset: int @@ -26,7 +29,9 @@ type filePermissions: set[FilePermission] ZipArchiveReader* = ref object + mode: ZipArchiveReaderMode memFile: MemFile + byteString: string records: OrderedTable[string, ZipArchiveRecord] iterator walkFiles*(reader: ZipArchiveReader): string = @@ -36,6 +41,16 @@ iterator walkFiles*(reader: ZipArchiveReader): string = if record.kind == FileRecord: yield record.path +proc getDataPtr(reader: ZipArchiveReader): ptr UncheckedArray[uint8] = + case reader.mode + of MemfileMode: cast[ptr UncheckedArray[uint8]](reader.memFile.mem) + of StringMode: cast[ptr UncheckedArray[uint8]](reader.byteString[0].addr) + +proc getDataLen(reader: ZipArchiveReader): int = + case reader.mode + of MemfileMode: reader.memFile.size + of StringMode: reader.byteString.len + proc extractFile*( reader: ZipArchiveReader, path: string ): string {.raises: [ZippyError].} = @@ -44,7 +59,7 @@ proc extractFile*( raise newException(ZippyError, "No file record found for " & path) let - src = cast[ptr UncheckedArray[uint8]](reader.memFile.mem) + src = reader.getDataPtr() record = try: reader.records[path] @@ -53,7 +68,7 @@ proc extractFile*( var pos = record.fileHeaderOffset - if pos + fileHeaderLen > reader.memFile.size: + if pos + fileHeaderLen > reader.getDataLen(): failArchiveEOF() if read32(src, pos) != fileHeaderSignature: @@ -73,7 +88,7 @@ proc extractFile*( pos += fileHeaderLen + fileNameLen + extraFieldLen - if pos + record.compressedSize > reader.memFile.size: + if pos + record.compressedSize > reader.getDataLen(): failArchiveEOF() case record.kind: @@ -93,7 +108,8 @@ proc extractFile*( raise newException(ZippyError, "Verifying crc32 failed") proc close*(reader: ZipArchiveReader) {.raises: [OSError].} = - reader.memFile.close() + if reader.mode == MemfileMode: + reader.memFile.close() proc parseMsDosDateTime(time, date: uint16): Time = let @@ -155,9 +171,9 @@ proc utf8ify(fileName: string): string = $runes proc findEndOfCentralDirectory(reader: ZipArchiveReader): int = - let src = cast[ptr UncheckedArray[uint8]](reader.memFile.mem) + let src = reader.getDataPtr() - result = reader.memFile.size - 22 # Work backwards in the file starting here + result = reader.getDataLen() - 22 # Work backwards in the file starting here while true: if result < 0: failArchiveEOF() @@ -170,7 +186,7 @@ proc findStartOfCentralDirectory( reader: ZipArchiveReader, start, numRecordEntries: int ): int = - let src = cast[ptr UncheckedArray[uint8]](reader.memFile.mem) + let src = reader.getDataPtr() result = start # Work backwards in the file starting here var numRecordsFound: int @@ -183,17 +199,16 @@ proc findStartOfCentralDirectory( return dec result -proc openZipArchive*( - zipPath: string -): ZipArchiveReader {.raises: [IOError, OSError, ZippyError].} = - result = ZipArchiveReader() - result.memFile = memfiles.open(zipPath) +proc openZipArchiveInternal*( + reader: ZipArchiveReader +): ZipArchiveReader {.raises: [IOError, OSError, ZippyError].} = + result = reader try: - let src = cast[ptr UncheckedArray[uint8]](result.memFile.mem) + let src = result.getDataPtr() let eocd = result.findEndOfCentralDirectory() - if eocd + 22 > result.memFile.size: + if eocd + 22 > result.getDataLen(): failArchiveEOF() var zip64 = false @@ -217,7 +232,7 @@ proc openZipArchive*( raise newException(ZippyError, "Unsupported archive, num disks") var pos = zip64EndOfCentralDirectoryStart - if pos + 64 > result.memFile.size: + if pos + 64 > result.getDataLen(): failArchiveEOF() if read32(src, pos) != zip64EndOfCentralDirectorySignature: @@ -268,11 +283,11 @@ proc openZipArchive*( var pos = socdOffset + centralDirectoryStart - if eocd + 22 > result.memFile.size: + if eocd + 22 > result.getDataLen(): failArchiveEOF() for _ in 0 ..< numCentralDirectoryRecords: - if pos + 46 > result.memFile.size: + if pos + 46 > result.getDataLen(): failArchiveEOF() if read32(src, pos) != centralDirectoryFileHeaderSignature: @@ -306,7 +321,7 @@ proc openZipArchive*( pos += 46 - if pos + fileNameLen > result.memFile.size: + if pos + fileNameLen > result.getDataLen(): failArchiveEOF() var fileName = newString(fileNameLen) @@ -321,7 +336,7 @@ proc openZipArchive*( var extraFieldsOffset = pos while extraFieldsOffset < pos + extraFieldLen: - if pos + 4 > result.memFile.size: + if pos + 4 > result.getDataLen(): failArchiveEOF() let @@ -395,6 +410,22 @@ proc openZipArchive*( result.close() raise e +proc openZipArchive*( + zipPath: string +): ZipArchiveReader {.raises: [IOError, OSError, ZippyError].} = + result = ZipArchiveReader() + result.mode = MemfileMode + result.memFile = memfiles.open(zipPath) + return openZipArchiveInternal(result) + +proc openZipArchiveBytes*( + byteString: string +): ZipArchiveReader {.raises: [IOError, OSError, ZippyError].} = + result = ZipArchiveReader() + result.mode = StringMode + result.byteString = byteString + return openZipArchiveInternal(result) + proc extractAll*( zipPath, dest: string ) {.raises: [IOError, OSError, ZippyError].} = @@ -412,7 +443,7 @@ proc extractAll*( let reader = openZipArchive(zipPath) - src = cast[ptr UncheckedArray[uint8]](reader.memFile.mem) + src = reader.getDataPtr() # Verify some things before attempting to write the files for _, record in reader.records: diff --git a/src/zippy/ziparchives_inmem.nim b/src/zippy/ziparchives_inmem.nim deleted file mode 100644 index 361a424..0000000 --- a/src/zippy/ziparchives_inmem.nim +++ /dev/null @@ -1,636 +0,0 @@ -import common, crc, internal, std/os, std/strutils, std/tables, - std/times, std/unicode, ziparchives_v1, zippy, std/sequtils - -export common, ziparchives_v1 - -const - fileHeaderLen = 30 - fileHeaderSignature = 0x04034b50.uint32 - centralDirectoryFileHeaderSignature = 0x02014b50.uint32 - endOfCentralDirectorySignature = 0x06054b50.uint32 - zip64EndOfCentralDirectorySignature = 0x06064b50.uint32 - zip64EndOfCentralDirectoryLocatorSignature = 0x07064b50.uint32 - zip64ExtraFieldId = 1.uint16 - -type - ZipArchiveRecordKind = enum - FileRecord, DirectoryRecord - - ZipArchiveRecord = object - kind: ZipArchiveRecordKind - fileHeaderOffset: int - path: string - uncompressedCrc32: uint32 - compressedSize: int - uncompressedSize: int - filePermissions: set[FilePermission] - - ZipArchiveBytesReader* = ref object - file_bytes: string - records: OrderedTable[string, ZipArchiveRecord] - -iterator walkFiles*(reader: ZipArchiveBytesReader): string = - ## Walks over all files in the archive and returns the file name - ## (including the path). - for _, record in reader.records: - if record.kind == FileRecord: - yield record.path - -proc extractFile*( - reader: ZipArchiveBytesReader, path: string -): string {.raises: [ZippyError].} = - - template failNoFileRecord() = - raise newException(ZippyError, "No file record found for " & path) - - let - src = cast[ptr UncheckedArray[uint8]](reader.file_bytes[0].addr) - record = - try: - reader.records[path] - except KeyError: - failNoFileRecord() - - var pos = record.fileHeaderOffset - - if pos + fileHeaderLen > reader.file_bytes.len: - failArchiveEOF() - - if read32(src, pos) != fileHeaderSignature: - raise newException(ZippyError, "Invalid file header") - - let - # minVersionToExtract = read16(src, pos + 4) - # generalPurposeFlag = read16(src, pos + 6) - compressionMethod = read16(src, pos + 8) - # lastModifiedTime = read16(src, pos + 10) - # lastModifiedDate = read16(src, pos + 12) - # uncompressedCrc32 = read32(src, pos + 14) - # compressedSize = read32(src, pos + 18) - # uncompressedSize = read32(src, pos + 22) - fileNameLen = read16(src, pos + 26).int - extraFieldLen = read16(src, pos + 28).int - - pos += fileHeaderLen + fileNameLen + extraFieldLen - - if pos + record.compressedSize > reader.file_bytes.len: - failArchiveEOF() - - case record.kind: - of FileRecord: - if compressionMethod == 0: # No compression - if record.compressedSize > 0: - result.setLen(record.compressedSize) - copyMem(result[0].addr, src[pos].addr, record.compressedSize) - elif compressionMethod == 8: # Deflate - result = uncompress(src[pos].addr, record.compressedSize, dfDeflate) - else: - raise newException(ZippyError, "Unsupported archive, compression method") - of DirectoryRecord: - failNoFileRecord() - - if crc32(result) != record.uncompressedCrc32: - raise newException(ZippyError, "Verifying crc32 failed") - -proc close*(reader: ZipArchiveBytesReader) {.raises: [OSError].} = - # reader.memFile.close() - discard - -proc parseMsDosDateTime(time, date: uint16): Time = - let - seconds = (time and 0b0000000000011111).int * 2 - minutes = ((time shr 5) and 0b0000000000111111).int - hours = ((time shr 11) and 0b0000000000011111).int - days = (date and 0b0000000000011111).int - months = ((date shr 5) and 0b0000000000001111).int - years = ((date shr 9) and 0b0000000001111111).int - if seconds <= 59 and minutes <= 59 and hours <= 23: - result = initDateTime( - days.MonthdayRange, - months.Month, - years + 1980, - hours.HourRange, - minutes.MinuteRange, - seconds.SecondRange, - local() - ).toTime() - -proc utf8ify(fileName: string): string = - const cp437AfterAscii = [ - # 0x80 - 0x8f - 0x00c7.uint32, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7, - 0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5, - # 0x90 - 0x9f - 0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9, - 0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5, 0x20a7, 0x0192, - # 0xa0 - 0xaf - 0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba, - 0x00bf, 0x2310, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb, - # 0xb0 - 0xbf - 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, - 0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510, - # 0xc0 - 0xcf - 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f, - 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567, - # 0xd0 - 0xdf - 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b, - 0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580, - # 0xd0 - 0xdf - 0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3, 0x00b5, 0x03c4, - 0x03a6, 0x0398, 0x03a9, 0x03b4, 0x221e, 0x03c6, 0x03b5, 0x2229, - # 0xf0 - 0xff - 0x2261, 0x00b1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248, - 0x00b0, 0x2219, 0x00b7, 0x221a, 0x207f, 0x00b2, 0x25a0, 0x00a0 - ] - - if validateUtf8(fileName) == -1: - return fileName - - # If the file name is not valid utf-8, assume it is CP437 / OEM / DOS - var runes: seq[Rune] - for c in fileName: - if c > 0x7f.char: - runes.add Rune(cp437AfterAscii[c.int - 0x80]) - else: - runes.add Rune(c) - $runes - -proc findEndOfCentralDirectory(reader: ZipArchiveBytesReader): int = - let src = cast[ptr UncheckedArray[uint8]](reader.file_bytes[0].addr) - - result = reader.file_bytes.len - 22 # Work backwards in the file starting here - while true: - if result < 0: - failArchiveEOF() - if read32(src, result) == endOfCentralDirectorySignature: - return - else: - dec result - -proc findStartOfCentralDirectory( - reader: ZipArchiveBytesReader, - start, numRecordEntries: int -): int = - let src = cast[ptr UncheckedArray[uint8]](reader.file_bytes[0].addr) - - result = start # Work backwards in the file starting here - var numRecordsFound: int - while true: - if result < 0: - failArchiveEOF() - if read32(src, result) == centralDirectoryFileHeaderSignature: - inc numRecordsFound - if numRecordsFound == numRecordEntries: - return - dec result - -proc openZipArchiveBytes*( - bytes: string -): ZipArchiveBytesReader {.raises: [IOError, OSError, ZippyError].} = - result = ZipArchiveBytesReader() - result.file_bytes = bytes - - try: - let src = cast[ptr UncheckedArray[uint8]](result.file_bytes[0].addr) - - let eocd = result.findEndOfCentralDirectory() - if eocd + 22 > result.file_bytes.len: - failArchiveEOF() - - var zip64 = false - if eocd - 20 >= 0: - if read32(src, eocd - 20) == zip64EndOfCentralDirectoryLocatorSignature: - zip64 = true - - var - diskNumber, startDisk, numRecordsOnDisk, numCentralDirectoryRecords: int - centralDirectorySize, centralDirectoryStart: int - if zip64: - let - zip64EndOfCentralDirectoryDiskNumber = read32(src, eocd - 20 + 4) - zip64EndOfCentralDirectoryStart = read64(src, eocd - 20 + 8).int - numDisks = read32(src, eocd - 20 + 16) - - if zip64EndOfCentralDirectoryDiskNumber != 0: - raise newException(ZippyError, "Unsupported archive, disk number") - - if numDisks != 1: - raise newException(ZippyError, "Unsupported archive, num disks") - - var pos = zip64EndOfCentralDirectoryStart - if pos + 64 > result.file_bytes.len: - failArchiveEOF() - - if read32(src, pos) != zip64EndOfCentralDirectorySignature: - raise newException(ZippyError, "Invalid central directory file header") - - # let - # endOfCentralDirectorySize = read64(src, pos + 4).int - # versionMadeBy = read16(src, pos + 12) - # minVersionToExtract = read16(src, pos + 14) - diskNumber = read32(src, pos + 16).int - startDisk = read32(src, pos + 20).int - numRecordsOnDisk = read64(src, pos + 24).int - numCentralDirectoryRecords = read64(src, pos + 32).int - centralDirectorySize = read64(src, pos + 40).int - centralDirectoryStart = read64(src, pos + 48).int - # anotherDisk = read64(src, pos + 56).int - else: - diskNumber = read16(src, eocd + 4).int - startDisk = read16(src, eocd + 6).int - numRecordsOnDisk = read16(src, eocd + 8).int - numCentralDirectoryRecords = read16(src, eocd + 10).int - centralDirectorySize = read32(src, eocd + 12).int - centralDirectoryStart = read32(src, eocd + 16).int - # commentLen = read16(src, eocd + 20).int - - if diskNumber != 0: - raise newException(ZippyError, "Unsupported archive, disk number") - - if startDisk != 0: - raise newException(ZippyError, "Unsupported archive, start disk") - - if numRecordsOnDisk != numCentralDirectoryRecords: - raise newException(ZippyError, "Unsupported archive, record number") - - # A zip archive may be concatenated to the end of another file (like an - # exe). This handles that by determining where the zip archive is from - # the start of the file. - let - socd = - try: - # Try to find the start relative to the end of the file, supporting - # zip archives being concatenated to the end. If this fails for any - # reason, fall back to the default behavior. - result.findStartOfCentralDirectory(eocd, numCentralDirectoryRecords) - except ZippyError: - centralDirectoryStart - socdOffset = socd - centralDirectoryStart - - var pos = socdOffset + centralDirectoryStart - - if eocd + 22 > result.file_bytes.len: - failArchiveEOF() - - for _ in 0 ..< numCentralDirectoryRecords: - if pos + 46 > result.file_bytes.len: - failArchiveEOF() - - if read32(src, pos) != centralDirectoryFileHeaderSignature: - raise newException(ZippyError, "Invalid central directory file header") - - let - # versionMadeBy = read16(src, pos + 4) - # minVersionToExtract = read16(src, pos + 6) - generalPurposeFlag = read16(src, pos + 8) - compressionMethod = read16(src, pos + 10) - # lastModifiedTime = read16(src, pos + 12) - # lastModifiedDate = read16(src, pos + 14) - uncompressedCrc32 = read32(src, pos + 16) - fileNameLen = read16(src, pos + 28).int - extraFieldLen = read16(src, pos + 30).int - fileCommentLen = read16(src, pos + 32).int - fileDiskNumber = read16(src, pos + 34).int - # internalFileAttr = read16(src, pos + 36) - externalFileAttr = read32(src, pos + 38) - - if compressionMethod notin [0.uint16, 8]: - raise newException(ZippyError, "Unsupported archive, compression method") - - if fileDiskNumber != 0: - raise newException(ZippyError, "Invalid file disk number") - - var - compressedSize = read32(src, pos + 20).int - uncompressedSize = read32(src, pos + 24).int - fileHeaderOffset = read32(src, pos + 42).int - - pos += 46 - - if pos + fileNameLen > result.file_bytes.len: - failArchiveEOF() - - var fileName = newString(fileNameLen) - copyMem(fileName[0].addr, src[pos].addr, fileNameLen) - - if fileName in result.records: - raise newException(ZippyError, "Unsupported archive, duplicate entry") - - pos += fileNameLen - - block: # Handle zip64 values as needed - var extraFieldsOffset = pos - - while extraFieldsOffset < pos + extraFieldLen: - if pos + 4 > result.file_bytes.len: - failArchiveEOF() - - let - fieldId = read16(src, pos + 0) - fieldLen = read16(src, pos + 2).int - - extraFieldsOffset += 4 - - if fieldId != zip64ExtraFieldId: - extraFieldsOffset += fieldLen - else: - # These are the zip64 sizes - var zip64ExtrasOffset = extraFieldsOffset - - if uncompressedSize == 0xffffffff: - if zip64ExtrasOffset + 8 > extraFieldsOffset + fieldLen: - failArchiveEOF() - uncompressedSize = read64(src, zip64ExtrasOffset).int - zip64ExtrasOffset += 8 - - if compressedSize == 0xffffffff: - if zip64ExtrasOffset + 8 > extraFieldsOffset + fieldLen: - failArchiveEOF() - compressedSize = read64(src, zip64ExtrasOffset).int - zip64ExtrasOffset += 8 - - if fileHeaderOffset == 0xffffffff: - if zip64ExtrasOffset + 8 > extraFieldsOffset + fieldLen: - failArchiveEOF() - fileHeaderOffset = read64(src, zip64ExtrasOffset).int - zip64ExtrasOffset += 8 - break - - pos += extraFieldLen + fileCommentLen - - if pos > socdOffset + centralDirectoryStart + centralDirectorySize: - raise newException(ZippyError, "Invalid central directory size") - - let utf8FileName = - if (generalPurposeFlag and 0b100000000000) != 0: - # Language encoding flag (EFS) set, assume utf-8 - fileName - else: - fileName.utf8ify() - - let - dosDirectoryFlag = (externalFileAttr and 0x10) != 0 - unixDirectoryFlag = (externalFileAttr and (S_IFDIR.uint32 shl 16)) != 0 - recordKind = - if dosDirectoryFlag or unixDirectoryFlag or utf8FileName.endsWith("/"): - DirectoryRecord - else: - FileRecord - - result.records[utf8FileName] = ZipArchiveRecord( - kind: recordKind, - fileHeaderOffset: fileHeaderOffset.int + socdOffset, - path: utf8FileName, - compressedSize: compressedSize, - uncompressedSize: uncompressedSize, - uncompressedCrc32: uncompressedCrc32, - filePermissions: parseFilePermissions(externalFileAttr shr 16) - ) - except IOError as e: - result.close() - raise e - except OSError as e: - result.close() - raise e - except ZippyError as e: - result.close() - raise e - -proc extractAll*( - bytes, dest: string -) {.raises: [IOError, OSError, ZippyError].} = - ## Extracts the files stored in archive to the destination directory. - ## The path to the destination directory must exist. - ## The destination directory itself must not exist (it is not overwitten). - if dest == "" or dirExists(dest): - raise newException(ZippyError, "Destination " & dest & " already exists") - - var (head, tail) = splitPath(dest) - if tail == "": # For / at end of path - (head, tail) = splitPath(head) - if head != "" and not dirExists(head): - raise newException(ZippyError, "Path to " & dest & " does not exist") - - let - reader = openZipArchiveBytes(bytes) - src = cast[ptr UncheckedArray[uint8]](reader.file_bytes[0].addr) - - # Verify some things before attempting to write the files - for _, record in reader.records: - record.path.verifyPathIsSafeToExtract() - - try: - # Create the directories and write the extracted files - for _, record in reader.records: - case record.kind: - of DirectoryRecord: - createDir(dest / record.path) - of FileRecord: - createDir(dest / splitFile(record.path).dir) - writeFile(dest / record.path, reader.extractFile(record.path)) - setFilePermissions(dest / record.path, record.filePermissions) - - # Set last modification time as a second pass otherwise directories get - # updated last modification times as files are added on Mac. - for _, record in reader.records: - let - lastModifiedTime = read16(src, record.fileHeaderOffset + 10) - lastModifiedDate = read16(src, record.fileHeaderOffset + 12) - lastModified = parseMsDosDateTime(lastModifiedTime, lastModifiedDate) - setLastModificationTime(dest / record.path, lastModified) - - # If something bad happens delete the destination directory to avoid leaving - # an incomplete extract. - except IOError as e: - removeDir(dest) - raise e - except OSError as e: - removeDir(dest) - raise e - except ZippyError as e: - removeDir(dest) - raise e - finally: - reader.close() - -when (NimMajor, NimMinor, NimPatch) >= (1, 6, 0): - # For some reason `sink Table | OrderedTable` does not work, so work around: - template createZipArchiveImpl( - entries: var Table[string, string] | var OrderedTable[string, string] - ) = - - proc add16(dst: var string, v: int16 | uint16) = - dst.setLen(dst.len + 2) - var tmp = v - copyMem(dst[^2].addr, tmp.addr, 2) - - proc add32(dst: var string, v: int32 | uint32) = - dst.setLen(dst.len + 4) - var tmp = v - copyMem(dst[^4].addr, tmp.addr, 4) - - proc add64(dst: var string, v: int | int64 | uint | uint64) = - dst.setLen(dst.len + 8) - var tmp = v - copyMem(dst[^8].addr, tmp.addr, 8) - - proc msdos(time: Time): (uint16, uint16) = - let - dt = time.local() - seconds = (dt.second div 2).uint16 - minutes = dt.minute.uint16 - hours = dt.hour.uint16 - days = dt.monthday.uint16 - months = dt.month.uint16 - years = (max(0, dt.year - 1980)).uint16 - - var time = seconds - time = (minutes shl 5) or time - time = (hours shl 11) or time - - var date = days - date = (months shl 5) or date - date = (years shl 9) or date - - (time, date) - - let (lastModifiedTime, lastModifiedDate) = msdos(getTime()) - - type ArchiveEntry = object - fileHeaderOffset: int - uncompressedLen: int - compressedLen: int - compressionMethod: uint16 - uncompressedCrc32: uint32 - - var - keys = toSeq(entries.keys) # The entries table is modified so use toSeq - records: seq[(string, ArchiveEntry)] - while keys.len > 0: - let fileName = keys.pop() - if fileName == "": - raise newException(ZippyError, "Invalid empty file name") - if fileName[0] == '/': - raise newException(ZippyError, "File paths must be relative") - if fileName.len > uint16.high.int: - raise newException(ZippyError, "File name len > uint16.high") - - var - uncompressedLen: int - uncompressedCrc32: uint32 - compressed: string - compressionMethod: uint16 - block: # Free `contents` after this block - var contents: string - discard entries.pop(fileName, contents) - uncompressedLen = contents.len - uncompressedCrc32 = crc32(contents) - if contents == "": - discard - else: - compressed = compress(contents, BestSpeed, dfDeflate) - compressionMethod = 8 - - records.add((fileName, ArchiveEntry( - fileHeaderOffset: result.len, - uncompressedLen: uncompressedLen, - compressedLen: compressed.len, - compressionMethod: compressionMethod, - uncompressedCrc32: uncompressedCrc32 - ))) - - result.add32(fileHeaderSignature) - result.add16(45) # Min version to extract - result.add16(1.uint16 shl 11) # General purpose flags - result.add16(compressionMethod) - result.add16(lastModifiedTime) - result.add16(lastModifiedDate) - result.add32(uncompressedCrc32) # CRC-32 of uncompressed data - result.add32(uint32.high) # Compressed size (or 0xffffffff for ZIP64) - result.add32(uint32.high) # Uncompressed size (or 0xffffffff for ZIP64) - result.add16(cast[uint16](fileName.len)) # File name length - result.add16(20) # Extra field length - - result.add(fileName) - - result.add16(zip64ExtraFieldId) - result.add16(16) - result.add64(uncompressedLen) - result.add64(compressed.len) - - # result.add(compressed) - if compressed != "": - result.setLen(result.len + compressed.len) - copyMem( - result[result.len - compressed.len].addr, - compressed.cstring, - compressed.len - ) - - let centralDirectoryStart = result.len - - for i in 0 ..< records.len: - let entry = records[i][1] - result.add32(centralDirectoryFileHeaderSignature) - result.add16(45) # Version made by - result.add16(45) # Min version to extract - result.add16(1.uint16 shl 11) # General purpose flags - result.add16(entry.compressionMethod) - result.add16(lastModifiedTime) - result.add16(lastModifiedDate) - result.add32(entry.uncompressedCrc32) - result.add32(uint32.high) # Compressed size (or 0xffffffff for ZIP64) - result.add32(uint32.high) # Uncompressed size (or 0xffffffff for ZIP64) - result.add16(cast[uint16](records[i][0].len)) # File name length - result.add16(28) # Extra field length - result.add16(0) # File comment length - result.add16(0) # Disk number where file starts - result.add16(0) # Internal file attributes - result.add32(0) # External file attributes - result.add32(uint32.high) # Relative offset of local file header (or 0xffffffff for ZIP64) - - result.add(records[i][0]) - - result.add16(zip64ExtraFieldId) - result.add16(24) - result.add64(entry.uncompressedLen) - result.add64(entry.compressedLen) - result.add64(entry.fileHeaderOffset) - - let centralDirectoryEnd = result.len - - result.add32(zip64EndOfCentralDirectorySignature) - result.add64(44) - result.add16(45) - result.add16(45) - result.add32(0) - result.add32(0) - result.add64(records.len) - result.add64(records.len) - result.add64(centralDirectoryEnd - centralDirectoryStart) - result.add64(centralDirectoryStart) - - result.add32(zip64EndOfCentralDirectoryLocatorSignature) - result.add32(0) - result.add64(centralDirectoryEnd) - result.add32(1) - - result.add32(endOfCentralDirectorySignature) - result.add16(0) # Number of this disk - result.add16(0) # Disk where central directory starts - result.add16(uint16.high) # Number of central directory records on this disk (or 0xffff for ZIP64) - result.add16(uint16.high) # Total number of central directory records (or 0xffff for ZIP64) - result.add32(uint32.high) # Size of central directory (bytes) (or 0xffffffff for ZIP64) - result.add32(uint32.high) # Offset of start of central directory, relative to start of archive (or 0xffffffff for ZIP64) - result.add16(0) - - proc createZipArchive*( - entries: sink Table[string, string] - ): string {.raises: [ZippyError].} = - createZipArchiveImpl(entries) - - proc createZipArchive*( - entries: sink OrderedTable[string, string] - ): string {.raises: [ZippyError].} = - createZipArchiveImpl(entries) - From a081b011b5bfc279d2e618c2c4dedd4fcd40da5a Mon Sep 17 00:00:00 2001 From: Chris Collazo Date: Mon, 23 Sep 2024 10:31:44 -0400 Subject: [PATCH 3/4] Removed export of openZipArchiveInternal(). --- src/zippy/ziparchives.nim | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zippy/ziparchives.nim b/src/zippy/ziparchives.nim index 633f106..89e83a6 100644 --- a/src/zippy/ziparchives.nim +++ b/src/zippy/ziparchives.nim @@ -200,7 +200,7 @@ proc findStartOfCentralDirectory( dec result -proc openZipArchiveInternal*( +proc openZipArchiveInternal( reader: ZipArchiveReader ): ZipArchiveReader {.raises: [IOError, OSError, ZippyError].} = result = reader From a51102dd2d9b2dda299585a2bf2f9523ece3e629 Mon Sep 17 00:00:00 2001 From: Chris Collazo Date: Mon, 30 Sep 2024 16:21:25 -0400 Subject: [PATCH 4/4] Added extractAllBytes*(), separated extracting by file and by byte string. --- src/test_ziparchives_inmem.nim | 17 +++++++++++------ src/zippy/ziparchives.nim | 29 ++++++++++++++++++++++------- 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/src/test_ziparchives_inmem.nim b/src/test_ziparchives_inmem.nim index 121eb6c..1744106 100644 --- a/src/test_ziparchives_inmem.nim +++ b/src/test_ziparchives_inmem.nim @@ -5,12 +5,17 @@ proc test_case() = defer: archive.close() for fname in archive.walk_files: - let bytes = archive.extract_file(fname) - var inner_archive = open_zip_archive_bytes(bytes) - defer: inner_archive.close() + let inner_bytes = archive.extract_file(fname) + + # First test scenario: Extract file-by-file + # var inner_archive = open_zip_archive_bytes(bytes) + # defer: inner_archive.close() - for ifname in inner_archive.walk_files: - let ifbytes = inner_archive.extract_file(ifname) - writeFile(ifname, ifbytes) + # for ifname in inner_archive.walk_files: + # let ifbytes = inner_archive.extract_file(ifname) + # writeFile(ifname, ifbytes) + + # Second test scenario: Extract whole inner archives + extractAllBytes(inner_bytes, fname & ".d") test_case() \ No newline at end of file diff --git a/src/zippy/ziparchives.nim b/src/zippy/ziparchives.nim index 89e83a6..8b74ee7 100644 --- a/src/zippy/ziparchives.nim +++ b/src/zippy/ziparchives.nim @@ -426,10 +426,8 @@ proc openZipArchiveBytes*( result.byteString = byteString return openZipArchiveInternal(result) -proc extractAll*( - zipPath, dest: string -) {.raises: [IOError, OSError, ZippyError].} = - ## Extracts the files stored in archive to the destination directory. + +proc checkExtractDestination(dest: string) {.raises: [IOError, OSError, ZippyError].} = ## The path to the destination directory must exist. ## The destination directory itself must not exist (it is not overwitten). if dest == "" or dirExists(dest): @@ -441,9 +439,10 @@ proc extractAll*( if head != "" and not dirExists(head): raise newException(ZippyError, "Path to " & dest & " does not exist") - let - reader = openZipArchive(zipPath) - src = reader.getDataPtr() +proc extractAllInternal( + reader: ZipArchiveReader, dest: string +) {.raises: [IOError, OSError, ZippyError].} = + let src = reader.getDataPtr() # Verify some things before attempting to write the files for _, record in reader.records: @@ -483,6 +482,22 @@ proc extractAll*( finally: reader.close() +proc extractAll*( + zipPath, dest: string +) {.raises: [IOError, OSError, ZippyError].} = + ## Extracts the files stored in archive to the destination directory. + checkExtractDestination(dest) + let reader = openZipArchive(zipPath) + extractAllInternal(reader, dest) + +proc extractAllBytes*( + zipBytes, dest: string +) {.raises: [IOError, OSError, ZippyError].} = + ## Extracts the files stored in byte-string archive to the destination directory. + checkExtractDestination(dest) + let reader = openZipArchiveBytes(zipBytes) + extractAllInternal(reader, dest) + when (NimMajor, NimMinor, NimPatch) >= (1, 6, 0): # For some reason `sink Table | OrderedTable` does not work, so work around: template createZipArchiveImpl(