Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

experiment with removing descriptors from zip file entries during updates #512

Draft
wants to merge 6 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 79 additions & 44 deletions src/ICSharpCode.SharpZipLib/Zip/ZipFile.cs
Original file line number Diff line number Diff line change
Expand Up @@ -999,16 +999,19 @@ public bool TestArchive(bool testData, TestStrategy strategy, ZipTestResultHandl
if (this[entryIndex].Crc != data.Crc)
{
status.AddError();
resultHandler?.Invoke(status, "Descriptor CRC mismatch");
}

if (this[entryIndex].CompressedSize != data.CompressedSize)
{
status.AddError();
resultHandler?.Invoke(status, "Descriptor compressed size mismatch");
}

if (this[entryIndex].Size != data.Size)
{
status.AddError();
resultHandler?.Invoke(status, "Descriptor size mismatch");
}
}
}
Expand Down Expand Up @@ -1917,11 +1920,9 @@ public void Modify(ZipEntry original, ZipEntry updated)
if ( original == null ) {
throw new ArgumentNullException("original");
}

if ( updated == null ) {
throw new ArgumentNullException("updated");
}

CheckUpdating();
contentsEdited_ = true;
updates_.Add(new ZipUpdate(original, updated));
Expand Down Expand Up @@ -2049,8 +2050,14 @@ private void WriteLocalEntryHeader(ZipUpdate update)
// TODO: Local offset will require adjusting for multi-disk zip files.
entry.Offset = baseStream_.Position;

// TODO: Need to clear any entry flags that dont make sense or throw an exception here.
if (update.Command != UpdateCommand.Copy)
// set flags based on the type of entry being written
if (update.Command == UpdateCommand.Copy)
{
// We're copying an existing entry during an update.
// In this case, we won't need descriptors as we should always know the size/CRC fields of the entry directly
entry.Flags &= ~(int)GeneralBitFlags.Descriptor;
}
else
{
if (entry.CompressionMethod == CompressionMethod.Deflated)
{
Expand Down Expand Up @@ -2382,26 +2389,37 @@ private byte[] GetBuffer()

private void CopyDescriptorBytes(ZipUpdate update, Stream dest, Stream source)
{
int bytesToCopy = GetDescriptorSize(update);
// Don't include the signature size to allow copy without seeking
var bytesToCopy = GetDescriptorSize(update, false);

// Don't touch the source stream if no descriptor is present
if (bytesToCopy == 0) return;

if (bytesToCopy > 0)
var buffer = GetBuffer();

// Copy the first 4 bytes of the descriptor
source.Read(buffer, 0, sizeof(int));
dest.Write(buffer, 0, sizeof(int));

if (BitConverter.ToUInt32(buffer, 0) != ZipConstants.DataDescriptorSignature)
{
byte[] buffer = GetBuffer();
// The initial bytes wasn't the descriptor, reduce the pending byte count
bytesToCopy -= buffer.Length;
}

while (bytesToCopy > 0)
{
int readSize = Math.Min(buffer.Length, bytesToCopy);
while (bytesToCopy > 0)
{
int readSize = Math.Min(buffer.Length, bytesToCopy);

int bytesRead = source.Read(buffer, 0, readSize);
if (bytesRead > 0)
{
dest.Write(buffer, 0, bytesRead);
bytesToCopy -= bytesRead;
}
else
{
throw new ZipException("Unxpected end of stream");
}
int bytesRead = source.Read(buffer, 0, readSize);
if (bytesRead > 0)
{
dest.Write(buffer, 0, bytesRead);
bytesToCopy -= bytesRead;
}
else
{
throw new ZipException("Unxpected end of stream");
}
}
}
Expand Down Expand Up @@ -2460,32 +2478,37 @@ private void CopyBytes(ZipUpdate update, Stream destination, Stream source,
/// Get the size of the source descriptor for a <see cref="ZipUpdate"/>.
/// </summary>
/// <param name="update">The update to get the size for.</param>
/// <returns>The descriptor size, zero if there isnt one.</returns>
private int GetDescriptorSize(ZipUpdate update)
/// <param name="includingSignature">Whether to include the signature size</param>
/// <returns>The descriptor size, zero if there isn't one.</returns>
private int GetDescriptorSize(ZipUpdate update, bool includingSignature)
{
int result = 0;
if ((update.Entry.Flags & (int)GeneralBitFlags.Descriptor) != 0)
{
result = ZipConstants.DataDescriptorSize - 4;
if (update.Entry.LocalHeaderRequiresZip64)
{
result = ZipConstants.Zip64DataDescriptorSize - 4;
}
}
return result;
if (!((GeneralBitFlags)update.Entry.Flags).HasFlag(GeneralBitFlags.Descriptor))
return 0;

var descriptorWithSignature = update.Entry.LocalHeaderRequiresZip64
? ZipConstants.Zip64DataDescriptorSize
: ZipConstants.DataDescriptorSize;

return includingSignature
? descriptorWithSignature
: descriptorWithSignature - sizeof(int);
}

private void CopyDescriptorBytesDirect(ZipUpdate update, Stream stream, ref long destinationPosition, long sourcePosition)
{
int bytesToCopy = GetDescriptorSize(update);
var buffer = GetBuffer(); ;

stream.Position = sourcePosition;
stream.Read(buffer, 0, sizeof(int));
var sourceHasSignature = BitConverter.ToUInt32(buffer, 0) == ZipConstants.DataDescriptorSignature;

var bytesToCopy = GetDescriptorSize(update, sourceHasSignature);

while (bytesToCopy > 0)
{
var readSize = (int)bytesToCopy;
byte[] buffer = GetBuffer();

stream.Position = sourcePosition;
int bytesRead = stream.Read(buffer, 0, readSize);

var bytesRead = stream.Read(buffer, 0, bytesToCopy);
if (bytesRead > 0)
{
stream.Position = destinationPosition;
Expand All @@ -2496,7 +2519,7 @@ private void CopyDescriptorBytesDirect(ZipUpdate update, Stream stream, ref long
}
else
{
throw new ZipException("Unxpected end of stream");
throw new ZipException("Unexpected end of stream");
}
}
}
Expand Down Expand Up @@ -2743,6 +2766,7 @@ private void CopyEntryDirect(ZipFile workFile, ZipUpdate update, ref long destin

// Clumsy way of handling retrieving the original name and extra data length for now.
// TODO: Stop re-reading name and data length in CopyEntryDirect.

uint nameLength = ReadLEUshort();
uint extraLength = ReadLEUshort();

Expand All @@ -2751,14 +2775,25 @@ private void CopyEntryDirect(ZipFile workFile, ZipUpdate update, ref long destin
if (skipOver)
{
if (update.OffsetBasedSize != -1)
{
destinationPosition += update.OffsetBasedSize;
}
else
// TODO: Find out why this calculation comes up 4 bytes short on some entries in ODT (Office Document Text) archives.
// WinZip produces a warning on these entries:
// "caution: value of lrec.csize (compressed size) changed from ..."
destinationPosition +=
(sourcePosition - entryDataOffset) + NameLengthOffset + // Header size
update.Entry.CompressedSize + GetDescriptorSize(update);
{
// Skip entry header
destinationPosition += (sourcePosition - entryDataOffset) + NameLengthOffset;

// Skip entry compressed data
destinationPosition += update.Entry.CompressedSize;

// Seek to end of entry to check for descriptor signature
baseStream_.Seek(destinationPosition, SeekOrigin.Begin);

var descriptorHasSignature = ReadLEUint() == ZipConstants.DataDescriptorSignature;

// Skip descriptor and it's signature (if present)
destinationPosition += GetDescriptorSize(update, descriptorHasSignature);
}
}
else
{
Expand Down
Loading