From ba1fcb7c64d8766704337971e48b0ec805733507 Mon Sep 17 00:00:00 2001 From: Edward Neal <55035479+edwardneal@users.noreply.github.com> Date: Sun, 26 May 2024 18:15:28 +0100 Subject: [PATCH 1/9] Changed behaviour of WriteFile WriteFile now takes a more granular approach to writing the ZipArchive to its stream. Adding a new file to the end of the archive will no longer require every file in the archive to be loaded into memory. Changing fixed-length metadata inside a file will no longer require the entire archive to be re-written. --- .../src/System/IO/Compression/ZipArchive.cs | 145 +++++++++++-- .../System/IO/Compression/ZipArchiveEntry.cs | 198 ++++++++++++------ .../src/System/IO/Compression/ZipBlocks.cs | 4 + 3 files changed, 263 insertions(+), 84 deletions(-) diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs index 2c0118270c006a..7855505cb04872 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs @@ -31,6 +31,8 @@ public class ZipArchive : IDisposable private byte[] _archiveComment; private Encoding? _entryNameAndCommentEncoding; + private long _firstDeletedEntryOffset; + #if DEBUG_FORCE_ZIP64 public bool _forceZip64; #endif @@ -168,12 +170,14 @@ public ZipArchive(Stream stream, ZipArchiveMode mode, bool leaveOpen, Encoding? _entries = new List(); _entriesCollection = new ReadOnlyCollection(_entries); _entriesDictionary = new Dictionary(); + Dirty = 0; _readEntries = false; _leaveOpen = leaveOpen; _centralDirectoryStart = 0; // invalid until ReadCentralDirectory _isDisposed = false; _numberOfThisDisk = 0; // invalid until ReadCentralDirectory _archiveComment = Array.Empty(); + _firstDeletedEntryOffset = long.MaxValue; switch (mode) { @@ -221,7 +225,11 @@ public ZipArchive(Stream stream, ZipArchiveMode mode, bool leaveOpen, Encoding? public string Comment { get => (EntryNameAndCommentEncoding ?? Encoding.UTF8).GetString(_archiveComment); - set => _archiveComment = ZipHelper.GetEncodedTruncatedBytesFromString(value, EntryNameAndCommentEncoding, ZipEndOfCentralDirectoryBlock.ZipFileCommentMaxLength, out _); + set + { + _archiveComment = ZipHelper.GetEncodedTruncatedBytesFromString(value, EntryNameAndCommentEncoding, ZipEndOfCentralDirectoryBlock.ZipFileCommentMaxLength, out _); + Dirty |= DirtyState.DynamicLengthMetadata; + } } /// @@ -389,6 +397,10 @@ private set } } + // This property's value only relates to the top-level fields of the archive (such as the archive comment.) + // New entries in the archive won't change its state. + internal DirtyState Dirty { get; private set; } + private ZipArchiveEntry DoCreateEntry(string entryName, CompressionLevel? 
compressionLevel) { ArgumentException.ThrowIfNullOrEmpty(entryName); @@ -415,7 +427,7 @@ internal void AcquireArchiveStream(ZipArchiveEntry entry) { if (!_archiveStreamOwner.EverOpenedForWrite) { - _archiveStreamOwner.WriteAndFinishLocalEntry(); + _archiveStreamOwner.WriteAndFinishLocalEntry(true); } else { @@ -447,6 +459,11 @@ internal void RemoveEntry(ZipArchiveEntry entry) _entries.Remove(entry); _entriesDictionary.Remove(entry.FullName); + // Keep track of the offset of the earliest deleted entry in the archive + if (entry._originallyInArchive && entry._offsetOfLocalHeader < _firstDeletedEntryOffset) + { + _firstDeletedEntryOffset = entry._offsetOfLocalHeader; + } } internal void ThrowIfDisposed() @@ -502,6 +519,9 @@ private void ReadCentralDirectory() numberOfEntries++; } + // Sort _entries by each archive entry's position + _entries.Sort(ZipArchiveEntry.LocalHeaderOffsetComparer.Instance); + if (numberOfEntries != _expectedNumberOfEntries) throw new InvalidDataException(SR.NumEntriesWrong); } @@ -633,41 +653,108 @@ private void WriteFile() // if we are in update mode, we call EnsureCentralDirectoryRead, which sets readEntries to true Debug.Assert(_readEntries); + // Entries starting after this offset have had a dynamically-sized change. Everything on or after this point must be rewritten. + long dynamicDirtyStartingOffset = 0; + List entriesToWrite = _entries; + if (_mode == ZipArchiveMode.Update) { - List markedForDelete = new List(); + // Entries starting after this offset have some kind of change made to them. It might just be a fixed-length field though, in which case + // that single entry's metadata can be rewritten without impacting anything else. + long startingOffset = _firstDeletedEntryOffset; + long nextFileOffset = 0; + dynamicDirtyStartingOffset = startingOffset; + + entriesToWrite = new(_entries.Count); foreach (ZipArchiveEntry entry in _entries) { - if (!entry.LoadLocalHeaderExtraFieldAndCompressedBytesIfNeeded()) - markedForDelete.Add(entry); + if (entry._originallyInArchive) + { + if (entry.Dirty == 0) + { + // Keep track of the expected position of the file entry after the final untouched file entry so that when the loop completes, + // we'll know which position to start writing new entries from. + nextFileOffset = Math.Max(nextFileOffset, entry.OffsetOfCompressedData + entry.CompressedLength); + } + // When calculating the starting offset to load the files from, only look at dirty entries which are already in the archive. + else + { + startingOffset = Math.Min(startingOffset, entry._offsetOfLocalHeader); + } + + // We want to re-write entries which are after the starting offset of the first entry which has pending data to write. + // NB: the existing ZipArchiveEntries are sorted in _entries by their position ascending. + if (entry._offsetOfLocalHeader >= startingOffset) + { + // If the pending data to write is fixed-length metadata in the header, there's no need to load the full file for + // inflation and deflation. 
+ if ((entry.Dirty & (DirtyState.DynamicLengthMetadata | DirtyState.StoredData)) != 0) + { + dynamicDirtyStartingOffset = Math.Min(dynamicDirtyStartingOffset, entry._offsetOfLocalHeader); + } + if (entry._offsetOfLocalHeader >= dynamicDirtyStartingOffset) + { + entry.LoadLocalHeaderExtraFieldAndCompressedBytesIfNeeded(); + } + + entriesToWrite.Add(entry); + } + } + else + { + entriesToWrite.Add(entry); + } + } + + // If the offset of entries to write from is still at long.MaxValue, then we know that nothing has been deleted, + // nothing has been modified - so we just want to move to the end of all remaining files in the archive. + if (startingOffset == long.MaxValue) + { + startingOffset = nextFileOffset; } - foreach (ZipArchiveEntry entry in markedForDelete) - entry.Delete(); - _archiveStream.Seek(0, SeekOrigin.Begin); - _archiveStream.SetLength(0); + _archiveStream.Seek(startingOffset, SeekOrigin.Begin); } - foreach (ZipArchiveEntry entry in _entries) + foreach (ZipArchiveEntry entry in entriesToWrite) { - entry.WriteAndFinishLocalEntry(); + // We don't always need to write the local header entry, ZipArchiveEntry is usually able to work out when it doesn't need to. + // We want to force this header entry to be written (even for completely untouched entries) if the entry comes after one + // which had a pending dynamically-sized write. + bool forceWriteLocalEntry = !entry._originallyInArchive || (entry._originallyInArchive && entry._offsetOfLocalHeader >= dynamicDirtyStartingOffset); + + entry.WriteAndFinishLocalEntry(forceWriteLocalEntry); } long startOfCentralDirectory = _archiveStream.Position; + // If there are no entries in the archive, we still want to create the archive epilogue. + bool archiveEpilogueRequiresUpdate = _entries.Count == 0; foreach (ZipArchiveEntry entry in _entries) { - entry.WriteCentralDirectoryFileHeader(); + // The central directory needs to be rewritten if its position has moved, if there's a new entry in the archive, or if the entry might be different. + bool centralDirectoryEntryRequiresUpdate = startOfCentralDirectory != _centralDirectoryStart + | (!entry._originallyInArchive || (entry._originallyInArchive && entry._offsetOfLocalHeader >= dynamicDirtyStartingOffset)); + + entry.WriteCentralDirectoryFileHeader(centralDirectoryEntryRequiresUpdate); + archiveEpilogueRequiresUpdate |= centralDirectoryEntryRequiresUpdate; } long sizeOfCentralDirectory = _archiveStream.Position - startOfCentralDirectory; - WriteArchiveEpilogue(startOfCentralDirectory, sizeOfCentralDirectory); + WriteArchiveEpilogue(startOfCentralDirectory, sizeOfCentralDirectory, archiveEpilogueRequiresUpdate); + + // If entries have been removed and new (smaller) ones added, there could be empty space at the end of the file. + // Shrink the file to reclaim this space. 
+ if (_mode == ZipArchiveMode.Update && _archiveStream.Position != _archiveStream.Length) + { + _archiveStream.SetLength(_archiveStream.Position); + } } // writes eocd, and if needed, zip 64 eocd, zip64 eocd locator // should only throw an exception in extremely exceptional cases because it is called from dispose - private void WriteArchiveEpilogue(long startOfCentralDirectory, long sizeOfCentralDirectory) + private void WriteArchiveEpilogue(long startOfCentralDirectory, long sizeOfCentralDirectory, bool centralDirectoryChanged) { // determine if we need Zip 64 if (startOfCentralDirectory >= uint.MaxValue @@ -680,12 +767,36 @@ private void WriteArchiveEpilogue(long startOfCentralDirectory, long sizeOfCentr { // if we need zip 64, write zip 64 eocd and locator long zip64EOCDRecordStart = _archiveStream.Position; - Zip64EndOfCentralDirectoryRecord.WriteBlock(_archiveStream, _entries.Count, startOfCentralDirectory, sizeOfCentralDirectory); - Zip64EndOfCentralDirectoryLocator.WriteBlock(_archiveStream, zip64EOCDRecordStart); + + if (centralDirectoryChanged) + { + Zip64EndOfCentralDirectoryRecord.WriteBlock(_archiveStream, _entries.Count, startOfCentralDirectory, sizeOfCentralDirectory); + Zip64EndOfCentralDirectoryLocator.WriteBlock(_archiveStream, zip64EOCDRecordStart); + } + else + { + _archiveStream.Seek(Zip64EndOfCentralDirectoryRecord.TotalSize, SeekOrigin.Current); + _archiveStream.Seek(Zip64EndOfCentralDirectoryLocator.TotalSize, SeekOrigin.Current); + } } // write normal eocd - ZipEndOfCentralDirectoryBlock.WriteBlock(_archiveStream, _entries.Count, startOfCentralDirectory, sizeOfCentralDirectory, _archiveComment); + if (centralDirectoryChanged | (Dirty != 0)) + { + ZipEndOfCentralDirectoryBlock.WriteBlock(_archiveStream, _entries.Count, startOfCentralDirectory, sizeOfCentralDirectory, _archiveComment); + } + else + { + _archiveStream.Seek(ZipEndOfCentralDirectoryBlock.TotalSize + _archiveComment.Length, SeekOrigin.Current); + } + } + + [Flags] + internal enum DirtyState + { + FixedLengthMetadata = 0x1, + DynamicLengthMetadata = 0x2, + StoredData = 0x4 } } } diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs index d54cb5a9547131..367044c3e3e29c 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs @@ -17,7 +17,7 @@ namespace System.IO.Compression public partial class ZipArchiveEntry { private ZipArchive _archive; - private readonly bool _originallyInArchive; + internal readonly bool _originallyInArchive; private readonly uint _diskNumberStart; private readonly ZipVersionMadeByPlatform _versionMadeByPlatform; private ZipVersionNeededValues _versionMadeBySpecification; @@ -28,7 +28,7 @@ public partial class ZipArchiveEntry private DateTimeOffset _lastModified; private long _compressedSize; private long _uncompressedSize; - private long _offsetOfLocalHeader; + internal long _offsetOfLocalHeader; private long? 
_storedOffsetOfCompressedData; private uint _crc32; // An array of buffers, each a maximum of MaxSingleBufferSize in size @@ -87,6 +87,8 @@ internal ZipArchiveEntry(ZipArchive archive, ZipCentralDirectoryFileHeader cd) _fileComment = cd.FileComment; _compressionLevel = MapCompressionLevel(_generalPurposeBitFlag, CompressionMethod); + + Dirty = 0; } // Initializes a ZipArchiveEntry instance for a new archive entry with a specified compression level. @@ -148,6 +150,8 @@ internal ZipArchiveEntry(ZipArchive archive, string entryName) { _archive.AcquireArchiveStream(this); } + + Dirty = 0; } /// @@ -187,6 +191,7 @@ public int ExternalAttributes { ThrowIfInvalidArchive(); _externalFileAttr = (uint)value; + Dirty |= ZipArchive.DirtyState.FixedLengthMetadata; } } @@ -209,6 +214,7 @@ public string Comment { _generalPurposeBitFlag |= BitFlagValues.UnicodeFileNameAndComment; } + Dirty |= ZipArchive.DirtyState.DynamicLengthMetadata; } } @@ -273,6 +279,7 @@ public DateTimeOffset LastWriteTime throw new ArgumentOutOfRangeException(nameof(value), SR.DateTimeOutOfRange); _lastModified = value; + Dirty |= ZipArchive.DirtyState.FixedLengthMetadata; } } @@ -295,6 +302,8 @@ public long Length /// public string Name => ParseFileName(FullName, _versionMadeByPlatform); + internal ZipArchive.DirtyState Dirty { get; private set; } + /// /// Deletes the entry from the archive. /// @@ -359,7 +368,7 @@ public override string ToString() internal bool EverOpenedForWrite => _everOpenedForWrite; - private long OffsetOfCompressedData + internal long OffsetOfCompressedData { get { @@ -446,15 +455,15 @@ private CompressionMethodValues CompressionMethod // that we are reading to write the central directory // // should only throw an exception in extremely exceptional cases because it is called from dispose - internal void WriteAndFinishLocalEntry() + internal void WriteAndFinishLocalEntry(bool forceWrite) { CloseStreams(); - WriteLocalFileHeaderAndDataIfNeeded(); + WriteLocalFileHeaderAndDataIfNeeded(forceWrite); UnloadStreams(); } // should only throw an exception in extremely exceptional cases because it is called from dispose - internal void WriteCentralDirectoryFileHeader() + internal void WriteCentralDirectoryFileHeader(bool forceWrite) { // This part is simple, because we should definitely know the sizes by this time BinaryWriter writer = new BinaryWriter(_archive.ArchiveStream); @@ -524,42 +533,55 @@ internal void WriteCentralDirectoryFileHeader() extraFieldLength = (ushort)bigExtraFieldLength; } - writer.Write(ZipCentralDirectoryFileHeader.SignatureConstant); // Central directory file header signature (4 bytes) - writer.Write((byte)_versionMadeBySpecification); // Version made by Specification (version) (1 byte) - writer.Write((byte)CurrentZipPlatform); // Version made by Compatibility (type) (1 byte) - writer.Write((ushort)_versionToExtract); // Minimum version needed to extract (2 bytes) - writer.Write((ushort)_generalPurposeBitFlag); // General Purpose bit flag (2 bytes) - writer.Write((ushort)CompressionMethod); // The Compression method (2 bytes) - writer.Write(ZipHelper.DateTimeToDosTime(_lastModified.DateTime)); // File last modification time and date (4 bytes) - writer.Write(_crc32); // CRC-32 (4 bytes) - writer.Write(compressedSizeTruncated); // Compressed Size (4 bytes) - writer.Write(uncompressedSizeTruncated); // Uncompressed Size (4 bytes) - writer.Write((ushort)_storedEntryNameBytes.Length); // File Name Length (2 bytes) - writer.Write(extraFieldLength); // Extra Field Length (2 bytes) - - 
Debug.Assert(_fileComment.Length <= ushort.MaxValue); - - writer.Write((ushort)_fileComment.Length); - writer.Write((ushort)0); // disk number start - writer.Write((ushort)0); // internal file attributes - writer.Write(_externalFileAttr); // external file attributes - writer.Write(offsetOfLocalHeaderTruncated); // offset of local header - - writer.Write(_storedEntryNameBytes); - - // write extra fields - if (zip64Needed) - zip64ExtraField.WriteBlock(_archive.ArchiveStream); - if (_cdUnknownExtraFields != null) - ZipGenericExtraField.WriteAllBlocks(_cdUnknownExtraFields, _archive.ArchiveStream); + if (_originallyInArchive && Dirty == 0 && !forceWrite) + { + long centralDirectoryHeaderLength = 46 + + _storedEntryNameBytes.Length + + (zip64Needed ? zip64ExtraField.TotalSize : 0) + + (_cdUnknownExtraFields != null ? ZipGenericExtraField.TotalSize(_cdUnknownExtraFields) : 0) + + _fileComment.Length; - if (_fileComment.Length > 0) - writer.Write(_fileComment); + _archive.ArchiveStream.Seek(centralDirectoryHeaderLength, SeekOrigin.Current); + } + else + { + writer.Write(ZipCentralDirectoryFileHeader.SignatureConstant); // Central directory file header signature (4 bytes) + writer.Write((byte)_versionMadeBySpecification); // Version made by Specification (version) (1 byte) + writer.Write((byte)CurrentZipPlatform); // Version made by Compatibility (type) (1 byte) + writer.Write((ushort)_versionToExtract); // Minimum version needed to extract (2 bytes) + writer.Write((ushort)_generalPurposeBitFlag); // General Purpose bit flag (2 bytes) + writer.Write((ushort)CompressionMethod); // The Compression method (2 bytes) + writer.Write(ZipHelper.DateTimeToDosTime(_lastModified.DateTime)); // File last modification time and date (4 bytes) + writer.Write(_crc32); // CRC-32 (4 bytes) + writer.Write(compressedSizeTruncated); // Compressed Size (4 bytes) + writer.Write(uncompressedSizeTruncated); // Uncompressed Size (4 bytes) + writer.Write((ushort)_storedEntryNameBytes.Length); // File Name Length (2 bytes) + writer.Write(extraFieldLength); // Extra Field Length (2 bytes) + + Debug.Assert(_fileComment.Length <= ushort.MaxValue); + + writer.Write((ushort)_fileComment.Length); + writer.Write((ushort)0); // disk number start + writer.Write((ushort)0); // internal file attributes + writer.Write(_externalFileAttr); // external file attributes + writer.Write(offsetOfLocalHeaderTruncated); // offset of local header + + writer.Write(_storedEntryNameBytes); + + // write extra fields + if (zip64Needed) + zip64ExtraField.WriteBlock(_archive.ArchiveStream); + if (_cdUnknownExtraFields != null) + ZipGenericExtraField.WriteAllBlocks(_cdUnknownExtraFields, _archive.ArchiveStream); + + if (_fileComment.Length > 0) + writer.Write(_fileComment); + } } - // returns false if fails, will get called on every entry before closing in update mode + // throws exception if fails, will get called on every relevant entry before closing in update mode // can throw InvalidDataException - internal bool LoadLocalHeaderExtraFieldAndCompressedBytesIfNeeded() + internal void LoadLocalHeaderExtraFieldAndCompressedBytesIfNeeded() { // we should have made this exact call in _archive.Init through ThrowIfOpenable Debug.Assert(IsOpenable(false, true, out _)); @@ -593,8 +615,6 @@ internal bool LoadLocalHeaderExtraFieldAndCompressedBytesIfNeeded() } ZipHelper.ReadBytes(_archive.ArchiveStream, _compressedBytes[_compressedBytes.Length - 1], (int)(_compressedSize % MaxSingleBufferSize)); } - - return true; } internal void ThrowIfNotOpenable(bool 
needToUncompress, bool needToLoadIntoMemory) @@ -696,6 +716,7 @@ private WrappedStream OpenInWriteMode() _archive.DebugAssertIsStillArchiveStreamOwner(this); _everOpenedForWrite = true; + Dirty |= ZipArchive.DirtyState.StoredData; CheckSumAndSizeWriteStream crcSizeStream = GetDataCompressor(_archive.ArchiveStream, true, (object? o, EventArgs e) => { // release the archive stream @@ -716,6 +737,7 @@ private WrappedStream OpenInUpdateMode() ThrowIfNotOpenable(needToUncompress: true, needToLoadIntoMemory: true); _everOpenedForWrite = true; + Dirty |= ZipArchive.DirtyState.StoredData; _currentlyOpenForWrite = true; // always put it at the beginning for them UncompressedData.Seek(0, SeekOrigin.Begin); @@ -840,7 +862,7 @@ private static BitFlagValues MapDeflateCompressionOption(BitFlagValues generalPu } // return value is true if we allocated an extra field for 64 bit headers, un/compressed size - private bool WriteLocalFileHeader(bool isEmptyFile) + private bool WriteLocalFileHeader(bool isEmptyFile, bool forceWrite) { BinaryWriter writer = new BinaryWriter(_archive.ArchiveStream); @@ -922,29 +944,48 @@ private bool WriteLocalFileHeader(bool isEmptyFile) extraFieldLength = (ushort)bigExtraFieldLength; } - // write header - writer.Write(ZipLocalFileHeader.SignatureConstant); - writer.Write((ushort)_versionToExtract); - writer.Write((ushort)_generalPurposeBitFlag); - writer.Write((ushort)CompressionMethod); - writer.Write(ZipHelper.DateTimeToDosTime(_lastModified.DateTime)); // uint - writer.Write(_crc32); // uint - writer.Write(compressedSizeTruncated); // uint - writer.Write(uncompressedSizeTruncated); // uint - writer.Write((ushort)_storedEntryNameBytes.Length); - writer.Write(extraFieldLength); // ushort - - writer.Write(_storedEntryNameBytes); - - if (zip64Used) - zip64ExtraField.WriteBlock(_archive.ArchiveStream); - if (_lhUnknownExtraFields != null) - ZipGenericExtraField.WriteAllBlocks(_lhUnknownExtraFields, _archive.ArchiveStream); + // If this is an existing, unchanged entry then silently skip forwards. + // If it's new or changed, write the header. 
+ if (_originallyInArchive && Dirty == 0 && !forceWrite) + { + writer.Seek(30 + _storedEntryNameBytes.Length, SeekOrigin.Current); + + if (zip64Used) + { + writer.Seek(zip64ExtraField.TotalSize, SeekOrigin.Current); + } + + if (_lhUnknownExtraFields != null) + { + writer.Seek(ZipGenericExtraField.TotalSize(_lhUnknownExtraFields), SeekOrigin.Current); + } + } + else + { + // write header + writer.Write(ZipLocalFileHeader.SignatureConstant); + writer.Write((ushort)_versionToExtract); + writer.Write((ushort)_generalPurposeBitFlag); + writer.Write((ushort)CompressionMethod); + writer.Write(ZipHelper.DateTimeToDosTime(_lastModified.DateTime)); // uint + writer.Write(_crc32); // uint + writer.Write(compressedSizeTruncated); // uint + writer.Write(uncompressedSizeTruncated); // uint + writer.Write((ushort)_storedEntryNameBytes.Length); + writer.Write(extraFieldLength); // ushort + + writer.Write(_storedEntryNameBytes); + + if (zip64Used) + zip64ExtraField.WriteBlock(_archive.ArchiveStream); + if (_lhUnknownExtraFields != null) + ZipGenericExtraField.WriteAllBlocks(_lhUnknownExtraFields, _archive.ArchiveStream); + } return zip64Used; } - private void WriteLocalFileHeaderAndDataIfNeeded() + private void WriteLocalFileHeaderAndDataIfNeeded(bool forceWrite) { // _storedUncompressedData gets frozen here, and is what gets written to the file if (_storedUncompressedData != null || _compressedBytes != null) @@ -973,7 +1014,7 @@ private void WriteLocalFileHeaderAndDataIfNeeded() _compressedSize = 0; } - WriteLocalFileHeader(isEmptyFile: _uncompressedSize == 0); + WriteLocalFileHeader(isEmptyFile: _uncompressedSize == 0, forceWrite: true); // according to ZIP specs, zero-byte files MUST NOT include file data if (_uncompressedSize != 0) @@ -986,12 +1027,19 @@ private void WriteLocalFileHeaderAndDataIfNeeded() } } } - else // there is no data in the file, but if we are in update mode, we still need to write a header + else // there is no data in the file (or the data in the file has not been loaded), but if we are in update mode, we may still need to write a header { if (_archive.Mode == ZipArchiveMode.Update || !_everOpenedForWrite) { _everOpenedForWrite = true; - WriteLocalFileHeader(isEmptyFile: true); + WriteLocalFileHeader(isEmptyFile: _uncompressedSize == 0, forceWrite: forceWrite); + + // If we know that we need to update the file header (but don't need to load and update the data itself) + // then advance the position past it. 
+ if (_compressedSize != 0) + { + _archive.ArchiveStream.Seek(_compressedSize, SeekOrigin.Current); + } } } } @@ -1244,7 +1292,7 @@ public override void Write(byte[] buffer, int offset, int count) { _everWritten = true; // write local header, we are good to go - _usedZip64inLH = _entry.WriteLocalFileHeader(isEmptyFile: false); + _usedZip64inLH = _entry.WriteLocalFileHeader(isEmptyFile: false, forceWrite: true); } _crcSizeStream.Write(buffer, offset, count); @@ -1264,7 +1312,7 @@ public override void Write(ReadOnlySpan source) { _everWritten = true; // write local header, we are good to go - _usedZip64inLH = _entry.WriteLocalFileHeader(isEmptyFile: false); + _usedZip64inLH = _entry.WriteLocalFileHeader(isEmptyFile: false, forceWrite: true); } _crcSizeStream.Write(source); @@ -1295,7 +1343,7 @@ async ValueTask Core(ReadOnlyMemory buffer, CancellationToken cancellation { _everWritten = true; // write local header, we are good to go - _usedZip64inLH = _entry.WriteLocalFileHeader(isEmptyFile: false); + _usedZip64inLH = _entry.WriteLocalFileHeader(isEmptyFile: false, forceWrite: true); } await _crcSizeStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); @@ -1328,7 +1376,7 @@ protected override void Dispose(bool disposing) if (!_everWritten) { // write local header, no data, so we use stored - _entry.WriteLocalFileHeader(isEmptyFile: true); + _entry.WriteLocalFileHeader(isEmptyFile: true, forceWrite: true); } else { @@ -1364,5 +1412,21 @@ internal enum CompressionMethodValues : ushort BZip2 = 0xC, LZMA = 0xE } + + internal sealed class LocalHeaderOffsetComparer : Comparer + { + private static readonly LocalHeaderOffsetComparer s_instance = new LocalHeaderOffsetComparer(); + + public static LocalHeaderOffsetComparer Instance => s_instance; + + // Newly added ZipArchiveEntry records should always go to the end of the file. + public override int Compare(ZipArchiveEntry? x, ZipArchiveEntry? y) + { + long xOffset = x != null && !x._originallyInArchive ? long.MaxValue : x?._offsetOfLocalHeader ?? long.MinValue; + long yOffset = y != null && !y._originallyInArchive ? long.MaxValue : y?._offsetOfLocalHeader ?? long.MinValue; + + return xOffset.CompareTo(yOffset); + } + } } } diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs index 8bcb5495efe461..a572fc3a1b983d 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs @@ -323,6 +323,7 @@ internal struct Zip64EndOfCentralDirectoryLocator public const int SignatureSize = sizeof(uint); public const int SizeOfBlockWithoutSignature = 16; + public const long TotalSize = SizeOfBlockWithoutSignature + SignatureSize; public uint NumberOfDiskWithZip64EOCD; public ulong OffsetOfZip64EOCD; @@ -355,6 +356,7 @@ internal struct Zip64EndOfCentralDirectoryRecord { private const uint SignatureConstant = 0x06064B50; private const ulong NormalSize = 0x2C; // the size of the data excluding the size/signature fields if no extra data included + public const long TotalSize = (long)NormalSize + 12; // total size of the entire block public ulong SizeOfThisRecord; public ushort VersionMadeBy; @@ -571,6 +573,8 @@ internal struct ZipEndOfCentralDirectoryBlock // This is the minimum possible size, assuming the zip file comments variable section is empty public const int SizeOfBlockWithoutSignature = 18; + // This also assumes a zero-length comment. 
+ public const long TotalSize = SizeOfBlockWithoutSignature + SignatureSize; // The end of central directory can have a variable size zip file comment at the end, but its max length can be 64K // The Zip File Format Specification does not explicitly mention a max size for this field, but we are assuming this From 9c03befbfcc89cf4e90f5a74b14a1dbaa52249de Mon Sep 17 00:00:00 2001 From: Edward Neal <55035479+edwardneal@users.noreply.github.com> Date: Sun, 26 May 2024 18:15:51 +0100 Subject: [PATCH 2/9] Added tests for changed WriteFile behaviour --- .../System/IO/Compression/ZipTestHelper.cs | 25 ++ .../tests/System.IO.Compression.Tests.csproj | 1 + .../zip_InvalidParametersAndStrangeFiles.cs | 24 +- .../tests/ZipArchive/zip_ReadTests.cs | 27 ++ .../tests/ZipArchive/zip_UpdateTests.cs | 336 +++++++++++++++++- 5 files changed, 409 insertions(+), 4 deletions(-) diff --git a/src/libraries/Common/tests/System/IO/Compression/ZipTestHelper.cs b/src/libraries/Common/tests/System/IO/Compression/ZipTestHelper.cs index 9666a16e2fb3ce..189542bcc2eff5 100644 --- a/src/libraries/Common/tests/System/IO/Compression/ZipTestHelper.cs +++ b/src/libraries/Common/tests/System/IO/Compression/ZipTestHelper.cs @@ -435,6 +435,31 @@ internal static void AddEntry(ZipArchive archive, string name, string contents, } } + public static byte[] CreateZipFile(int entryCount, byte[] entryContents) + { + using (MemoryStream ms = new()) + { + using (ZipArchive createdArchive = new(ms, ZipArchiveMode.Create, true)) + { + for (int i = 0; i < entryCount; i++) + { + string fileName = $"dummydata/{i}.bin"; + ZipArchiveEntry newEntry = createdArchive.CreateEntry(fileName); + + newEntry.LastWriteTime = DateTimeOffset.Now.AddHours(-1.0); + using (Stream entryWriteStream = newEntry.Open()) + { + entryWriteStream.Write(entryContents); + entryWriteStream.WriteByte((byte)(i % byte.MaxValue)); + } + } + } + ms.Flush(); + + return ms.ToArray(); + } + } + protected const string Utf8SmileyEmoji = "\ud83d\ude04"; protected const string Utf8LowerCaseOUmlautChar = "\u00F6"; protected const string Utf8CopyrightChar = "\u00A9"; diff --git a/src/libraries/System.IO.Compression/tests/System.IO.Compression.Tests.csproj b/src/libraries/System.IO.Compression/tests/System.IO.Compression.Tests.csproj index eb974f44651a74..a8ba6bec33b614 100644 --- a/src/libraries/System.IO.Compression/tests/System.IO.Compression.Tests.csproj +++ b/src/libraries/System.IO.Compression/tests/System.IO.Compression.Tests.csproj @@ -29,6 +29,7 @@ + diff --git a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_InvalidParametersAndStrangeFiles.cs b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_InvalidParametersAndStrangeFiles.cs index 87695ad071edff..ae8cb3aa99332a 100644 --- a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_InvalidParametersAndStrangeFiles.cs +++ b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_InvalidParametersAndStrangeFiles.cs @@ -683,7 +683,9 @@ public static void ZipWithLargeSparseFile() /// Deflate 0x08, _uncompressedSize 0, _compressedSize 2, compressed data: 0x0300 (\u0003 ETX) /// 2. EmptyFileCompressedWrongSize has /// Deflate 0x08, _uncompressedSize 0, _compressedSize 4, compressed data: 0xBAAD0300 (just bad data) - /// ZipArchive is expected to change compression method to Stored (0x00) and ignore "bad" compressed size + /// ZipArchive is not expected to make any changes to the compression method of an archive entry unless + /// it's been changed. 
If it has been changed, ZipArchive is expected to change compression method to + /// Stored (0x00) and ignore "bad" compressed size /// [Theory] [MemberData(nameof(EmptyFiles))] @@ -692,14 +694,30 @@ public void ReadArchive_WithEmptyDeflatedFile(byte[] fileBytes) using (var testStream = new MemoryStream(fileBytes)) { const string ExpectedFileName = "xl/customProperty2.bin"; - // open archive with zero-length file that is compressed (Deflate = 0x8) + + byte firstEntryCompressionMethod = fileBytes[8]; + + // first attempt: open archive with zero-length file that is compressed (Deflate = 0x8) using (var zip = new ZipArchive(testStream, ZipArchiveMode.Update, leaveOpen: true)) { - // dispose without making any changes will rewrite the archive + // dispose without making any changes will make no changes to the input stream } byte[] fileContent = testStream.ToArray(); + // compression method should not have changed + Assert.Equal(firstEntryCompressionMethod, fileBytes[8]); + + testStream.Seek(0, SeekOrigin.Begin); + // second attempt: open archive with zero-length file that is compressed (Deflate = 0x8) + using (var zip = new ZipArchive(testStream, ZipArchiveMode.Update, leaveOpen: true)) + using (var zipEntryStream = zip.Entries[0].Open()) + { + // dispose after opening an entry will rewrite the archive + } + + fileContent = testStream.ToArray(); + // compression method should change to "uncompressed" (Stored = 0x0) Assert.Equal(0, fileContent[8]); diff --git a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ReadTests.cs b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ReadTests.cs index a8019e0f6f0e62..3c2a74cb48aa2b 100644 --- a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ReadTests.cs +++ b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ReadTests.cs @@ -273,6 +273,33 @@ public static async Task EnsureDisposeIsCalledAsExpectedOnTheUnderlyingStream(bo Assert.Equal(expectedDisposeCalls, disposeCallCountingStream.NumberOfDisposeCalls); } + [Fact] + public static void ArchivesInOffsetOrder() + { + byte[] sampleEntryContents = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]; + byte[] sampleZipFile = CreateZipFile(50, sampleEntryContents); + + using (MemoryStream ms = new MemoryStream()) + { + ms.Write(sampleZipFile); + ms.Seek(0, SeekOrigin.Begin); + + ZipArchive source = new ZipArchive(ms, ZipArchiveMode.Read, leaveOpen: true); + long previousOffset = long.MinValue; + System.Reflection.FieldInfo offsetOfLocalHeader = typeof(ZipArchiveEntry).GetField("_offsetOfLocalHeader", System.Reflection.BindingFlags.NonPublic | Reflection.BindingFlags.Instance); + + for (int i = 0; i < source.Entries.Count; i++) + { + ZipArchiveEntry entry = source.Entries[i]; + long offset = (long)offsetOfLocalHeader.GetValue(entry); + + Assert.True(offset > previousOffset); + } + + source.Dispose(); + } + } + private class DisposeCallCountingStream : MemoryStream { public int NumberOfDisposeCalls { get; private set; } diff --git a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_UpdateTests.cs b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_UpdateTests.cs index 362b6bf95e3a6f..c66a3b8dfb43ef 100644 --- a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_UpdateTests.cs +++ b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_UpdateTests.cs @@ -207,7 +207,7 @@ public static async Task AddFileToArchive() await updateArchive(archive, zmodified(Path.Combine("addFile", "added.txt")), "added.txt"); } - IsZipSameAsDir(testArchive, 
zmodified ("addFile"), ZipArchiveMode.Read, requireExplicit: true, checkTimes: true); + IsZipSameAsDir(testArchive, zmodified("addFile"), ZipArchiveMode.Read, requireExplicit: true, checkTimes: true); } [Fact] @@ -351,5 +351,339 @@ public void Update_VerifyDuplicateEntriesAreAllowed() Assert.Equal(2, archive.Entries.Count); } + + [Fact] + public static async Task Update_PerformMinimalWritesWhenNoFilesChanged() + { + using (LocalMemoryStream ms = await LocalMemoryStream.readAppFileAsync(zfile("normal.zip"))) + using (CallTrackingStream trackingStream = new CallTrackingStream(ms)) + { + int writesCalled = trackingStream.TimesCalled(nameof(trackingStream.Write)); + int writeBytesCalled = trackingStream.TimesCalled(nameof(trackingStream.WriteByte)); + + ZipArchive target = new ZipArchive(trackingStream, ZipArchiveMode.Update, leaveOpen: true); + int archiveEntries = target.Entries.Count; + + target.Dispose(); + writesCalled = trackingStream.TimesCalled(nameof(trackingStream.Write)) - writesCalled; + writeBytesCalled = trackingStream.TimesCalled(nameof(trackingStream.WriteByte)) - writeBytesCalled; + + // No changes to the archive should result in no writes to the file. + Assert.Equal(0, writesCalled + writeBytesCalled); + } + } + + [Theory] + [InlineData(49, 1, 1)] + [InlineData(40, 3, 2)] + [InlineData(30, 5, 3)] + [InlineData(0, 8, 1)] + public void Update_PerformMinimalWritesWhenFixedLengthEntryHeaderFieldChanged(int startIndex, int entriesToModify, int step) + { + byte[] sampleEntryContents = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]; + byte[] sampleZipFile = CreateZipFile(50, sampleEntryContents); + + using (MemoryStream ms = new MemoryStream()) + { + ms.Write(sampleZipFile); + ms.Seek(0, SeekOrigin.Begin); + + using (CallTrackingStream trackingStream = new CallTrackingStream(ms)) + { + // Open the first archive in Update mode, then change the value of {entriesToModify} fixed-length entry headers + // (LastWriteTime.) Verify the correct number of writes performed as a result, then reopen the same + // archive, get the entries and make sure that the fields hold the expected value. + int writesCalled = trackingStream.TimesCalled(nameof(trackingStream.Write)); + int writeBytesCalled = trackingStream.TimesCalled(nameof(trackingStream.WriteByte)); + ZipArchive target = new ZipArchive(trackingStream, ZipArchiveMode.Update, leaveOpen: true); + List<(string EntryName, DateTimeOffset LastWriteTime)> updatedMetadata = new(entriesToModify); + + for (int i = 0; i < entriesToModify; i++) + { + int modificationIndex = startIndex + (i * step); + ZipArchiveEntry entryToModify = target.Entries[modificationIndex]; + string entryName = entryToModify.FullName; + DateTimeOffset expectedDateTimeOffset = entryToModify.LastWriteTime.AddHours(1.0); + + entryToModify.LastWriteTime = expectedDateTimeOffset; + updatedMetadata.Add((entryName, expectedDateTimeOffset)); + } + + target.Dispose(); + + writesCalled = trackingStream.TimesCalled(nameof(trackingStream.Write)) - writesCalled; + writeBytesCalled = trackingStream.TimesCalled(nameof(trackingStream.WriteByte)) - writeBytesCalled; + // As above, check 1: the number of writes performed should be minimal. + // 11 writes per archive entry for the local file header. + // 18 writes per archive entry for the central directory header. + // 8 writes (sometimes 9, if there's a comment) for the end of central directory block. + // The EOCD block won't change as a result of our modifications, so is excluded from the counts. 
+ Assert.Equal(((11 + 18) * entriesToModify), writesCalled + writeBytesCalled); + + trackingStream.Seek(0, SeekOrigin.Begin); + target = new ZipArchive(trackingStream, ZipArchiveMode.Read); + + for (int i = 0; i < entriesToModify; i++) + { + int modificationIndex = startIndex + (i * step); + var expectedValues = updatedMetadata[i]; + ZipArchiveEntry verifiedEntry = target.Entries[modificationIndex]; + + // Check 2: the field holds the expected value (and thus has been written to the file.) + Assert.NotNull(verifiedEntry); + Assert.Equal(expectedValues.EntryName, verifiedEntry.FullName); + Assert.Equal(expectedValues.LastWriteTime, verifiedEntry.LastWriteTime); + } + + // Check 3: no other data has been corrupted as a result + for (int i = 0; i < target.Entries.Count; i++) + { + ZipArchiveEntry entry = target.Entries[i]; + byte[] expectedBuffer = [.. sampleEntryContents, (byte)(i % byte.MaxValue)]; + byte[] readBuffer = new byte[expectedBuffer.Length]; + + using (Stream readStream = entry.Open()) + { + readStream.Read(readBuffer.AsSpan()); + } + + Assert.Equal(expectedBuffer, readBuffer); + } + + target.Dispose(); + } + } + } + + [Theory] + [InlineData(0)] + [InlineData(10)] + [InlineData(20)] + [InlineData(30)] + [InlineData(49)] + public void Update_PerformMinimalWritesWhenEntryDataChanges(int index) + => Update_PerformMinimalWritesWithDataAndHeaderChanges(index, -1); + + [Theory] + [InlineData(0, 0)] + [InlineData(20, 40)] + [InlineData(30, 10)] + public void Update_PerformMinimalWritesWithDataAndHeaderChanges(int dataChangeIndex, int lastWriteTimeChangeIndex) + { + byte[] sampleEntryContents = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]; + byte[] sampleZipFile = CreateZipFile(50, sampleEntryContents); + byte[] expectedUpdatedEntryContents = [19, 18, 17, 16, 15, 14, 13, 12, 11, 10]; + + using (MemoryStream ms = new MemoryStream()) + { + ms.Write(sampleZipFile); + ms.Seek(0, SeekOrigin.Begin); + + using (CallTrackingStream trackingStream = new CallTrackingStream(ms)) + { + // Open the archive in Update mode, then rewrite the data of the {dataChangeIndex}th entry + // and set the LastWriteTime of the {lastWriteTimeChangeIndex}th entry. + // Verify the correct number of writes performed as a result, then reopen the same + // archive, get the entries and make sure that the fields hold the expected value. + int writesCalled = trackingStream.TimesCalled(nameof(trackingStream.Write)); + int writeBytesCalled = trackingStream.TimesCalled(nameof(trackingStream.WriteByte)); + ZipArchive target = new ZipArchive(trackingStream, ZipArchiveMode.Update, leaveOpen: true); + ZipArchiveEntry entryToRewrite = target.Entries[dataChangeIndex]; + int totalEntries = target.Entries.Count; + int expectedEntriesToWrite = target.Entries.Count - dataChangeIndex; + DateTimeOffset expectedWriteTime = default; + + if (lastWriteTimeChangeIndex != -1) + { + ZipArchiveEntry entryToModify = target.Entries[lastWriteTimeChangeIndex]; + + expectedWriteTime = entryToModify.LastWriteTime.AddHours(1.0); + entryToModify.LastWriteTime = expectedWriteTime; + } + + using (var entryStream = entryToRewrite.Open()) + { + entryStream.SetLength(0); + entryStream.Write(expectedUpdatedEntryContents); + } + + target.Dispose(); + + writesCalled = trackingStream.TimesCalled(nameof(trackingStream.Write)) - writesCalled; + writeBytesCalled = trackingStream.TimesCalled(nameof(trackingStream.WriteByte)) - writeBytesCalled; + + // If the data changed first, then every entry after it will be written in full. 
If the fixed-length + // metadata changed first, some entries which won't have been fully written - just updated in place. + // 11 writes per archive entry for the local file header. + // 18 writes per archive entry for the central directory header. + // 4 writes for the file data of the updated entry itself + // 1 write per archive entry for the file data of other entries after this in the file + // 8 writes (sometimes 9, if there's a comment) for the end of central directory block. + // All of the central directory headers must be rewritten after an entry's data has been modified. + if (dataChangeIndex <= lastWriteTimeChangeIndex || lastWriteTimeChangeIndex == -1) + { + // dataChangeIndex -> totalEntries: rewrite in full + // all central directories headers + Assert.Equal(8 + 3 + ((11 + 1) * expectedEntriesToWrite) + (18 * totalEntries), writesCalled + writeBytesCalled); + } + else + { + // lastWriteTimeChangeIndex: partial rewrite + // dataChangeIndex -> totalEntries: rewrite in full + // all central directory headers + Assert.Equal(8 + 3 + ((11 + 1) * expectedEntriesToWrite) + (18 * totalEntries) + 11, writesCalled + writeBytesCalled); + } + + trackingStream.Seek(0, SeekOrigin.Begin); + target = new ZipArchive(trackingStream, ZipArchiveMode.Read); + + // Check 2: no other data has been corrupted as a result + for (int i = 0; i < target.Entries.Count; i++) + { + ZipArchiveEntry entry = target.Entries[i]; + byte[] expectedBuffer = i == dataChangeIndex + ? expectedUpdatedEntryContents + : [.. sampleEntryContents, (byte)(i % byte.MaxValue)]; + byte[] readBuffer = new byte[expectedBuffer.Length]; + + using (Stream readStream = entry.Open()) + { + readStream.Read(readBuffer.AsSpan()); + } + + Assert.Equal(expectedBuffer, readBuffer); + + if (i == lastWriteTimeChangeIndex) + { + Assert.Equal(expectedWriteTime, entry.LastWriteTime); + } + } + } + } + } + + [Fact] + public async Task Update_PerformMinimalWritesWhenArchiveCommentChanged() + { + using (LocalMemoryStream ms = await LocalMemoryStream.readAppFileAsync(zfile("normal.zip"))) + using (CallTrackingStream trackingStream = new CallTrackingStream(ms)) + { + int writesCalled = trackingStream.TimesCalled(nameof(trackingStream.Write)); + int writeBytesCalled = trackingStream.TimesCalled(nameof(trackingStream.WriteByte)); + string expectedComment = "14 byte comment"; + + ZipArchive target = new ZipArchive(trackingStream, ZipArchiveMode.Update, leaveOpen: true); + target.Comment = expectedComment; + + target.Dispose(); + writesCalled = trackingStream.TimesCalled(nameof(trackingStream.Write)) - writesCalled; + writeBytesCalled = trackingStream.TimesCalled(nameof(trackingStream.WriteByte)) - writeBytesCalled; + + // We expect 9 writes for the end of central directory block - 8 for the EOCD, 1 for the comment. 
+ Assert.Equal(9, writesCalled + writeBytesCalled); + + trackingStream.Seek(0, SeekOrigin.Begin); + + target = new ZipArchive(trackingStream, ZipArchiveMode.Read, leaveOpen: true); + Assert.Equal(expectedComment, target.Comment); + } + } + + [Theory] + [InlineData(-1, 40)] + [InlineData(-1, 49)] + [InlineData(-1, 0)] + [InlineData(42, 40)] + [InlineData(38, 40)] + public void Update_PerformMinimalWritesWhenEntriesModifiedAndDeleted(int modifyIndex, int deleteIndex) + { + byte[] sampleEntryContents = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]; + byte[] sampleZipFile = CreateZipFile(50, sampleEntryContents); + byte[] expectedUpdatedEntryContents = [22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]; + + using (MemoryStream ms = new MemoryStream()) + { + ms.Write(sampleZipFile); + ms.Seek(0, SeekOrigin.Begin); + + using (CallTrackingStream trackingStream = new CallTrackingStream(ms)) + { + // Open the archive in Update mode, then rewrite the data of the {modifyIndex}th entry + // and delete the LastWriteTime of the {lastWriteTimeChangeIndex}th entry. + // Verify the correct number of writes performed as a result, then reopen the same + // archive, get the entries, make sure that the right number of entries have been + // found and that the entries have the correct contents. + int writesCalled = trackingStream.TimesCalled(nameof(trackingStream.Write)); + int writeBytesCalled = trackingStream.TimesCalled(nameof(trackingStream.WriteByte)); + ZipArchive target = new ZipArchive(trackingStream, ZipArchiveMode.Update, leaveOpen: true); + int totalEntries = target.Entries.Count; + // Everything after the first modification or deletion is to be rewritten. + int expectedEntriesToWrite = (totalEntries - 1) - (modifyIndex == -1 ? deleteIndex : Math.Min(modifyIndex, deleteIndex)); + ZipArchiveEntry entryToDelete = target.Entries[deleteIndex]; + string deletedPath = entryToDelete.FullName; + string modifiedPath = null; + + if (modifyIndex != -1) + { + ZipArchiveEntry entryToRewrite = target.Entries[modifyIndex]; + + modifiedPath = entryToRewrite.FullName; + using (var entryStream = entryToRewrite.Open()) + { + entryStream.SetLength(0); + entryStream.Write(expectedUpdatedEntryContents); + } + } + + entryToDelete.Delete(); + + target.Dispose(); + + Assert.True(ms.Length < sampleZipFile.Length); + + writesCalled = trackingStream.TimesCalled(nameof(trackingStream.Write)) - writesCalled; + writeBytesCalled = trackingStream.TimesCalled(nameof(trackingStream.WriteByte)) - writeBytesCalled; + + // 11 writes per archive entry for the local file header. + // 18 writes per archive entry for the central directory header. + // 4 writes for the file data of the updated entry itself + // 1 write per archive entry for the file data of other entries after this in the file + // 8 writes (sometimes 9, if there's a comment) for the end of central directory block. + // All of the central directory headers must be rewritten after an entry's data has been modified. 
+ if (modifyIndex == -1) + { + Assert.Equal(8 + ((11 + 1) * expectedEntriesToWrite) + (18 * (totalEntries - 1)), writesCalled + writeBytesCalled); + } + else + { + Assert.Equal(8 + 3 + ((11 + 1) * expectedEntriesToWrite) + (18 * (totalEntries - 1)), writesCalled + writeBytesCalled); + } + + trackingStream.Seek(0, SeekOrigin.Begin); + target = new ZipArchive(trackingStream, ZipArchiveMode.Read); + + // Check 2: no other data has been corrupted as a result + for (int i = 0; i < target.Entries.Count; i++) + { + ZipArchiveEntry entry = target.Entries[i]; + // The expected index will be off by one if it's after the deleted index, so compensate + int expectedIndex = i < deleteIndex ? i : i + 1; + byte[] expectedBuffer = entry.FullName == modifiedPath + ? expectedUpdatedEntryContents + : [.. sampleEntryContents, (byte)(expectedIndex % byte.MaxValue)]; + byte[] readBuffer = new byte[expectedBuffer.Length]; + + using (Stream readStream = entry.Open()) + { + readStream.Read(readBuffer.AsSpan()); + } + + Assert.Equal(expectedBuffer, readBuffer); + + Assert.NotEqual(deletedPath, entry.FullName); + } + } + } + } } } From 03f7fcdbbb666fdce505e05916f35331c88cfcca Mon Sep 17 00:00:00 2001 From: Edward Neal <55035479+edwardneal@users.noreply.github.com> Date: Tue, 25 Jun 2024 07:05:30 +0100 Subject: [PATCH 3/9] Changes following code review Renamed Dirty and DirtyState to Changed and ChangeState. Explicitly assigned Unchanged as a zero-value ChangeState. --- .../src/System/IO/Compression/ZipArchive.cs | 15 +++++++------- .../System/IO/Compression/ZipArchiveEntry.cs | 20 +++++++++---------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs index 7855505cb04872..7109ffff5722ce 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs @@ -170,7 +170,7 @@ public ZipArchive(Stream stream, ZipArchiveMode mode, bool leaveOpen, Encoding? _entries = new List(); _entriesCollection = new ReadOnlyCollection(_entries); _entriesDictionary = new Dictionary(); - Dirty = 0; + Changed = ChangeState.Unchanged; _readEntries = false; _leaveOpen = leaveOpen; _centralDirectoryStart = 0; // invalid until ReadCentralDirectory @@ -228,7 +228,7 @@ public string Comment set { _archiveComment = ZipHelper.GetEncodedTruncatedBytesFromString(value, EntryNameAndCommentEncoding, ZipEndOfCentralDirectoryBlock.ZipFileCommentMaxLength, out _); - Dirty |= DirtyState.DynamicLengthMetadata; + Changed |= ChangeState.DynamicLengthMetadata; } } @@ -399,7 +399,7 @@ private set // This property's value only relates to the top-level fields of the archive (such as the archive comment.) // New entries in the archive won't change its state. - internal DirtyState Dirty { get; private set; } + internal ChangeState Changed { get; private set; } private ZipArchiveEntry DoCreateEntry(string entryName, CompressionLevel? compressionLevel) { @@ -670,7 +670,7 @@ private void WriteFile() { if (entry._originallyInArchive) { - if (entry.Dirty == 0) + if (entry.Changed == ChangeState.Unchanged) { // Keep track of the expected position of the file entry after the final untouched file entry so that when the loop completes, // we'll know which position to start writing new entries from. 
@@ -688,7 +688,7 @@ private void WriteFile() { // If the pending data to write is fixed-length metadata in the header, there's no need to load the full file for // inflation and deflation. - if ((entry.Dirty & (DirtyState.DynamicLengthMetadata | DirtyState.StoredData)) != 0) + if ((entry.Changed & (ChangeState.DynamicLengthMetadata | ChangeState.StoredData)) != 0) { dynamicDirtyStartingOffset = Math.Min(dynamicDirtyStartingOffset, entry._offsetOfLocalHeader); } @@ -781,7 +781,7 @@ private void WriteArchiveEpilogue(long startOfCentralDirectory, long sizeOfCentr } // write normal eocd - if (centralDirectoryChanged | (Dirty != 0)) + if (centralDirectoryChanged | (Changed != ChangeState.Unchanged)) { ZipEndOfCentralDirectoryBlock.WriteBlock(_archiveStream, _entries.Count, startOfCentralDirectory, sizeOfCentralDirectory, _archiveComment); } @@ -792,8 +792,9 @@ private void WriteArchiveEpilogue(long startOfCentralDirectory, long sizeOfCentr } [Flags] - internal enum DirtyState + internal enum ChangeState { + Unchanged = 0x0, FixedLengthMetadata = 0x1, DynamicLengthMetadata = 0x2, StoredData = 0x4 diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs index 367044c3e3e29c..bf60996a162047 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs @@ -88,7 +88,7 @@ internal ZipArchiveEntry(ZipArchive archive, ZipCentralDirectoryFileHeader cd) _compressionLevel = MapCompressionLevel(_generalPurposeBitFlag, CompressionMethod); - Dirty = 0; + Changed = ZipArchive.ChangeState.Unchanged; } // Initializes a ZipArchiveEntry instance for a new archive entry with a specified compression level. @@ -151,7 +151,7 @@ internal ZipArchiveEntry(ZipArchive archive, string entryName) _archive.AcquireArchiveStream(this); } - Dirty = 0; + Changed = ZipArchive.ChangeState.Unchanged; } /// @@ -191,7 +191,7 @@ public int ExternalAttributes { ThrowIfInvalidArchive(); _externalFileAttr = (uint)value; - Dirty |= ZipArchive.DirtyState.FixedLengthMetadata; + Changed |= ZipArchive.ChangeState.FixedLengthMetadata; } } @@ -214,7 +214,7 @@ public string Comment { _generalPurposeBitFlag |= BitFlagValues.UnicodeFileNameAndComment; } - Dirty |= ZipArchive.DirtyState.DynamicLengthMetadata; + Changed |= ZipArchive.ChangeState.DynamicLengthMetadata; } } @@ -279,7 +279,7 @@ public DateTimeOffset LastWriteTime throw new ArgumentOutOfRangeException(nameof(value), SR.DateTimeOutOfRange); _lastModified = value; - Dirty |= ZipArchive.DirtyState.FixedLengthMetadata; + Changed |= ZipArchive.ChangeState.FixedLengthMetadata; } } @@ -302,7 +302,7 @@ public long Length /// public string Name => ParseFileName(FullName, _versionMadeByPlatform); - internal ZipArchive.DirtyState Dirty { get; private set; } + internal ZipArchive.ChangeState Changed { get; private set; } /// /// Deletes the entry from the archive. 
@@ -533,7 +533,7 @@ internal void WriteCentralDirectoryFileHeader(bool forceWrite) extraFieldLength = (ushort)bigExtraFieldLength; } - if (_originallyInArchive && Dirty == 0 && !forceWrite) + if (_originallyInArchive && Changed == ZipArchive.ChangeState.Unchanged && !forceWrite) { long centralDirectoryHeaderLength = 46 + _storedEntryNameBytes.Length @@ -716,7 +716,7 @@ private WrappedStream OpenInWriteMode() _archive.DebugAssertIsStillArchiveStreamOwner(this); _everOpenedForWrite = true; - Dirty |= ZipArchive.DirtyState.StoredData; + Changed |= ZipArchive.ChangeState.StoredData; CheckSumAndSizeWriteStream crcSizeStream = GetDataCompressor(_archive.ArchiveStream, true, (object? o, EventArgs e) => { // release the archive stream @@ -737,7 +737,7 @@ private WrappedStream OpenInUpdateMode() ThrowIfNotOpenable(needToUncompress: true, needToLoadIntoMemory: true); _everOpenedForWrite = true; - Dirty |= ZipArchive.DirtyState.StoredData; + Changed |= ZipArchive.ChangeState.StoredData; _currentlyOpenForWrite = true; // always put it at the beginning for them UncompressedData.Seek(0, SeekOrigin.Begin); @@ -946,7 +946,7 @@ private bool WriteLocalFileHeader(bool isEmptyFile, bool forceWrite) // If this is an existing, unchanged entry then silently skip forwards. // If it's new or changed, write the header. - if (_originallyInArchive && Dirty == 0 && !forceWrite) + if (_originallyInArchive && Changed == ZipArchive.ChangeState.Unchanged && !forceWrite) { writer.Seek(30 + _storedEntryNameBytes.Length, SeekOrigin.Current); From 15eb743095d68bf1b3b9fd3d4614c5dcdf685f19 Mon Sep 17 00:00:00 2001 From: Edward Neal <55035479+edwardneal@users.noreply.github.com> Date: Tue, 25 Jun 2024 07:22:55 +0100 Subject: [PATCH 4/9] Changed field protection levels Reset _originallyInArchive and _offsetOfLocalHeader to private members, exposed as internal properties. Also changed _versionToExtract to be private - this isn't ever used in System.IO.Compression outside of ZipArchiveEntry. --- .../src/System/IO/Compression/ZipArchive.cs | 18 +++++++++--------- .../System/IO/Compression/ZipArchiveEntry.cs | 14 +++++++++----- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs index 7109ffff5722ce..b402d424ac6020 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs @@ -460,9 +460,9 @@ internal void RemoveEntry(ZipArchiveEntry entry) _entriesDictionary.Remove(entry.FullName); // Keep track of the offset of the earliest deleted entry in the archive - if (entry._originallyInArchive && entry._offsetOfLocalHeader < _firstDeletedEntryOffset) + if (entry.OriginallyInArchive && entry.OffsetOfLocalHeader < _firstDeletedEntryOffset) { - _firstDeletedEntryOffset = entry._offsetOfLocalHeader; + _firstDeletedEntryOffset = entry.OffsetOfLocalHeader; } } @@ -668,7 +668,7 @@ private void WriteFile() entriesToWrite = new(_entries.Count); foreach (ZipArchiveEntry entry in _entries) { - if (entry._originallyInArchive) + if (entry.OriginallyInArchive) { if (entry.Changed == ChangeState.Unchanged) { @@ -679,20 +679,20 @@ private void WriteFile() // When calculating the starting offset to load the files from, only look at dirty entries which are already in the archive. 
else { - startingOffset = Math.Min(startingOffset, entry._offsetOfLocalHeader); + startingOffset = Math.Min(startingOffset, entry.OffsetOfLocalHeader); } // We want to re-write entries which are after the starting offset of the first entry which has pending data to write. // NB: the existing ZipArchiveEntries are sorted in _entries by their position ascending. - if (entry._offsetOfLocalHeader >= startingOffset) + if (entry.OffsetOfLocalHeader >= startingOffset) { // If the pending data to write is fixed-length metadata in the header, there's no need to load the full file for // inflation and deflation. if ((entry.Changed & (ChangeState.DynamicLengthMetadata | ChangeState.StoredData)) != 0) { - dynamicDirtyStartingOffset = Math.Min(dynamicDirtyStartingOffset, entry._offsetOfLocalHeader); + dynamicDirtyStartingOffset = Math.Min(dynamicDirtyStartingOffset, entry.OffsetOfLocalHeader); } - if (entry._offsetOfLocalHeader >= dynamicDirtyStartingOffset) + if (entry.OffsetOfLocalHeader >= dynamicDirtyStartingOffset) { entry.LoadLocalHeaderExtraFieldAndCompressedBytesIfNeeded(); } @@ -721,7 +721,7 @@ private void WriteFile() // We don't always need to write the local header entry, ZipArchiveEntry is usually able to work out when it doesn't need to. // We want to force this header entry to be written (even for completely untouched entries) if the entry comes after one // which had a pending dynamically-sized write. - bool forceWriteLocalEntry = !entry._originallyInArchive || (entry._originallyInArchive && entry._offsetOfLocalHeader >= dynamicDirtyStartingOffset); + bool forceWriteLocalEntry = !entry.OriginallyInArchive || (entry.OriginallyInArchive && entry.OffsetOfLocalHeader >= dynamicDirtyStartingOffset); entry.WriteAndFinishLocalEntry(forceWriteLocalEntry); } @@ -734,7 +734,7 @@ private void WriteFile() { // The central directory needs to be rewritten if its position has moved, if there's a new entry in the archive, or if the entry might be different. 
bool centralDirectoryEntryRequiresUpdate = startOfCentralDirectory != _centralDirectoryStart - | (!entry._originallyInArchive || (entry._originallyInArchive && entry._offsetOfLocalHeader >= dynamicDirtyStartingOffset)); + | (!entry.OriginallyInArchive || (entry.OriginallyInArchive && entry.OffsetOfLocalHeader >= dynamicDirtyStartingOffset)); entry.WriteCentralDirectoryFileHeader(centralDirectoryEntryRequiresUpdate); archiveEpilogueRequiresUpdate |= centralDirectoryEntryRequiresUpdate; diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs index bf60996a162047..83a6a0d318f376 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs @@ -17,18 +17,18 @@ namespace System.IO.Compression public partial class ZipArchiveEntry { private ZipArchive _archive; - internal readonly bool _originallyInArchive; + private readonly bool _originallyInArchive; private readonly uint _diskNumberStart; private readonly ZipVersionMadeByPlatform _versionMadeByPlatform; private ZipVersionNeededValues _versionMadeBySpecification; - internal ZipVersionNeededValues _versionToExtract; + private ZipVersionNeededValues _versionToExtract; private BitFlagValues _generalPurposeBitFlag; private readonly bool _isEncrypted; private CompressionMethodValues _storedCompressionMethod; private DateTimeOffset _lastModified; private long _compressedSize; private long _uncompressedSize; - internal long _offsetOfLocalHeader; + private long _offsetOfLocalHeader; private long? _storedOffsetOfCompressedData; private uint _crc32; // An array of buffers, each a maximum of MaxSingleBufferSize in size @@ -304,6 +304,10 @@ public long Length internal ZipArchive.ChangeState Changed { get; private set; } + internal bool OriginallyInArchive => _originallyInArchive; + + internal long OffsetOfLocalHeader => _offsetOfLocalHeader; + /// /// Deletes the entry from the archive. /// @@ -1422,8 +1426,8 @@ internal sealed class LocalHeaderOffsetComparer : Comparer // Newly added ZipArchiveEntry records should always go to the end of the file. public override int Compare(ZipArchiveEntry? x, ZipArchiveEntry? y) { - long xOffset = x != null && !x._originallyInArchive ? long.MaxValue : x?._offsetOfLocalHeader ?? long.MinValue; - long yOffset = y != null && !y._originallyInArchive ? long.MaxValue : y?._offsetOfLocalHeader ?? long.MinValue; + long xOffset = x != null && !x.OriginallyInArchive ? long.MaxValue : x?.OffsetOfLocalHeader ?? long.MinValue; + long yOffset = y != null && !y.OriginallyInArchive ? long.MaxValue : y?.OffsetOfLocalHeader ?? long.MinValue; return xOffset.CompareTo(yOffset); } From 84dcd75ee6baf24814b08ed5b235a03261b9fd3b Mon Sep 17 00:00:00 2001 From: Edward Neal <55035479+edwardneal@users.noreply.github.com> Date: Sun, 20 Oct 2024 18:12:29 +0100 Subject: [PATCH 5/9] Following code review feedback * Used named parameter when passing a constant to forceWrite. * Replaced two magic numbers with constants. * Small cleanup of ZipArchive.WriteFile for clarity. * Renamed ZipArchiveEntry.Changed to Changes. 
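For reference while reading these hunks: the ChangeState value held by Changed (renamed to Changes in this patch) is only consumed in the parts of the series shown here, never defined. From its usage - Changes |= ZipArchive.ChangeState.FixedLengthMetadata, Changes |= ZipArchive.ChangeState.DynamicLengthMetadata, Changes |= ZipArchive.ChangeState.StoredData, and the masks tested in WriteFile - it behaves as a flags enum. A minimal sketch of that shape follows; the member names appear in the diffs, but the [Flags] layout, the numeric values and the per-member comments are assumptions.

    // Hypothetical reconstruction for illustration only; the real definition is nested in the
    // ZipArchive class (hence ZipArchive.ChangeState) and may use different values or extra members.
    [Flags]
    internal enum ChangeState
    {
        Unchanged = 0x0,             // nothing to do: headers and data can be skipped on write
        FixedLengthMetadata = 0x1,   // e.g. LastWriteTime, ExternalAttributes: header rewritten in place
        DynamicLengthMetadata = 0x2, // e.g. the entry comment: the entry (and everything after it) must move
        StoredData = 0x4             // entry data replaced: full rewrite from this entry's offset onwards
    }

WriteFile treats FixedLengthMetadata as an in-place header update, while DynamicLengthMetadata and StoredData pull completeRewriteStartingOffset back to the entry's offset so that it and every later entry are rewritten in full.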
--- .../src/System/IO/Compression/ZipArchive.cs | 31 +++++++++---------- .../System/IO/Compression/ZipArchiveEntry.cs | 24 +++++++------- .../src/System/IO/Compression/ZipBlocks.cs | 2 ++ 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs index a67a75ee4ae665..d0854de911ca4d 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs @@ -30,7 +30,6 @@ public class ZipArchive : IDisposable private readonly Stream? _backingStream; private byte[] _archiveComment; private Encoding? _entryNameAndCommentEncoding; - private long _firstDeletedEntryOffset; #if DEBUG_FORCE_ZIP64 @@ -427,7 +426,7 @@ internal void AcquireArchiveStream(ZipArchiveEntry entry) { if (!_archiveStreamOwner.EverOpenedForWrite) { - _archiveStreamOwner.WriteAndFinishLocalEntry(true); + _archiveStreamOwner.WriteAndFinishLocalEntry(forceWrite: true); } else { @@ -654,7 +653,7 @@ private void WriteFile() Debug.Assert(_readEntries); // Entries starting after this offset have had a dynamically-sized change. Everything on or after this point must be rewritten. - long dynamicDirtyStartingOffset = 0; + long completeRewriteStartingOffset = 0; List entriesToWrite = _entries; if (_mode == ZipArchiveMode.Update) @@ -663,20 +662,24 @@ private void WriteFile() // that single entry's metadata can be rewritten without impacting anything else. long startingOffset = _firstDeletedEntryOffset; long nextFileOffset = 0; - dynamicDirtyStartingOffset = startingOffset; + completeRewriteStartingOffset = startingOffset; entriesToWrite = new(_entries.Count); foreach (ZipArchiveEntry entry in _entries) { - if (entry.OriginallyInArchive) + if (!entry.OriginallyInArchive) + { + entriesToWrite.Add(entry); + } + else { - if (entry.Changed == ChangeState.Unchanged) + if (entry.Changes == ChangeState.Unchanged) { // Keep track of the expected position of the file entry after the final untouched file entry so that when the loop completes, // we'll know which position to start writing new entries from. nextFileOffset = Math.Max(nextFileOffset, entry.OffsetOfCompressedData + entry.CompressedLength); } - // When calculating the starting offset to load the files from, only look at dirty entries which are already in the archive. + // When calculating the starting offset to load the files from, only look at changed entries which are already in the archive. else { startingOffset = Math.Min(startingOffset, entry.OffsetOfLocalHeader); @@ -688,11 +691,11 @@ private void WriteFile() { // If the pending data to write is fixed-length metadata in the header, there's no need to load the full file for // inflation and deflation. 
- if ((entry.Changed & (ChangeState.DynamicLengthMetadata | ChangeState.StoredData)) != 0) + if ((entry.Changes & (ChangeState.DynamicLengthMetadata | ChangeState.StoredData)) != 0) { - dynamicDirtyStartingOffset = Math.Min(dynamicDirtyStartingOffset, entry.OffsetOfLocalHeader); + completeRewriteStartingOffset = Math.Min(completeRewriteStartingOffset, entry.OffsetOfLocalHeader); } - if (entry.OffsetOfLocalHeader >= dynamicDirtyStartingOffset) + if (entry.OffsetOfLocalHeader >= completeRewriteStartingOffset) { entry.LoadLocalHeaderExtraFieldAndCompressedBytesIfNeeded(); } @@ -700,10 +703,6 @@ private void WriteFile() entriesToWrite.Add(entry); } } - else - { - entriesToWrite.Add(entry); - } } // If the offset of entries to write from is still at long.MaxValue, then we know that nothing has been deleted, @@ -721,7 +720,7 @@ private void WriteFile() // We don't always need to write the local header entry, ZipArchiveEntry is usually able to work out when it doesn't need to. // We want to force this header entry to be written (even for completely untouched entries) if the entry comes after one // which had a pending dynamically-sized write. - bool forceWriteLocalEntry = !entry.OriginallyInArchive || (entry.OriginallyInArchive && entry.OffsetOfLocalHeader >= dynamicDirtyStartingOffset); + bool forceWriteLocalEntry = !entry.OriginallyInArchive || (entry.OriginallyInArchive && entry.OffsetOfLocalHeader >= completeRewriteStartingOffset); entry.WriteAndFinishLocalEntry(forceWriteLocalEntry); } @@ -734,7 +733,7 @@ private void WriteFile() { // The central directory needs to be rewritten if its position has moved, if there's a new entry in the archive, or if the entry might be different. bool centralDirectoryEntryRequiresUpdate = startOfCentralDirectory != _centralDirectoryStart - | (!entry.OriginallyInArchive || (entry.OriginallyInArchive && entry.OffsetOfLocalHeader >= dynamicDirtyStartingOffset)); + | (!entry.OriginallyInArchive || (entry.OriginallyInArchive && entry.OffsetOfLocalHeader >= completeRewriteStartingOffset)); entry.WriteCentralDirectoryFileHeader(centralDirectoryEntryRequiresUpdate); archiveEpilogueRequiresUpdate |= centralDirectoryEntryRequiresUpdate; diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs index 454f68b1a7b547..afb83e1d86314a 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs @@ -88,7 +88,7 @@ internal ZipArchiveEntry(ZipArchive archive, ZipCentralDirectoryFileHeader cd) _compressionLevel = MapCompressionLevel(_generalPurposeBitFlag, CompressionMethod); - Changed = ZipArchive.ChangeState.Unchanged; + Changes = ZipArchive.ChangeState.Unchanged; } // Initializes a ZipArchiveEntry instance for a new archive entry with a specified compression level. 
@@ -151,7 +151,7 @@ internal ZipArchiveEntry(ZipArchive archive, string entryName) _archive.AcquireArchiveStream(this); } - Changed = ZipArchive.ChangeState.Unchanged; + Changes = ZipArchive.ChangeState.Unchanged; } /// @@ -191,7 +191,7 @@ public int ExternalAttributes { ThrowIfInvalidArchive(); _externalFileAttr = (uint)value; - Changed |= ZipArchive.ChangeState.FixedLengthMetadata; + Changes |= ZipArchive.ChangeState.FixedLengthMetadata; } } @@ -214,7 +214,7 @@ public string Comment { _generalPurposeBitFlag |= BitFlagValues.UnicodeFileNameAndComment; } - Changed |= ZipArchive.ChangeState.DynamicLengthMetadata; + Changes |= ZipArchive.ChangeState.DynamicLengthMetadata; } } @@ -279,7 +279,7 @@ public DateTimeOffset LastWriteTime throw new ArgumentOutOfRangeException(nameof(value), SR.DateTimeOutOfRange); _lastModified = value; - Changed |= ZipArchive.ChangeState.FixedLengthMetadata; + Changes |= ZipArchive.ChangeState.FixedLengthMetadata; } } @@ -302,7 +302,7 @@ public long Length /// public string Name => ParseFileName(FullName, _versionMadeByPlatform); - internal ZipArchive.ChangeState Changed { get; private set; } + internal ZipArchive.ChangeState Changes { get; private set; } internal bool OriginallyInArchive => _originallyInArchive; @@ -549,9 +549,9 @@ internal void WriteCentralDirectoryFileHeader(bool forceWrite) extraFieldLength = (ushort)bigExtraFieldLength; } - if (_originallyInArchive && Changed == ZipArchive.ChangeState.Unchanged && !forceWrite) + if (_originallyInArchive && Changes == ZipArchive.ChangeState.Unchanged && !forceWrite) { - long centralDirectoryHeaderLength = 46 + long centralDirectoryHeaderLength = ZipCentralDirectoryFileHeader.SizeOfConstantHeaderFields + _storedEntryNameBytes.Length + (zip64Needed ? zip64ExtraField.TotalSize : 0) + (_cdUnknownExtraFields != null ? ZipGenericExtraField.TotalSize(_cdUnknownExtraFields) : 0) @@ -732,7 +732,7 @@ private WrappedStream OpenInWriteMode() _archive.DebugAssertIsStillArchiveStreamOwner(this); _everOpenedForWrite = true; - Changed |= ZipArchive.ChangeState.StoredData; + Changes |= ZipArchive.ChangeState.StoredData; CheckSumAndSizeWriteStream crcSizeStream = GetDataCompressor(_archive.ArchiveStream, true, (object? o, EventArgs e) => { // release the archive stream @@ -753,7 +753,7 @@ private WrappedStream OpenInUpdateMode() ThrowIfNotOpenable(needToUncompress: true, needToLoadIntoMemory: true); _everOpenedForWrite = true; - Changed |= ZipArchive.ChangeState.StoredData; + Changes |= ZipArchive.ChangeState.StoredData; _currentlyOpenForWrite = true; // always put it at the beginning for them UncompressedData.Seek(0, SeekOrigin.Begin); @@ -966,9 +966,9 @@ private bool WriteLocalFileHeader(bool isEmptyFile, bool forceWrite) // If this is an existing, unchanged entry then silently skip forwards. // If it's new or changed, write the header. 
- if (_originallyInArchive && Changed == ZipArchive.ChangeState.Unchanged && !forceWrite) + if (_originallyInArchive && Changes == ZipArchive.ChangeState.Unchanged && !forceWrite) { - writer.Seek(30 + _storedEntryNameBytes.Length, SeekOrigin.Current); + writer.Seek(ZipLocalFileHeader.SizeOfLocalHeader + _storedEntryNameBytes.Length, SeekOrigin.Current); if (zip64Used) { diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs index de7e833f588c20..ca5ffe90c7f9ba 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipBlocks.cs @@ -461,6 +461,8 @@ public static bool TrySkipBlock(BinaryReader reader) internal struct ZipCentralDirectoryFileHeader { public const uint SignatureConstant = 0x02014B50; + public const int SizeOfConstantHeaderFields = 46; + public byte VersionMadeByCompatibility; public byte VersionMadeBySpecification; public ushort VersionNeededToExtract; From 448aa4a3105425a12548559c68bb55a25327d095 Mon Sep 17 00:00:00 2001 From: Edward Neal <55035479+edwardneal@users.noreply.github.com> Date: Wed, 11 Dec 2024 21:15:41 +0000 Subject: [PATCH 6/9] Further code review: initial response --- .../src/System/IO/Compression/ZipArchive.cs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs index d0854de911ca4d..a7c6e6163e6683 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs @@ -689,8 +689,7 @@ private void WriteFile() // NB: the existing ZipArchiveEntries are sorted in _entries by their position ascending. if (entry.OffsetOfLocalHeader >= startingOffset) { - // If the pending data to write is fixed-length metadata in the header, there's no need to load the full file for - // inflation and deflation. + // If the pending data to write is fixed-length metadata in the header, there's no need to load the compressed file bits. if ((entry.Changes & (ChangeState.DynamicLengthMetadata | ChangeState.StoredData)) != 0) { completeRewriteStartingOffset = Math.Min(completeRewriteStartingOffset, entry.OffsetOfLocalHeader); @@ -725,23 +724,23 @@ private void WriteFile() entry.WriteAndFinishLocalEntry(forceWriteLocalEntry); } - long startOfCentralDirectory = _archiveStream.Position; + long plannedCentralDirectoryPosition = _archiveStream.Position; // If there are no entries in the archive, we still want to create the archive epilogue. bool archiveEpilogueRequiresUpdate = _entries.Count == 0; foreach (ZipArchiveEntry entry in _entries) { // The central directory needs to be rewritten if its position has moved, if there's a new entry in the archive, or if the entry might be different. 
- bool centralDirectoryEntryRequiresUpdate = startOfCentralDirectory != _centralDirectoryStart - | (!entry.OriginallyInArchive || (entry.OriginallyInArchive && entry.OffsetOfLocalHeader >= completeRewriteStartingOffset)); + bool centralDirectoryEntryRequiresUpdate = plannedCentralDirectoryPosition != _centralDirectoryStart + || !entry.OriginallyInArchive || entry.OffsetOfLocalHeader >= completeRewriteStartingOffset; entry.WriteCentralDirectoryFileHeader(centralDirectoryEntryRequiresUpdate); archiveEpilogueRequiresUpdate |= centralDirectoryEntryRequiresUpdate; } - long sizeOfCentralDirectory = _archiveStream.Position - startOfCentralDirectory; + long sizeOfCentralDirectory = _archiveStream.Position - plannedCentralDirectoryPosition; - WriteArchiveEpilogue(startOfCentralDirectory, sizeOfCentralDirectory, archiveEpilogueRequiresUpdate); + WriteArchiveEpilogue(plannedCentralDirectoryPosition, sizeOfCentralDirectory, archiveEpilogueRequiresUpdate); // If entries have been removed and new (smaller) ones added, there could be empty space at the end of the file. // Shrink the file to reclaim this space. @@ -780,7 +779,7 @@ private void WriteArchiveEpilogue(long startOfCentralDirectory, long sizeOfCentr } // write normal eocd - if (centralDirectoryChanged | (Changed != ChangeState.Unchanged)) + if (centralDirectoryChanged || (Changed != ChangeState.Unchanged)) { ZipEndOfCentralDirectoryBlock.WriteBlock(_archiveStream, _entries.Count, startOfCentralDirectory, sizeOfCentralDirectory, _archiveComment); } From cd70a278e8388e4fac63137fe1fa8c5ab4717103 Mon Sep 17 00:00:00 2001 From: Edward Neal <55035479+edwardneal@users.noreply.github.com> Date: Thu, 19 Dec 2024 06:57:23 +0000 Subject: [PATCH 7/9] Further code review comments The list of entries in a ZipArchive is now only sorted when opened in Update mode. Added/modified a test to verify that these entries appear in the correct order: offset order when opened in Update mode, central directory entry order when opened in Read mode. --- .../src/System/IO/Compression/ZipArchive.cs | 10 +- .../tests/ZipArchive/zip_ReadTests.cs | 100 +++++++++++++++++- 2 files changed, 104 insertions(+), 6 deletions(-) diff --git a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs index a7c6e6163e6683..a8179b0afa7226 100644 --- a/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs +++ b/src/libraries/System.IO.Compression/src/System/IO/Compression/ZipArchive.cs @@ -518,11 +518,15 @@ private void ReadCentralDirectory() numberOfEntries++; } - // Sort _entries by each archive entry's position - _entries.Sort(ZipArchiveEntry.LocalHeaderOffsetComparer.Instance); - if (numberOfEntries != _expectedNumberOfEntries) throw new InvalidDataException(SR.NumEntriesWrong); + + // Sort _entries by each archive entry's position. This supports the algorithm in WriteFile, so is only + // necessary when the ZipArchive has been opened in Update mode. 
+ if (Mode == ZipArchiveMode.Update) + { + _entries.Sort(ZipArchiveEntry.LocalHeaderOffsetComparer.Instance); + } } catch (EndOfStreamException ex) { diff --git a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ReadTests.cs b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ReadTests.cs index 3c2a74cb48aa2b..fa589136cd51f9 100644 --- a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ReadTests.cs +++ b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_ReadTests.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers.Binary; using System.Collections.Generic; using System.Threading.Tasks; using Xunit; @@ -274,17 +275,19 @@ public static async Task EnsureDisposeIsCalledAsExpectedOnTheUnderlyingStream(bo } [Fact] - public static void ArchivesInOffsetOrder() + public static void ArchivesInOffsetOrder_UpdateMode() { + // When the ZipArchive which has been opened in Update mode is disposed of, its entries will be rewritten in order of their offset within the file. + // This requires the entries to be sorted when the file is opened. byte[] sampleEntryContents = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]; - byte[] sampleZipFile = CreateZipFile(50, sampleEntryContents); + byte[] sampleZipFile = ReverseCentralDirectoryEntries(CreateZipFile(50, sampleEntryContents)); using (MemoryStream ms = new MemoryStream()) { ms.Write(sampleZipFile); ms.Seek(0, SeekOrigin.Begin); - ZipArchive source = new ZipArchive(ms, ZipArchiveMode.Read, leaveOpen: true); + ZipArchive source = new ZipArchive(ms, ZipArchiveMode.Update, leaveOpen: true); long previousOffset = long.MinValue; System.Reflection.FieldInfo offsetOfLocalHeader = typeof(ZipArchiveEntry).GetField("_offsetOfLocalHeader", System.Reflection.BindingFlags.NonPublic | Reflection.BindingFlags.Instance); @@ -294,12 +297,103 @@ public static void ArchivesInOffsetOrder() long offset = (long)offsetOfLocalHeader.GetValue(entry); Assert.True(offset > previousOffset); + previousOffset = offset; + } + + source.Dispose(); + } + } + + [Fact] + public static void ArchivesInCentralDirectoryOrder_ReadMode() + { + // When the ZipArchive is opened in Read mode, no sort is necessary. The entries will be added to the ZipArchive in the order + // that they appear in the central directory (in this case, sorted by offset descending.) + byte[] sampleEntryContents = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]; + byte[] sampleZipFile = ReverseCentralDirectoryEntries(CreateZipFile(50, sampleEntryContents)); + + using (MemoryStream ms = new MemoryStream()) + { + ms.Write(sampleZipFile); + ms.Seek(0, SeekOrigin.Begin); + + ZipArchive source = new ZipArchive(ms, ZipArchiveMode.Read, leaveOpen: true); + long previousOffset = long.MaxValue; + System.Reflection.FieldInfo offsetOfLocalHeader = typeof(ZipArchiveEntry).GetField("_offsetOfLocalHeader", System.Reflection.BindingFlags.NonPublic | Reflection.BindingFlags.Instance); + + for (int i = 0; i < source.Entries.Count; i++) + { + ZipArchiveEntry entry = source.Entries[i]; + long offset = (long)offsetOfLocalHeader.GetValue(entry); + + Assert.True(offset < previousOffset); + previousOffset = offset; } source.Dispose(); } } + private static byte[] ReverseCentralDirectoryEntries(byte[] zipFile) + { + byte[] destinationBuffer = new byte[zipFile.Length]; + + // Inspect the "end of central directory" header. 
This is the final 22 bytes of the file, and it contains the offset and the size + // of the central directory. + int eocdHeaderOffset_CentralDirectoryPosition = zipFile.Length - 6; + int eocdHeaderOffset_CentralDirectoryLength = zipFile.Length - 10; + int centralDirectoryPosition = BinaryPrimitives.ReadInt32LittleEndian(zipFile.AsSpan(eocdHeaderOffset_CentralDirectoryPosition, sizeof(int))); + int centralDirectoryLength = BinaryPrimitives.ReadInt32LittleEndian(zipFile.AsSpan(eocdHeaderOffset_CentralDirectoryLength, sizeof(int))); + List centralDirectoryRanges = new List(); + + Assert.True(centralDirectoryPosition + centralDirectoryLength < zipFile.Length); + + // With the starting position of the central directory in hand, work through each entry, recording its starting position and its length. + for (int currPosition = centralDirectoryPosition; currPosition < centralDirectoryPosition + centralDirectoryLength;) + { + // The length of a central directory entry is determined by the length of its static components (46 bytes), plus the length of its filename + // (offset 28), extra fields (offset 30) and file comment (offset 32). + short filenameLength = BinaryPrimitives.ReadInt16LittleEndian(zipFile.AsSpan(currPosition + 28, sizeof(short))); + short extraFieldLength = BinaryPrimitives.ReadInt16LittleEndian(zipFile.AsSpan(currPosition + 30, sizeof(short))); + short fileCommentLength = BinaryPrimitives.ReadInt16LittleEndian(zipFile.AsSpan(currPosition + 32, sizeof(short))); + int totalHeaderLength = 46 + filenameLength + extraFieldLength + fileCommentLength; + + // The sample data generated by the tests should never have extra fields and comments. + Assert.True(filenameLength > 0); + Assert.True(extraFieldLength == 0); + Assert.True(fileCommentLength == 0); + + centralDirectoryRanges.Add(new Range(currPosition, currPosition + totalHeaderLength)); + currPosition += totalHeaderLength; + } + + // Begin building the destination archive. The file contents (everything up to the central directory header) can be copied as-is. + zipFile.AsSpan(0, centralDirectoryPosition).CopyTo(destinationBuffer); + + int cumulativeCentralDirectoryLength = 0; + + // Reverse the order of the central directory entries + foreach (Range cdHeader in centralDirectoryRanges) + { + Span sourceSpan = zipFile.AsSpan(cdHeader); + Span destSpan; + + cumulativeCentralDirectoryLength += sourceSpan.Length; + Assert.True(cumulativeCentralDirectoryLength <= centralDirectoryLength); + + destSpan = destinationBuffer.AsSpan(centralDirectoryPosition + centralDirectoryLength - cumulativeCentralDirectoryLength, sourceSpan.Length); + sourceSpan.CopyTo(destSpan); + } + + Assert.Equal(centralDirectoryLength, cumulativeCentralDirectoryLength); + Assert.Equal(22, destinationBuffer.Length - centralDirectoryPosition - centralDirectoryLength); + + // Copy the "end of central directory header" entry to the destination buffer. 
+ zipFile.AsSpan(zipFile.Length - 22).CopyTo(destinationBuffer.AsSpan(destinationBuffer.Length - 22)); + + return destinationBuffer; + } + private class DisposeCallCountingStream : MemoryStream { public int NumberOfDisposeCalls { get; private set; } From ccb4a83b566c8a88940472d840005e806f3ee903 Mon Sep 17 00:00:00 2001 From: Edward Neal <55035479+edwardneal@users.noreply.github.com> Date: Thu, 23 Jan 2025 21:04:55 +0000 Subject: [PATCH 8/9] Correct the write counts in tests This accounts for the removal of BinaryReader in an earlier PR --- .../tests/ZipArchive/zip_UpdateTests.cs | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_UpdateTests.cs b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_UpdateTests.cs index c66a3b8dfb43ef..fd8637ee90515a 100644 --- a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_UpdateTests.cs +++ b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_UpdateTests.cs @@ -414,11 +414,11 @@ public void Update_PerformMinimalWritesWhenFixedLengthEntryHeaderFieldChanged(in writesCalled = trackingStream.TimesCalled(nameof(trackingStream.Write)) - writesCalled; writeBytesCalled = trackingStream.TimesCalled(nameof(trackingStream.WriteByte)) - writeBytesCalled; // As above, check 1: the number of writes performed should be minimal. - // 11 writes per archive entry for the local file header. - // 18 writes per archive entry for the central directory header. - // 8 writes (sometimes 9, if there's a comment) for the end of central directory block. + // 2 writes per archive entry for the local file header. + // 2 writes per archive entry for the central directory header. + // 1 write (sometimes 2, if there's a comment) for the end of central directory block. // The EOCD block won't change as a result of our modifications, so is excluded from the counts. - Assert.Equal(((11 + 18) * entriesToModify), writesCalled + writeBytesCalled); + Assert.Equal(((2 + 2) * entriesToModify), writesCalled + writeBytesCalled); trackingStream.Seek(0, SeekOrigin.Begin); target = new ZipArchive(trackingStream, ZipArchiveMode.Read); @@ -514,24 +514,24 @@ public void Update_PerformMinimalWritesWithDataAndHeaderChanges(int dataChangeIn // If the data changed first, then every entry after it will be written in full. If the fixed-length // metadata changed first, some entries which won't have been fully written - just updated in place. - // 11 writes per archive entry for the local file header. - // 18 writes per archive entry for the central directory header. - // 4 writes for the file data of the updated entry itself + // 2 writes per archive entry for the local file header. + // 2 writes per archive entry for the central directory header. + // 2 writes for the file data of the updated entry itself // 1 write per archive entry for the file data of other entries after this in the file - // 8 writes (sometimes 9, if there's a comment) for the end of central directory block. + // 1 write (sometimes 2, if there's a comment) for the end of central directory block. // All of the central directory headers must be rewritten after an entry's data has been modified. 
if (dataChangeIndex <= lastWriteTimeChangeIndex || lastWriteTimeChangeIndex == -1) { // dataChangeIndex -> totalEntries: rewrite in full // all central directories headers - Assert.Equal(8 + 3 + ((11 + 1) * expectedEntriesToWrite) + (18 * totalEntries), writesCalled + writeBytesCalled); + Assert.Equal(1 + 1 + ((2 + 1) * expectedEntriesToWrite) + (2 * totalEntries), writesCalled + writeBytesCalled); } else { // lastWriteTimeChangeIndex: partial rewrite // dataChangeIndex -> totalEntries: rewrite in full // all central directory headers - Assert.Equal(8 + 3 + ((11 + 1) * expectedEntriesToWrite) + (18 * totalEntries) + 11, writesCalled + writeBytesCalled); + Assert.Equal(1 + 1 + ((2 + 1) * expectedEntriesToWrite) + (2 * totalEntries) + 2, writesCalled + writeBytesCalled); } trackingStream.Seek(0, SeekOrigin.Begin); @@ -579,8 +579,8 @@ public async Task Update_PerformMinimalWritesWhenArchiveCommentChanged() writesCalled = trackingStream.TimesCalled(nameof(trackingStream.Write)) - writesCalled; writeBytesCalled = trackingStream.TimesCalled(nameof(trackingStream.WriteByte)) - writeBytesCalled; - // We expect 9 writes for the end of central directory block - 8 for the EOCD, 1 for the comment. - Assert.Equal(9, writesCalled + writeBytesCalled); + // We expect 2 writes for the end of central directory block - 1 for the EOCD, 1 for the comment. + Assert.Equal(2, writesCalled + writeBytesCalled); trackingStream.Seek(0, SeekOrigin.Begin); @@ -644,19 +644,19 @@ public void Update_PerformMinimalWritesWhenEntriesModifiedAndDeleted(int modifyI writesCalled = trackingStream.TimesCalled(nameof(trackingStream.Write)) - writesCalled; writeBytesCalled = trackingStream.TimesCalled(nameof(trackingStream.WriteByte)) - writeBytesCalled; - // 11 writes per archive entry for the local file header. - // 18 writes per archive entry for the central directory header. - // 4 writes for the file data of the updated entry itself + // 2 writes per archive entry for the local file header. + // 2 writes per archive entry for the central directory header. + // 2 writes for the file data of the updated entry itself // 1 write per archive entry for the file data of other entries after this in the file - // 8 writes (sometimes 9, if there's a comment) for the end of central directory block. + // 1 write (sometimes 2, if there's a comment) for the end of central directory block. // All of the central directory headers must be rewritten after an entry's data has been modified. 
if (modifyIndex == -1) { - Assert.Equal(8 + ((11 + 1) * expectedEntriesToWrite) + (18 * (totalEntries - 1)), writesCalled + writeBytesCalled); + Assert.Equal(1 + ((2 + 1) * expectedEntriesToWrite) + (2 * (totalEntries - 1)), writesCalled + writeBytesCalled); } else { - Assert.Equal(8 + 3 + ((11 + 1) * expectedEntriesToWrite) + (18 * (totalEntries - 1)), writesCalled + writeBytesCalled); + Assert.Equal(1 + 1 + ((2 + 1) * expectedEntriesToWrite) + (2 * (totalEntries - 1)), writesCalled + writeBytesCalled); } trackingStream.Seek(0, SeekOrigin.Begin); From 226ec0df9db4ddb890a3b1d8726b8d4f63070c96 Mon Sep 17 00:00:00 2001 From: Edward Neal <55035479+edwardneal@users.noreply.github.com> Date: Thu, 23 Jan 2025 22:02:21 +0000 Subject: [PATCH 9/9] Extra test: Update_PerformMinimalWritesWhenEntriesModifiedAndAdded This test modifies an entry at a specific index, adds N entries after it, then verifies that they've been written in the expected order: existing entries first, new entries afterwards --- .../tests/ZipArchive/zip_UpdateTests.cs | 84 +++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_UpdateTests.cs b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_UpdateTests.cs index fd8637ee90515a..6b4dcadf01a280 100644 --- a/src/libraries/System.IO.Compression/tests/ZipArchive/zip_UpdateTests.cs +++ b/src/libraries/System.IO.Compression/tests/ZipArchive/zip_UpdateTests.cs @@ -685,5 +685,89 @@ public void Update_PerformMinimalWritesWhenEntriesModifiedAndDeleted(int modifyI } } } + + [Theory] + [InlineData(1)] + [InlineData(5)] + [InlineData(10)] + [InlineData(12)] + public void Update_PerformMinimalWritesWhenEntriesModifiedAndAdded(int entriesToCreate) + { + byte[] sampleEntryContents = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]; + byte[] sampleZipFile = CreateZipFile(50, sampleEntryContents); + + using (MemoryStream ms = new MemoryStream()) + { + ms.Write(sampleZipFile); + ms.Seek(0, SeekOrigin.Begin); + + using (CallTrackingStream trackingStream = new CallTrackingStream(ms)) + { + // Open the archive in Update mode. Rewrite the data of the last entry and add the requested number of new entries + // to the end of the archive. Verify that the correct number of writes is performed as a result, then + // reopen the same archive, get the entries, make sure that the right number of entries have + // been found and that the entries have the correct contents. 
+ int writesCalled = trackingStream.TimesCalled(nameof(trackingStream.Write)); + int writeBytesCalled = trackingStream.TimesCalled(nameof(trackingStream.WriteByte)); + ZipArchive target = new ZipArchive(trackingStream, ZipArchiveMode.Update, leaveOpen: true); + int totalEntries = target.Entries.Count; + ZipArchiveEntry entryToRewrite = target.Entries[^1]; + string modifiedPath = entryToRewrite.FullName; + + using (Stream entryStream = entryToRewrite.Open()) + { + entryStream.Seek(0, SeekOrigin.Begin); + for (int i = 0; i < 100; i++) + { + entryStream.Write(sampleEntryContents); + } + } + + for (int i = 0; i < entriesToCreate; i++) + { + ZipArchiveEntry createdEntry = target.CreateEntry($"added/{i}.bin"); + + using (Stream entryWriteStream = createdEntry.Open()) + { + entryWriteStream.Write(sampleEntryContents); + entryWriteStream.WriteByte((byte)((i + totalEntries) % byte.MaxValue)); + } + } + + target.Dispose(); + + writesCalled = trackingStream.TimesCalled(nameof(trackingStream.Write)) - writesCalled; + writeBytesCalled = trackingStream.TimesCalled(nameof(trackingStream.WriteByte)) - writeBytesCalled; + + // 2 writes per archive entry for the local file header. + // 2 writes per archive entry for the central directory header. + // 2 writes for the file data of the updated entry itself + // 1 write (sometimes 2, if there's a comment) for the end of central directory block. + // All of the central directory headers must be rewritten after an entry's data has been modified. + + Assert.Equal(1 + ((2 + 2 + 2) * entriesToCreate) + (2 * (totalEntries - 1) + (2 + 2 + 2)), writesCalled + writeBytesCalled); + + trackingStream.Seek(0, SeekOrigin.Begin); + target = new ZipArchive(trackingStream, ZipArchiveMode.Read); + + // Check 2: no other data has been corrupted as a result + for (int i = 0; i < totalEntries + entriesToCreate; i++) + { + ZipArchiveEntry entry = target.Entries[i]; + byte[] expectedBuffer = entry.FullName == modifiedPath + ? Enumerable.Repeat(sampleEntryContents, 100).SelectMany(x => x).ToArray() + : [.. sampleEntryContents, (byte)(i % byte.MaxValue)]; + byte[] readBuffer = new byte[expectedBuffer.Length]; + + using (Stream readStream = entry.Open()) + { + readStream.Read(readBuffer.AsSpan()); + } + + Assert.Equal(expectedBuffer, readBuffer); + } + } + } + } } }
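Seen from the caller's side, the effect of the series is that a small metadata change to a large archive opened in Update mode no longer costs a full round trip of every entry through memory. Below is a minimal usage sketch of the scenario the write-count tests above measure; the file name and entry path are illustrative assumptions, and the sketch itself is not part of the patch.

    using System;
    using System.IO.Compression;

    class MinimalRewriteExample
    {
        static void Main()
        {
            // Only a fixed-length header field of one entry changes here, so on Dispose the archive
            // should only need to rewrite that entry's local file header and central directory header
            // in place, rather than re-deflating and re-writing every entry in the file.
            using ZipArchive archive = ZipFile.Open("large.zip", ZipArchiveMode.Update);

            ZipArchiveEntry entry = archive.GetEntry("docs/readme.txt")
                ?? throw new InvalidOperationException("entry not found");

            // LastWriteTime is fixed-length in both the local file header and the central directory
            // header, so the entry's compressed data never needs to be loaded or re-compressed.
            entry.LastWriteTime = DateTimeOffset.UtcNow;
        }
    }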