-
Notifications
You must be signed in to change notification settings - Fork 5.2k
Description
There are various places in System.IO.Compression.ZipArchive* that are currently calling stream.Read
and assuming the number of bytes returned will be the requested one. This method does not work like that, it can return less than the requested number of bytes.
This is a problem when using a stream whose Read method constantly returns less than the requested number of bytes. I tested the extreme scenario of creating my own custom Stream implementation, overriding the Read methods and forcing them to always return one byte at a time. This causes valid archives to throw unexpected exceptions when calling archive.Entries, as EnsureCentralDirectoryRead will be unable to finish reading all the entry bytes because many places assume that the number of bytes returned is insufficient and exit early.
The fix is to replace these Read calls with ReadExactly or ReadAtLeast, depending on each case (the former always throws if not enough bytes, the latter can optionally not throw on end of stream).
@edwardneal would you be interested in looking into this?
Here's my custom stream implementation:
Expand
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
using System;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
internal class ReadReturnsOneByteAtATimeStream : Stream
{
private readonly Stream _underlyingStream;
public ReadReturnsOneByteAtATimeStream() : this(new MemoryStream()) { }
public ReadReturnsOneByteAtATimeStream(Stream underlyingStream) => _underlyingStream = underlyingStream;
public override bool CanRead => _underlyingStream.CanRead;
public override bool CanSeek => _underlyingStream.CanSeek;
public override bool CanWrite => _underlyingStream.CanWrite;
public override long Length => _underlyingStream.Length;
public override long Position
{
get => _underlyingStream.Position;
set => _underlyingStream.Position = value;
}
public override void CopyTo(Stream destination, int bufferSize) => _underlyingStream.CopyTo(destination, bufferSize);
public override Task CopyToAsync(Stream destination, int bufferSize, CancellationToken cancellationToken) => _underlyingStream.CopyToAsync(destination, bufferSize, cancellationToken);
public new void Dispose() => _underlyingStream.Dispose();
public override ValueTask DisposeAsync() => _underlyingStream.DisposeAsync();
public override void Flush() => _underlyingStream.Flush();
public override Task FlushAsync(CancellationToken cancellationToken) => _underlyingStream.FlushAsync(cancellationToken);
/// <summary>
/// Reads one byte at a time.
/// </summary>
public override int Read(byte[] buffer, int offset, int count) =>
_underlyingStream.ReadAtLeast(buffer.AsSpan(0, 1), 1, throwOnEndOfStream: false);
/// <summary>
/// Reads one byte at a time.
/// </summary>
public override int Read(Span<byte> buffer) =>
_underlyingStream.ReadAtLeast(buffer.Slice(0, 1), 1, throwOnEndOfStream: false);
/// <summary>
/// Reads one byte at a time.
/// </summary>
public override async Task<int> ReadAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) =>
await _underlyingStream.ReadAtLeastAsync(buffer.AsMemory(0, 1), 1, throwOnEndOfStream: false, cancellationToken);
/// <summary>
/// Reads one byte at a time.
/// </summary>
public override async ValueTask<int> ReadAsync(Memory<byte> buffer, CancellationToken cancellationToken = default) =>
await _underlyingStream.ReadAtLeastAsync(buffer.Slice(0, 1), 1, throwOnEndOfStream: false, cancellationToken);
public new int ReadAtLeast(Span<byte> buffer, int minimumBytes, bool throwOnEndOfStream = true) => _underlyingStream.ReadAtLeast(buffer, minimumBytes, throwOnEndOfStream);
public new ValueTask<int> ReadAtLeastAsync(Memory<byte> buffer, int minimumBytes, bool throwOnEndOfStream = true, CancellationToken cancellationToken = default) => _underlyingStream.ReadAtLeastAsync(buffer, minimumBytes, throwOnEndOfStream, cancellationToken);
public override int ReadByte() => _underlyingStream.ReadByte();
public new void ReadExactly(Span<byte> buffer) => _underlyingStream.ReadExactly(buffer);
public new void ReadExactly(byte[] buffer, int offset, int count) => _underlyingStream.ReadExactly(buffer, offset, count);
public new ValueTask ReadExactlyAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken = default) => _underlyingStream.ReadExactlyAsync(buffer, offset, count, cancellationToken);
public new ValueTask ReadExactlyAsync(Memory<byte> buffer, CancellationToken cancellationToken = default) => _underlyingStream.ReadExactlyAsync(buffer, cancellationToken);
public override long Seek(long offset, SeekOrigin origin) => _underlyingStream.Seek(offset, origin);
public override void SetLength(long value) => _underlyingStream.SetLength(value);
public override void Write(byte[] buffer, int offset, int count) => _underlyingStream.Write(buffer, offset, count);
public override void Write(ReadOnlySpan<byte> buffer) => _underlyingStream.Write(buffer);
public override Task WriteAsync(byte[] buffer, int offset, int count, CancellationToken cancellationToken) => _underlyingStream.WriteAsync(buffer, offset, count, cancellationToken);
public override ValueTask WriteAsync(ReadOnlyMemory<byte> buffer, CancellationToken cancellationToken = default) => _underlyingStream.WriteAsync(buffer, cancellationToken);
public override void WriteByte(byte value) => _underlyingStream.WriteByte(value);
}
And here is a test with a valid zip that throws:
Expand
[Fact]
public static void ReadCentralDirectory_Always_Reads_ExpectedNumberOfBytes()
{
var ms = new ReadReturnsOneByteAtATimeStream();
using (var archiveWrite = new ZipArchive(ms, ZipArchiveMode.Create, leaveOpen: true))
{
archiveWrite.CreateEntry("1.txt");
archiveWrite.CreateEntry("2.txt");
archiveWrite.CreateEntry("3.txt");
}
ms.Position = 0;
using ZipArchive archive = new ZipArchive(ms);
int i = 0;
foreach (var entry in archive.Entries)
{
Assert.Equal($"{i++}.txt", entry.Name);
}
}