Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/SharpCompress/Factories/TarFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ public override bool IsArchive(Stream stream, string? password = null)
if (wrapper.IsMatch(sharpCompressStream))
{
sharpCompressStream.Rewind();
sharpCompressStream.EnsureMinimumRewindBufferSize(wrapper.MinimumRewindBufferSize);
var decompressedStream = CreateProbeDecompressionStream(
sharpCompressStream,
wrapper.CompressionType,
Expand Down Expand Up @@ -95,6 +96,7 @@ await wrapper
)
{
sharpCompressStream.Rewind();
sharpCompressStream.EnsureMinimumRewindBufferSize(wrapper.MinimumRewindBufferSize);
var decompressedStream = await CreateProbeDecompressionStreamAsync(
sharpCompressStream,
wrapper.CompressionType,
Expand Down Expand Up @@ -326,6 +328,7 @@ public IReader OpenReader(Stream stream, ReaderOptions? options)
if (wrapper.IsMatch(sharpCompressStream))
{
sharpCompressStream.Rewind();
sharpCompressStream.EnsureMinimumRewindBufferSize(wrapper.MinimumRewindBufferSize);
var decompressedStream = CreateProbeDecompressionStream(
sharpCompressStream,
wrapper.CompressionType,
Expand Down Expand Up @@ -363,6 +366,7 @@ await wrapper
)
{
sharpCompressStream.Rewind();
sharpCompressStream.EnsureMinimumRewindBufferSize(wrapper.MinimumRewindBufferSize);
var decompressedStream = await CreateProbeDecompressionStreamAsync(
sharpCompressStream,
wrapper.CompressionType,
Expand Down
18 changes: 16 additions & 2 deletions src/SharpCompress/Factories/TarWrapper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,24 @@ public class TarWrapper(
Func<Stream, Stream> createStream,
Func<Stream, CancellationToken, ValueTask<Stream>> createStreamAsync,
IEnumerable<string> knownExtensions,
bool wrapInSharpCompressStream = true
bool wrapInSharpCompressStream = true,
int? minimumRewindBufferSize = null
)
{
public CompressionType CompressionType { get; } = type;
public Func<Stream, bool> IsMatch { get; } = canHandle;
public Func<Stream, CancellationToken, ValueTask<bool>> IsMatchAsync { get; } = canHandleAsync;
public bool WrapInSharpCompressStream { get; } = wrapInSharpCompressStream;

/// <summary>
/// The minimum ring buffer size required to detect and probe this format.
/// Format detection reads a decompressed block to check the tar header, so
/// the ring buffer must be large enough to hold the compressed bytes consumed
/// during that probe. Defaults to <see cref="Common.Constants.RewindableBufferSize"/>.
/// </summary>
public int MinimumRewindBufferSize { get; } =
minimumRewindBufferSize ?? Common.Constants.RewindableBufferSize;

public Func<Stream, Stream> CreateStream { get; } = createStream;
public Func<Stream, CancellationToken, ValueTask<Stream>> CreateStreamAsync { get; } =
createStreamAsync;
Expand Down Expand Up @@ -57,7 +67,11 @@ public class TarWrapper(
await BZip2Stream
.CreateAsync(stream, CompressionMode.Decompress, false)
.ConfigureAwait(false),
["tar.bz2", "tb2", "tbz", "tbz2", "tz2"]
["tar.bz2", "tb2", "tbz", "tbz2", "tz2"],
// BZip2 decompresses in whole blocks; the compressed size of the first block
// can be close to the uncompressed maximum (9 × 100 000 = 900 000 bytes).
// The ring buffer must hold all compressed bytes read during format detection.
minimumRewindBufferSize: BZip2Constants.baseBlockSize * 9

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot are there more kinds than just BZip2 to worry about? Zstandard?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only BZip2 needs the enlarged buffer. The other formats all decompress incrementally:

  • ZStandard reads from the source in chunks of ZSTD_DStreamInSize() ≈ 128 KB and produces output from each chunk before reading more. The default 160 KB ring buffer is larger, so no special treatment is needed.
  • GZip (DEFLATE), LZip (LZMA), XZ (LZMA2), and LZW are all streaming/dictionary-based decompressors that produce output while consuming input — well within the default 160 KB buffer.

BZip2 is unique in that it must buffer and decode an entire compressed block before producing any output, and at compression level 9 that block can be up to 900 KB of compressed data.

),
new(
CompressionType.GZip,
Expand Down
31 changes: 31 additions & 0 deletions src/SharpCompress/IO/SharpCompressStream.cs
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,37 @@ public virtual void StartRecording()
_isRecording = true;
}

/// <summary>
/// Ensures the ring buffer has at least the specified minimum capacity.
/// If the current buffer is smaller, it is replaced with a larger one while
/// preserving all existing buffered data. Call this after detecting a compression
/// format that requires a larger buffer for format detection (e.g. BZip2 whose
/// first block can be up to 900 KB of compressed data).
/// </summary>
/// <param name="minSize">Minimum required ring buffer capacity in bytes.</param>
internal void EnsureMinimumRewindBufferSize(int minSize)
Comment thread
adamhathcock marked this conversation as resolved.
Outdated
{
if (_isPassthrough || _ringBuffer is null || _ringBuffer.Capacity >= minSize)
{
return;
}

// Create a new larger buffer with the required capacity
var newBuffer = new RingBuffer(minSize);

// Preserve existing buffered data in the new buffer
var existingLength = _ringBuffer.Length;
if (existingLength > 0)
{
var existingData = new byte[existingLength];
_ringBuffer.ReadFromEnd(existingLength, existingData, 0, existingLength);
newBuffer.Write(existingData, 0, existingLength);
}

_ringBuffer.Dispose();
_ringBuffer = newBuffer;
}

public override bool CanRead => true;

public override bool CanSeek => !_isPassthrough || stream.CanSeek;
Expand Down
2 changes: 2 additions & 0 deletions src/SharpCompress/Readers/Tar/TarReader.Factory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ public static async ValueTask<IAsyncReader> OpenAsyncReader(
}

sharpCompressStream.Position = pos;
sharpCompressStream.EnsureMinimumRewindBufferSize(wrapper.MinimumRewindBufferSize);
var testStream = await CreateProbeDecompressionStreamAsync(
sharpCompressStream,
wrapper.CompressionType,
Expand Down Expand Up @@ -182,6 +183,7 @@ public static IReader OpenReader(Stream stream, ReaderOptions? readerOptions = n
}

sharpCompressStream.Position = pos;
sharpCompressStream.EnsureMinimumRewindBufferSize(wrapper.MinimumRewindBufferSize);
var testStream = CreateProbeDecompressionStream(
sharpCompressStream,
wrapper.CompressionType,
Expand Down
12 changes: 6 additions & 6 deletions src/SharpCompress/packages.lock.json
Original file line number Diff line number Diff line change
Expand Up @@ -268,9 +268,9 @@
"net10.0": {
"Microsoft.NET.ILLink.Tasks": {
"type": "Direct",
"requested": "[10.0.2, )",
"resolved": "10.0.2",
"contentHash": "sXdDtMf2qcnbygw9OdE535c2lxSxrZP8gO4UhDJ0xiJbl1wIqXS1OTcTDFTIJPOFd6Mhcm8gPEthqWGUxBsTqw=="
"requested": "[10.0.0, )",
"resolved": "10.0.0",
"contentHash": "kICGrGYEzCNI3wPzfEXcwNHgTvlvVn9yJDhSdRK+oZQy4jvYH529u7O0xf5ocQKzOMjfS07+3z9PKRIjrFMJDA=="
},
"Microsoft.NETFramework.ReferenceAssemblies": {
"type": "Direct",
Expand Down Expand Up @@ -442,9 +442,9 @@
"net8.0": {
"Microsoft.NET.ILLink.Tasks": {
"type": "Direct",
"requested": "[8.0.23, )",
"resolved": "8.0.23",
"contentHash": "GqHiB1HbbODWPbY/lc5xLQH8siEEhNA0ptpJCC6X6adtAYNEzu5ZlqV3YHA3Gh7fuEwgA8XqVwMtH2KNtuQM1Q=="
"requested": "[8.0.22, )",
"resolved": "8.0.22",
"contentHash": "MhcMithKEiyyNkD2ZfbDZPmcOdi0GheGfg8saEIIEfD/fol3iHmcV8TsZkD4ZYz5gdUuoX4YtlVySUU7Sxl9SQ=="
},
"Microsoft.NETFramework.ReferenceAssemblies": {
"type": "Direct",
Expand Down
92 changes: 92 additions & 0 deletions tests/SharpCompress.Test/Streams/SharpCompressStreamSeekTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -127,4 +127,96 @@ public void Position_SetWithinRecordedRange_Succeeds()
Assert.Equal(3, readBuffer[0]);
Assert.Equal(4, readBuffer[1]);
}

[Fact]
public void EnsureMinimumRewindBufferSize_ExpandsSmallBuffer_PreservesExistingData()
{
// Arrange: create a stream with a small initial buffer (size 10)
var ms = new MemoryStream(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8 });
var nonSeekableMs = new NonSeekableStreamWrapper(ms);
var stream = SharpCompressStream.Create(nonSeekableMs, 10);
stream.StartRecording();

// Read 4 bytes — they are now in the ring buffer
var buffer = new byte[8];
stream.Read(buffer, 0, 4);
Assert.Equal(4, stream.Position);

// Rewind to verify 4 bytes are present
stream.Rewind();

// Act: expand the ring buffer to 200 bytes while data is present
stream.EnsureMinimumRewindBufferSize(200);

// Verify the data is still replayable after expansion
var readBuffer = new byte[4];
stream.Read(readBuffer, 0, 4);
Assert.Equal(1, readBuffer[0]);
Assert.Equal(2, readBuffer[1]);
Assert.Equal(3, readBuffer[2]);
Assert.Equal(4, readBuffer[3]);
}

[Fact]
public void EnsureMinimumRewindBufferSize_BufferAlreadyLarger_DoesNotShrink()
{
// Arrange: create a stream with a large initial buffer (size 200)
var ms = new MemoryStream(new byte[] { 1, 2, 3, 4, 5 });
var nonSeekableMs = new NonSeekableStreamWrapper(ms);
var stream = SharpCompressStream.Create(nonSeekableMs, 200);
stream.StartRecording();
stream.Read(new byte[5], 0, 5);

// Act: request a smaller minimum — buffer should stay at 200
stream.EnsureMinimumRewindBufferSize(50);

// Assert: buffer can still hold the 5 bytes written before expansion request
stream.Rewind();
var readBuffer = new byte[5];
stream.Read(readBuffer, 0, 5);
Assert.Equal(1, readBuffer[0]);
Assert.Equal(5, readBuffer[4]);
}

[Fact]
public void EnsureMinimumRewindBufferSize_AllowsRewindAfterLargeRead()
{
// Simulate the BZip2 scenario: small initial buffer, expand after format detection,
// then verify a large read still allows Rewind.
const int initialSize = 10;
const int expandedSize = 100;
const int largeReadSize = 80;

var data = new byte[100];
for (var i = 0; i < data.Length; i++)
{
data[i] = (byte)(i + 1);
}

var ms = new MemoryStream(data);
var nonSeekableMs = new NonSeekableStreamWrapper(ms);
var stream = SharpCompressStream.Create(nonSeekableMs, initialSize);
stream.StartRecording();

// Read 4 bytes (format detection — magic bytes)
var buffer = new byte[4];
stream.Read(buffer, 0, 4);
stream.Rewind();

// Expand the ring buffer to cover the anticipated large probe read
stream.EnsureMinimumRewindBufferSize(expandedSize);

// Read a large amount (simulating BZip2 block decompression)
var largeBuffer = new byte[largeReadSize];
stream.Read(largeBuffer, 0, largeReadSize);

// Rewind must succeed even though largeReadSize > initialSize
stream.Rewind();

// Verify data replays correctly
var verifyBuffer = new byte[largeReadSize];
stream.Read(verifyBuffer, 0, largeReadSize);
Assert.Equal(data[0], verifyBuffer[0]);
Assert.Equal(data[largeReadSize - 1], verifyBuffer[largeReadSize - 1]);
}
}
49 changes: 49 additions & 0 deletions tests/SharpCompress.Test/Tar/TarReaderTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
using System.Collections.Generic;
using System.IO;
using SharpCompress.Common;
using SharpCompress.Compressors.BZip2;
using SharpCompress.Factories;
using SharpCompress.Readers;
using SharpCompress.Readers.Tar;
using SharpCompress.Test.Mocks;
Expand Down Expand Up @@ -58,6 +60,53 @@ public void Tar_Skip()
[Fact]
public void Tar_GZip_OldGnu_Reader() => Read("Tar.oldgnu.tar.gz", CompressionType.GZip);

[Fact]
public void Tar_BZip2_Reader_NonSeekable()
{
// Regression test for: Dynamic default RingBuffer for BZip2
// Opening a .tar.bz2 from a non-seekable stream should succeed
// because EnsureMinimumRewindBufferSize expands the ring buffer
// to hold the BZip2 block before calling IsTarFile.
Comment on lines +68 to +69

Copilot AI Apr 1, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This regression test comment references EnsureMinimumRewindBufferSize expanding the ring buffer, but there is no such API in the codebase and the implementation appears to rely on allocating the correct buffer size up-front (e.g., via TarWrapper.MaximumRewindBufferSize / StartRecording(minBufferSize)). Please update the comment to match the actual mechanism so the test remains an accurate explanation of the failure mode and fix.

Suggested change
// because EnsureMinimumRewindBufferSize expands the ring buffer
// to hold the BZip2 block before calling IsTarFile.
// because the rewind ring buffer is allocated with enough capacity
// up front (e.g. via MaximumRewindBufferSize / StartRecording) to hold
// the BZip2 block before calling IsTarFile.

Copilot uses AI. Check for mistakes.
using var fs = File.OpenRead(Path.Combine(TEST_ARCHIVES_PATH, "Tar.tar.bz2"));
using var nonSeekable = new ForwardOnlyStream(fs);
using var reader = ReaderFactory.OpenReader(nonSeekable);
var entryCount = 0;
while (reader.MoveToNextEntry())
{
if (!reader.Entry.IsDirectory)
{
entryCount++;
}
}
Assert.True(entryCount > 0);
}

[Fact]
public void TarWrapper_BZip2_MinimumRewindBufferSize_IsMaxBZip2BlockSize()
{
// The BZip2 TarWrapper must declare a MinimumRewindBufferSize large enough
// to hold an entire maximum-size compressed BZip2 block (9 × 100 000 bytes).
var bzip2Wrapper = Array.Find(
TarWrapper.Wrappers,
w => w.CompressionType == CompressionType.BZip2
);
Assert.NotNull(bzip2Wrapper);
Assert.Equal(BZip2Constants.baseBlockSize * 9, bzip2Wrapper.MinimumRewindBufferSize);
}

[Fact]
public void TarWrapper_Default_MinimumRewindBufferSize_Is_DefaultRewindableBufferSize()
{
// Non-BZip2 wrappers that don't specify a custom size default to
// Constants.RewindableBufferSize so existing behaviour is unchanged.
var noneWrapper = Array.Find(
TarWrapper.Wrappers,
w => w.CompressionType == CompressionType.None
);
Assert.NotNull(noneWrapper);
Assert.Equal(Common.Constants.RewindableBufferSize, noneWrapper.MinimumRewindBufferSize);
}

[Fact]
public void Tar_BZip2_Entry_Stream()
{
Expand Down
Loading