Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions src/SharpCompress/Common/Constants.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@ public static class Constants
/// by rewinding and re-reading the same data.
/// </para>
/// <para>
/// <b>Default:</b> 163840 bytes (160KB) - sized to cover ZStandard's worst-case
/// first block on a tar archive (~131KB including frame header overhead).
/// ZStandard blocks can be up to 128KB, exceeding the previous 81KB default.
/// <b>Default:</b> 81920 bytes (80KB) — sufficient for most formats.
/// Formats that require larger buffers (e.g. BZip2, ZStandard) declare their
/// own minimum via <c>TarWrapper.MinimumRewindBufferSize</c>, and
/// <c>TarWrapper.MaximumRewindBufferSize</c> is used at stream construction
/// to ensure the correct capacity is allocated upfront.
/// </para>
/// <para>
/// <b>Typical usage:</b> 500-1000 bytes for most archives
Expand All @@ -41,7 +43,7 @@ public static class Constants
/// </list>
/// </para>
/// </remarks>
public static int RewindableBufferSize { get; set; } = 163840;
public static int RewindableBufferSize { get; set; } = 81920;

public static CultureInfo DefaultCultureInfo { get; set; } = CultureInfo.InvariantCulture;
}
13 changes: 13 additions & 0 deletions src/SharpCompress/Compressors/ZStandard/ZstandardConstants.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,17 @@ internal class ZstandardConstants
/// Magic number found at start of ZStandard frame: 0xFD 0x2F 0xB5 0x28
/// </summary>
public const uint MAGIC = 0xFD2FB528;

/// <summary>
/// Maximum uncompressed size of a single ZStandard block: ZSTD_BLOCKSIZE_MAX = 128 KB.
/// </summary>
public const int BlockSizeMax = 1 << 17; // 131072 bytes

/// <summary>
/// Recommended input (compressed) buffer size for streaming decompression:
/// ZSTD_DStreamInSize = ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize (3 bytes).
/// The ring buffer must be at least this large to hold the compressed bytes read
/// during format detection before the first rewind.
/// </summary>
public const int DStreamInSize = BlockSizeMax + 3;
}
8 changes: 4 additions & 4 deletions src/SharpCompress/Factories/TarFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public override bool IsArchive(Stream stream, string? password = null)
{
var providers = CompressionProviderRegistry.Default;
var sharpCompressStream = new SharpCompressStream(stream);
sharpCompressStream.StartRecording();
sharpCompressStream.StartRecording(TarWrapper.MaximumRewindBufferSize);
foreach (var wrapper in TarWrapper.Wrappers)
{
sharpCompressStream.Rewind();
Expand Down Expand Up @@ -84,7 +84,7 @@ public override async ValueTask<bool> IsArchiveAsync(
{
var providers = CompressionProviderRegistry.Default;
var sharpCompressStream = new SharpCompressStream(stream);
sharpCompressStream.StartRecording();
sharpCompressStream.StartRecording(TarWrapper.MaximumRewindBufferSize);
foreach (var wrapper in TarWrapper.Wrappers)
{
sharpCompressStream.Rewind();
Expand Down Expand Up @@ -319,7 +319,7 @@ public IReader OpenReader(Stream stream, ReaderOptions? options)
{
options ??= new ReaderOptions();
var sharpCompressStream = new SharpCompressStream(stream);
sharpCompressStream.StartRecording();
sharpCompressStream.StartRecording(TarWrapper.MaximumRewindBufferSize);
foreach (var wrapper in TarWrapper.Wrappers)
{
sharpCompressStream.Rewind();
Expand Down Expand Up @@ -352,7 +352,7 @@ public async ValueTask<IAsyncReader> OpenAsyncReader(
cancellationToken.ThrowIfCancellationRequested();
options ??= new ReaderOptions();
var sharpCompressStream = new SharpCompressStream(stream);
sharpCompressStream.StartRecording();
sharpCompressStream.StartRecording(TarWrapper.MaximumRewindBufferSize);
foreach (var wrapper in TarWrapper.Wrappers)
{
sharpCompressStream.Rewind();
Expand Down
45 changes: 42 additions & 3 deletions src/SharpCompress/Factories/TarWrapper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,24 @@ public class TarWrapper(
Func<Stream, Stream> createStream,
Func<Stream, CancellationToken, ValueTask<Stream>> createStreamAsync,
IEnumerable<string> knownExtensions,
bool wrapInSharpCompressStream = true
bool wrapInSharpCompressStream = true,
int? minimumRewindBufferSize = null
)
{
public CompressionType CompressionType { get; } = type;
public Func<Stream, bool> IsMatch { get; } = canHandle;
public Func<Stream, CancellationToken, ValueTask<bool>> IsMatchAsync { get; } = canHandleAsync;
public bool WrapInSharpCompressStream { get; } = wrapInSharpCompressStream;

/// <summary>
/// The minimum ring buffer size required to detect and probe this format.
/// Format detection reads a decompressed block to check the tar header, so
/// the ring buffer must be large enough to hold the compressed bytes consumed
/// during that probe. Defaults to <see cref="Common.Constants.RewindableBufferSize"/>.
/// </summary>
public int MinimumRewindBufferSize { get; } =
minimumRewindBufferSize ?? Common.Constants.RewindableBufferSize;

public Func<Stream, Stream> CreateStream { get; } = createStream;
public Func<Stream, CancellationToken, ValueTask<Stream>> CreateStreamAsync { get; } =
createStreamAsync;
Expand Down Expand Up @@ -57,7 +67,11 @@ public class TarWrapper(
await BZip2Stream
.CreateAsync(stream, CompressionMode.Decompress, false)
.ConfigureAwait(false),
["tar.bz2", "tb2", "tbz", "tbz2", "tz2"]
["tar.bz2", "tb2", "tbz", "tbz2", "tz2"],
// BZip2 decompresses in whole blocks; the compressed size of the first block
// can be close to the uncompressed maximum (9 × 100 000 = 900 000 bytes).
// The ring buffer must hold all compressed bytes read during format detection.
minimumRewindBufferSize: BZip2Constants.baseBlockSize * 9
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot are there more kinds than just BZip2 to worry about? Zstandard?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only BZip2 needs the enlarged buffer. The other formats all decompress incrementally:

  • ZStandard reads from the source in chunks of ZSTD_DStreamInSize() ≈ 128 KB and produces output from each chunk before reading more. The default 160 KB ring buffer is larger, so no special treatment is needed.
  • GZip (DEFLATE), LZip (LZMA), XZ (LZMA2), and LZW are all streaming/dictionary-based decompressors that produce output while consuming input — well within the default 160 KB buffer.

BZip2 is unique in that it must buffer and decode an entire compressed block before producing any output, and at compression level 9 that block can be up to 900 KB of compressed data.

),
new(
CompressionType.GZip,
Expand All @@ -74,7 +88,11 @@ await BZip2Stream
ZStandardStream.IsZStandardAsync,
(stream) => new ZStandardStream(stream),
(stream, _) => new ValueTask<Stream>(new ZStandardStream(stream)),
["tar.zst", "tar.zstd", "tzst", "tzstd"]
["tar.zst", "tar.zstd", "tzst", "tzstd"],
// ZStandard decompresses in blocks; the compressed size of the first block
// can be up to ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize = 131075 bytes.
// The ring buffer must hold all compressed bytes read during format detection.
minimumRewindBufferSize: ZstandardConstants.DStreamInSize
),
new(
CompressionType.LZip,
Expand Down Expand Up @@ -104,4 +122,25 @@ await BZip2Stream
false
),
];

/// <summary>
/// The largest <see cref="MinimumRewindBufferSize"/> across all registered wrappers.
/// Use this as the ring buffer size when creating a stream for Tar format detection so
/// that the buffer is sized correctly at construction and never needs to be reallocated.
/// </summary>
public static int MaximumRewindBufferSize { get; } = GetMaximumRewindBufferSize();

// Invoked from the MaximumRewindBufferSize initialiser, which is declared after
// Wrappers so that Wrappers is already populated when this runs.
// Returns 0 when no wrapper is registered.
private static int GetMaximumRewindBufferSize()
{
    var largest = 0;
    foreach (var wrapper in Wrappers)
    {
        var required = wrapper.MinimumRewindBufferSize;
        largest = required > largest ? required : largest;
    }
    return largest;
}
}
3 changes: 2 additions & 1 deletion src/SharpCompress/IO/SeekableSharpCompressStream.cs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ public override void Rewind(bool stopRecording = false)
}
}

public override void StartRecording() => _recordedPosition = _stream.Position;
public override void StartRecording(int? minBufferSize = null) =>
_recordedPosition = _stream.Position;

public override void StopRecording() => _recordedPosition = null;

Expand Down
20 changes: 17 additions & 3 deletions src/SharpCompress/IO/SharpCompressStream.cs
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,17 @@ public virtual void StopRecording()
// (frozen recording mode) until Rewind(stopRecording: true) is called
}

public virtual void StartRecording()
/// <summary>
/// Begins recording reads so that <see cref="Rewind()"/> can replay them.
/// </summary>
/// <param name="minBufferSize">
/// Minimum ring buffer capacity in bytes. When provided and larger than
/// <see cref="Common.Constants.RewindableBufferSize"/>, the ring buffer is allocated
/// with this size. Pass the largest amount of compressed data that may be consumed
/// during format detection before the first rewind. Defaults to
/// <see cref="Common.Constants.RewindableBufferSize"/> when null or not supplied.
/// </param>
public virtual void StartRecording(int? minBufferSize = null)
{
if (_isPassthrough)
{
Expand All @@ -190,10 +200,14 @@ public virtual void StartRecording()
);
}

// Ensure ring buffer exists
// Allocate ring buffer with the requested minimum size (at least the global default).
if (_ringBuffer is null)
{
_ringBuffer = new RingBuffer(Constants.RewindableBufferSize);
var size =
minBufferSize.GetValueOrDefault() > Constants.RewindableBufferSize
? minBufferSize.GetValueOrDefault()
: Constants.RewindableBufferSize;
_ringBuffer = new RingBuffer(size);
}
Comment on lines 188 to 211
Copy link

Copilot AI Apr 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new StartRecording(minBufferSize) only allocates a ring buffer when _ringBuffer is null. For non-seekable streams created via SharpCompressStream.Create(...), _ringBuffer is typically preallocated in the factory constructor, so a larger minBufferSize passed here will be silently ignored even if the existing buffer is smaller than the requested minimum. Consider either (a) validating that the existing ring buffer length meets the requested minimum and throwing a clear exception if not, or (b) ensuring callers that need a larger buffer always pass it at Create-time and documenting that StartRecording cannot increase an already-allocated buffer.

Copilot uses AI. Check for mistakes.

// Mark current position as recording anchor
Expand Down
11 changes: 9 additions & 2 deletions src/SharpCompress/Readers/Tar/TarReader.Factory.cs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
using System;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
Expand Down Expand Up @@ -91,7 +92,10 @@ public static async ValueTask<IAsyncReader> OpenAsyncReader(
readerOptions ??= new ReaderOptions();
var sharpCompressStream = SharpCompressStream.Create(
stream,
bufferSize: readerOptions.RewindableBufferSize
bufferSize: Math.Max(
readerOptions.RewindableBufferSize ?? 0,
TarWrapper.MaximumRewindBufferSize
)
);
Comment on lines +95 to 99
Copy link

Copilot AI Apr 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TarReader.Factory now forces the rewind buffer size to at least TarWrapper.MaximumRewindBufferSize (currently 900,000 bytes) on non-seekable streams, even when the caller explicitly sets ReaderOptions.RewindableBufferSize smaller. This is a noticeable memory increase for non-BZip2 tar streams (e.g., tar.gz). If possible, consider honoring a smaller caller-specified size for formats that don’t require the maximum, or document that TarReader will always allocate at least this amount for non-seekable streams.

Suggested change
bufferSize: Math.Max(
readerOptions.RewindableBufferSize ?? 0,
TarWrapper.MaximumRewindBufferSize
)
);
bufferSize: readerOptions.RewindableBufferSize ?? TarWrapper.MaximumRewindBufferSize
);

Copilot uses AI. Check for mistakes.
long pos = sharpCompressStream.Position;
foreach (var wrapper in TarWrapper.Wrappers)
Expand Down Expand Up @@ -170,7 +174,10 @@ public static IReader OpenReader(Stream stream, ReaderOptions? readerOptions = n
readerOptions ??= new ReaderOptions();
var sharpCompressStream = SharpCompressStream.Create(
stream,
bufferSize: readerOptions.RewindableBufferSize
bufferSize: Math.Max(
readerOptions.RewindableBufferSize ?? 0,
TarWrapper.MaximumRewindBufferSize
)
);
long pos = sharpCompressStream.Position;
foreach (var wrapper in TarWrapper.Wrappers)
Expand Down
12 changes: 6 additions & 6 deletions src/SharpCompress/packages.lock.json
Original file line number Diff line number Diff line change
Expand Up @@ -268,9 +268,9 @@
"net10.0": {
"Microsoft.NET.ILLink.Tasks": {
"type": "Direct",
"requested": "[10.0.2, )",
"resolved": "10.0.2",
"contentHash": "sXdDtMf2qcnbygw9OdE535c2lxSxrZP8gO4UhDJ0xiJbl1wIqXS1OTcTDFTIJPOFd6Mhcm8gPEthqWGUxBsTqw=="
"requested": "[10.0.5, )",
"resolved": "10.0.5",
"contentHash": "A+5ZuQ0f449tM+MQrhf6R9ZX7lYpjk/ODEwLYKrnF6111rtARx8fVsm4YznUnQiKnnXfaXNBqgxmil6RW3L3SA=="
},
"Microsoft.NETFramework.ReferenceAssemblies": {
"type": "Direct",
Expand Down Expand Up @@ -442,9 +442,9 @@
"net8.0": {
"Microsoft.NET.ILLink.Tasks": {
"type": "Direct",
"requested": "[8.0.23, )",
"resolved": "8.0.23",
"contentHash": "GqHiB1HbbODWPbY/lc5xLQH8siEEhNA0ptpJCC6X6adtAYNEzu5ZlqV3YHA3Gh7fuEwgA8XqVwMtH2KNtuQM1Q=="
"requested": "[8.0.25, )",
"resolved": "8.0.25",
"contentHash": "sqX4nmBft05ivqKvUT4nxaN8rT3apCLt9SWFkfRrQPwra1zPwFknQAw1lleuMCKOCLvVmOWwrC2iPSm9RiXZUg=="
},
"Microsoft.NETFramework.ReferenceAssemblies": {
"type": "Direct",
Expand Down
54 changes: 54 additions & 0 deletions tests/SharpCompress.Test/Streams/SharpCompressStreamSeekTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -127,4 +127,58 @@ public void Position_SetWithinRecordedRange_Succeeds()
Assert.Equal(3, readBuffer[0]);
Assert.Equal(4, readBuffer[1]);
}

[Fact]
public void StartRecording_WithLargerMinBufferSize_AllowsLargeRewind()
{
// Simulates the BZip2 scenario: the ring buffer must be large enough
// from the moment StartRecording is called so that a large probe read
// (up to 900 KB for BZip2) can be rewound without buffer overflow.
const int largeSize = 100;
const int largeReadSize = 80;

var data = new byte[100];
for (var i = 0; i < data.Length; i++)
{
data[i] = (byte)(i + 1);
}

var ms = new MemoryStream(data);
var nonSeekableMs = new NonSeekableStreamWrapper(ms);
var stream = SharpCompressStream.Create(nonSeekableMs, largeSize);

// Pass the required size upfront — no expansion needed later
Comment on lines +148 to +150
Copy link

Copilot AI Apr 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test doesn’t currently exercise the new StartRecording(minBufferSize) allocation path: SharpCompressStream.Create(nonSeekableMs, largeSize) already preallocates the ring buffer with largeSize, so StartRecording(largeSize) is effectively redundant. To actually validate the new behavior, construct the stream in a way that leaves _ringBuffer null until StartRecording (or start with a smaller buffer and assert the expected behavior when a larger minimum is requested).

Suggested change
var stream = SharpCompressStream.Create(nonSeekableMs, largeSize);
// Pass the required size upfront — no expansion needed later
var stream = SharpCompressStream.Create(nonSeekableMs);
// Pass the required size upfront — forces allocation to at least largeSize

Copilot uses AI. Check for mistakes.
stream.StartRecording(largeSize);

// Read a large amount (simulating BZip2 block decompression during IsTarFile probe)
var largeBuffer = new byte[largeReadSize];
stream.Read(largeBuffer, 0, largeReadSize);

// Rewind must succeed because the buffer was large enough from the start
stream.Rewind();

var verifyBuffer = new byte[largeReadSize];
stream.Read(verifyBuffer, 0, largeReadSize);
Assert.Equal(data[0], verifyBuffer[0]);
Assert.Equal(data[largeReadSize - 1], verifyBuffer[largeReadSize - 1]);
}

[Fact]
public void StartRecording_DefaultSize_UsesConstantsRewindableBufferSize()
{
    // No minimum is passed to StartRecording, so the default buffer sizing applies;
    // a short read must still be replayable after Rewind().
    var source = new NonSeekableStreamWrapper(new MemoryStream(new byte[] { 1, 2, 3, 4, 5 }));
    var stream = SharpCompressStream.Create(source);
    stream.StartRecording();

    // First pass: consume all five bytes while recording is active.
    var firstPass = new byte[5];
    stream.Read(firstPass, 0, 5);

    stream.Rewind();

    // Second pass: the same bytes must be served again from the recording anchor.
    var replayed = new byte[5];
    stream.Read(replayed, 0, 5);
    Assert.Equal(1, replayed[0]);
    Assert.Equal(5, replayed[4]);
}
}
49 changes: 49 additions & 0 deletions tests/SharpCompress.Test/Tar/TarReaderTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
using System.Collections.Generic;
using System.IO;
using SharpCompress.Common;
using SharpCompress.Compressors.BZip2;
using SharpCompress.Factories;
using SharpCompress.Readers;
using SharpCompress.Readers.Tar;
using SharpCompress.Test.Mocks;
Expand Down Expand Up @@ -58,6 +60,53 @@ public void Tar_Skip()
[Fact]
public void Tar_GZip_OldGnu_Reader() => Read("Tar.oldgnu.tar.gz", CompressionType.GZip);

[Fact]
public void Tar_BZip2_Reader_NonSeekable()
{
// Regression test for: Dynamic default RingBuffer for BZip2
// Opening a .tar.bz2 from a non-seekable stream should succeed
// because EnsureMinimumRewindBufferSize expands the ring buffer
// to hold the BZip2 block before calling IsTarFile.
Comment on lines +68 to +69
Copy link

Copilot AI Apr 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This regression test comment references EnsureMinimumRewindBufferSize expanding the ring buffer, but there is no such API in the codebase and the implementation appears to rely on allocating the correct buffer size up-front (e.g., via TarWrapper.MaximumRewindBufferSize / StartRecording(minBufferSize)). Please update the comment to match the actual mechanism so the test remains an accurate explanation of the failure mode and fix.

Suggested change
// because EnsureMinimumRewindBufferSize expands the ring buffer
// to hold the BZip2 block before calling IsTarFile.
// because the rewind ring buffer is allocated with enough capacity
// up front (e.g. via MaximumRewindBufferSize / StartRecording) to hold
// the BZip2 block before calling IsTarFile.

Copilot uses AI. Check for mistakes.
using var fs = File.OpenRead(Path.Combine(TEST_ARCHIVES_PATH, "Tar.tar.bz2"));
using var nonSeekable = new ForwardOnlyStream(fs);
using var reader = ReaderFactory.OpenReader(nonSeekable);
var entryCount = 0;
while (reader.MoveToNextEntry())
{
if (!reader.Entry.IsDirectory)
{
entryCount++;
}
}
Assert.True(entryCount > 0);
}

[Fact]
public void TarWrapper_BZip2_MinimumRewindBufferSize_IsMaxBZip2BlockSize()
{
    // The BZip2 wrapper is expected to request a rewind buffer of nine base blocks,
    // i.e. room for one maximum-size BZip2 block consumed during format detection.
    static bool IsBZip2(TarWrapper wrapper) => wrapper.CompressionType == CompressionType.BZip2;

    var expectedSize = BZip2Constants.baseBlockSize * 9;

    var bzip2Wrapper = Array.Find(TarWrapper.Wrappers, IsBZip2);

    Assert.NotNull(bzip2Wrapper);
    Assert.Equal(expectedSize, bzip2Wrapper.MinimumRewindBufferSize);
}

[Fact]
public void TarWrapper_Default_MinimumRewindBufferSize_Is_DefaultRewindableBufferSize()
{
    // Wrappers that do not pass an explicit minimumRewindBufferSize (BZip2 and
    // ZStandard do pass one) fall back to Constants.RewindableBufferSize, so their
    // behaviour is unchanged. The uncompressed (None) wrapper is used as the sample.
    static bool IsUncompressed(TarWrapper wrapper) =>
        wrapper.CompressionType == CompressionType.None;

    var noneWrapper = Array.Find(TarWrapper.Wrappers, IsUncompressed);

    Assert.NotNull(noneWrapper);
    Assert.Equal(Common.Constants.RewindableBufferSize, noneWrapper.MinimumRewindBufferSize);
}

[Fact]
public void Tar_BZip2_Entry_Stream()
{
Expand Down
Loading