Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions src/SharpCompress/Common/Constants.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@ public static class Constants
/// by rewinding and re-reading the same data.
/// </para>
/// <para>
/// <b>Default:</b> 163840 bytes (160KB) - sized to cover ZStandard's worst-case
/// first block on a tar archive (~131KB including frame header overhead).
/// ZStandard blocks can be up to 128KB, exceeding the previous 81KB default.
/// <b>Default:</b> 81920 bytes (80KB) — sufficient for most formats.
/// Formats that require larger buffers (e.g. BZip2, ZStandard) declare their
/// own minimum via <c>TarWrapper.MinimumRewindBufferSize</c>, and
/// <c>TarWrapper.MaximumRewindBufferSize</c> is used at stream construction
/// to ensure the correct capacity is allocated upfront.
/// </para>
/// <para>
/// <b>Typical usage:</b> 500-1000 bytes for most archives
Expand All @@ -41,7 +43,7 @@ public static class Constants
/// </list>
/// </para>
/// </remarks>
public static int RewindableBufferSize { get; set; } = 163840;
public static int RewindableBufferSize { get; set; } = 81920;

public static CultureInfo DefaultCultureInfo { get; set; } = CultureInfo.InvariantCulture;
}
13 changes: 13 additions & 0 deletions src/SharpCompress/Compressors/ZStandard/ZstandardConstants.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,17 @@ internal class ZstandardConstants
/// Magic number found at start of ZStandard frame: 0xFD 0x2F 0xB5 0x28
/// </summary>
public const uint MAGIC = 0xFD2FB528;

/// <summary>
/// Maximum uncompressed size of a single ZStandard block: ZSTD_BLOCKSIZE_MAX = 128 KB.
/// </summary>
public const int BlockSizeMax = 1 << 17; // 131072 bytes

/// <summary>
/// Recommended input (compressed) buffer size for streaming decompression:
/// ZSTD_DStreamInSize = ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize (3 bytes).
/// The ring buffer must be at least this large to hold the compressed bytes read
/// during format detection before the first rewind.
/// </summary>
public const int DStreamInSize = BlockSizeMax + 3;
}
8 changes: 4 additions & 4 deletions src/SharpCompress/Factories/TarFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ public override bool IsArchive(Stream stream, string? password = null)
{
var providers = CompressionProviderRegistry.Default;
var sharpCompressStream = new SharpCompressStream(stream);
sharpCompressStream.StartRecording();
sharpCompressStream.StartRecording(TarWrapper.MaximumRewindBufferSize);
foreach (var wrapper in TarWrapper.Wrappers)
{
sharpCompressStream.Rewind();
Expand Down Expand Up @@ -84,7 +84,7 @@ public override async ValueTask<bool> IsArchiveAsync(
{
var providers = CompressionProviderRegistry.Default;
var sharpCompressStream = new SharpCompressStream(stream);
sharpCompressStream.StartRecording();
sharpCompressStream.StartRecording(TarWrapper.MaximumRewindBufferSize);
foreach (var wrapper in TarWrapper.Wrappers)
{
sharpCompressStream.Rewind();
Expand Down Expand Up @@ -319,7 +319,7 @@ public IReader OpenReader(Stream stream, ReaderOptions? options)
{
options ??= new ReaderOptions();
var sharpCompressStream = new SharpCompressStream(stream);
sharpCompressStream.StartRecording();
sharpCompressStream.StartRecording(TarWrapper.MaximumRewindBufferSize);
foreach (var wrapper in TarWrapper.Wrappers)
{
sharpCompressStream.Rewind();
Expand Down Expand Up @@ -352,7 +352,7 @@ public async ValueTask<IAsyncReader> OpenAsyncReader(
cancellationToken.ThrowIfCancellationRequested();
options ??= new ReaderOptions();
var sharpCompressStream = new SharpCompressStream(stream);
sharpCompressStream.StartRecording();
sharpCompressStream.StartRecording(TarWrapper.MaximumRewindBufferSize);
foreach (var wrapper in TarWrapper.Wrappers)
{
sharpCompressStream.Rewind();
Expand Down
45 changes: 42 additions & 3 deletions src/SharpCompress/Factories/TarWrapper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,24 @@ public class TarWrapper(
Func<Stream, Stream> createStream,
Func<Stream, CancellationToken, ValueTask<Stream>> createStreamAsync,
IEnumerable<string> knownExtensions,
bool wrapInSharpCompressStream = true
bool wrapInSharpCompressStream = true,
int? minimumRewindBufferSize = null
)
{
public CompressionType CompressionType { get; } = type;
public Func<Stream, bool> IsMatch { get; } = canHandle;
public Func<Stream, CancellationToken, ValueTask<bool>> IsMatchAsync { get; } = canHandleAsync;
public bool WrapInSharpCompressStream { get; } = wrapInSharpCompressStream;

/// <summary>
/// The minimum ring buffer size required to detect and probe this format.
/// Format detection reads a decompressed block to check the tar header, so
/// the ring buffer must be large enough to hold the compressed bytes consumed
/// during that probe. Defaults to <see cref="Common.Constants.RewindableBufferSize"/>.
/// </summary>
public int MinimumRewindBufferSize { get; } =
minimumRewindBufferSize ?? Common.Constants.RewindableBufferSize;

public Func<Stream, Stream> CreateStream { get; } = createStream;
public Func<Stream, CancellationToken, ValueTask<Stream>> CreateStreamAsync { get; } =
createStreamAsync;
Expand Down Expand Up @@ -57,7 +67,11 @@ public class TarWrapper(
await BZip2Stream
.CreateAsync(stream, CompressionMode.Decompress, false)
.ConfigureAwait(false),
["tar.bz2", "tb2", "tbz", "tbz2", "tz2"]
["tar.bz2", "tb2", "tbz", "tbz2", "tz2"],
// BZip2 decompresses in whole blocks; the compressed size of the first block
// can be close to the uncompressed maximum (9 × 100 000 = 900 000 bytes).
// The ring buffer must hold all compressed bytes read during format detection.
minimumRewindBufferSize: BZip2Constants.baseBlockSize * 9
),
new(
CompressionType.GZip,
Expand All @@ -74,7 +88,11 @@ await BZip2Stream
ZStandardStream.IsZStandardAsync,
(stream) => new ZStandardStream(stream),
(stream, _) => new ValueTask<Stream>(new ZStandardStream(stream)),
["tar.zst", "tar.zstd", "tzst", "tzstd"]
["tar.zst", "tar.zstd", "tzst", "tzstd"],
// ZStandard decompresses in blocks; the compressed size of the first block
// can be up to ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize = 131075 bytes.
// The ring buffer must hold all compressed bytes read during format detection.
minimumRewindBufferSize: ZstandardConstants.DStreamInSize
),
new(
CompressionType.LZip,
Expand Down Expand Up @@ -104,4 +122,25 @@ await BZip2Stream
false
),
];

/// <summary>
/// The largest <see cref="MinimumRewindBufferSize"/> across all registered wrappers.
/// Use this as the ring buffer size when creating a stream for Tar format detection so
/// that the buffer is sized correctly at construction and never needs to be reallocated.
/// </summary>
public static int MaximumRewindBufferSize { get; } = GetMaximumRewindBufferSize();

/// <summary>
/// Scans <see cref="Wrappers"/> and returns the largest
/// <see cref="MinimumRewindBufferSize"/> found, or 0 when no wrapper reports a positive size.
/// </summary>
/// <remarks>
/// Used as the initializer of <see cref="MaximumRewindBufferSize"/>. That property is
/// declared after <see cref="Wrappers"/>, and static initializers run in textual order,
/// so the wrapper list is already populated when this method executes.
/// </remarks>
private static int GetMaximumRewindBufferSize()
{
    var largest = 0;
    foreach (var wrapper in Wrappers)
    {
        // Keep the running maximum; values at or below it leave it untouched.
        largest = Math.Max(largest, wrapper.MinimumRewindBufferSize);
    }
    return largest;
}
}
3 changes: 2 additions & 1 deletion src/SharpCompress/IO/SeekableSharpCompressStream.cs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ public override void Rewind(bool stopRecording = false)
}
}

public override void StartRecording() => _recordedPosition = _stream.Position;
public override void StartRecording(int? minBufferSize = null) =>
_recordedPosition = _stream.Position;

public override void StopRecording() => _recordedPosition = null;

Expand Down
20 changes: 17 additions & 3 deletions src/SharpCompress/IO/SharpCompressStream.cs
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,17 @@ public virtual void StopRecording()
// (frozen recording mode) until Rewind(stopRecording: true) is called
}

public virtual void StartRecording()
/// <summary>
/// Begins recording reads so that <see cref="Rewind()"/> can replay them.
/// </summary>
/// <param name="minBufferSize">
/// Minimum ring buffer capacity in bytes. When provided and larger than
/// <see cref="Common.Constants.RewindableBufferSize"/>, the ring buffer is allocated
/// with this size. Pass the largest amount of compressed data that may be consumed
/// during format detection before the first rewind. Defaults to
/// <see cref="Common.Constants.RewindableBufferSize"/> when null or not supplied.
/// </param>
public virtual void StartRecording(int? minBufferSize = null)
{
if (_isPassthrough)
{
Expand All @@ -190,10 +200,14 @@ public virtual void StartRecording()
);
}

// Ensure ring buffer exists
// Allocate ring buffer with the requested minimum size (at least the global default).
if (_ringBuffer is null)
{
_ringBuffer = new RingBuffer(Constants.RewindableBufferSize);
var size =
minBufferSize.GetValueOrDefault() > Constants.RewindableBufferSize
? minBufferSize.GetValueOrDefault()
: Constants.RewindableBufferSize;
_ringBuffer = new RingBuffer(size);
}

// Mark current position as recording anchor
Expand Down
11 changes: 9 additions & 2 deletions src/SharpCompress/Readers/Tar/TarReader.Factory.cs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
using System;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
Expand Down Expand Up @@ -91,7 +92,10 @@ public static async ValueTask<IAsyncReader> OpenAsyncReader(
readerOptions ??= new ReaderOptions();
var sharpCompressStream = SharpCompressStream.Create(
stream,
bufferSize: readerOptions.RewindableBufferSize
bufferSize: Math.Max(
readerOptions.RewindableBufferSize ?? 0,
TarWrapper.MaximumRewindBufferSize
)
);
long pos = sharpCompressStream.Position;
foreach (var wrapper in TarWrapper.Wrappers)
Expand Down Expand Up @@ -170,7 +174,10 @@ public static IReader OpenReader(Stream stream, ReaderOptions? readerOptions = n
readerOptions ??= new ReaderOptions();
var sharpCompressStream = SharpCompressStream.Create(
stream,
bufferSize: readerOptions.RewindableBufferSize
bufferSize: Math.Max(
readerOptions.RewindableBufferSize ?? 0,
TarWrapper.MaximumRewindBufferSize
)
);
long pos = sharpCompressStream.Position;
foreach (var wrapper in TarWrapper.Wrappers)
Expand Down
54 changes: 54 additions & 0 deletions tests/SharpCompress.Test/Streams/SharpCompressStreamSeekTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -127,4 +127,58 @@ public void Position_SetWithinRecordedRange_Succeeds()
Assert.Equal(3, readBuffer[0]);
Assert.Equal(4, readBuffer[1]);
}

[Fact]
public void StartRecording_WithLargerMinBufferSize_AllowsLargeRewind()
{
    // Scaled-down stand-in for the BZip2 scenario: the ring buffer must be large
    // enough from the moment StartRecording is called so that a large probe read
    // (up to 900 KB for real BZip2 input) can be rewound without buffer overflow.
    // NOTE(review): largeSize is far below Constants.RewindableBufferSize, so this
    // may not exercise the "larger than default" branch of StartRecording — confirm.
    const int largeSize = 100;
    const int largeReadSize = 80;

    var data = new byte[100];
    for (var i = 0; i < data.Length; i++)
    {
        data[i] = (byte)(i + 1);
    }

    var ms = new MemoryStream(data);
    var nonSeekableMs = new NonSeekableStreamWrapper(ms);
    var stream = SharpCompressStream.Create(nonSeekableMs, largeSize);

    // Pass the required size upfront — no expansion needed later.
    stream.StartRecording(largeSize);

    // Read a large amount (simulating BZip2 block decompression during the IsTarFile probe).
    // Stream.Read may legally return fewer bytes than requested, so pin the count.
    var largeBuffer = new byte[largeReadSize];
    var probedCount = stream.Read(largeBuffer, 0, largeReadSize);
    Assert.Equal(largeReadSize, probedCount);

    // Rewind must succeed because the buffer was large enough from the start.
    stream.Rewind();

    var verifyBuffer = new byte[largeReadSize];
    var replayedCount = stream.Read(verifyBuffer, 0, largeReadSize);
    Assert.Equal(largeReadSize, replayedCount);

    // The replay must reproduce every recorded byte, not just the first and last.
    for (var i = 0; i < largeReadSize; i++)
    {
        Assert.Equal(data[i], verifyBuffer[i]);
    }
}

[Fact]
public void StartRecording_DefaultSize_UsesConstantsRewindableBufferSize()
{
    // When no minimum is specified StartRecording falls back to the global default;
    // a small read must still be replayable after Rewind.
    var source = new byte[] { 1, 2, 3, 4, 5 };
    var ms = new MemoryStream(source);
    var nonSeekableMs = new NonSeekableStreamWrapper(ms);
    var stream = SharpCompressStream.Create(nonSeekableMs);
    stream.StartRecording();

    // Stream.Read may return fewer bytes than requested, so assert the count
    // instead of silently discarding it.
    var buffer = new byte[5];
    var firstCount = stream.Read(buffer, 0, 5);
    Assert.Equal(5, firstCount);

    stream.Rewind();

    var readBuffer = new byte[5];
    var replayedCount = stream.Read(readBuffer, 0, 5);
    Assert.Equal(5, replayedCount);

    // Every byte of the replay must match the source, in order.
    for (var i = 0; i < source.Length; i++)
    {
        Assert.Equal(source[i], readBuffer[i]);
    }
}
}
48 changes: 48 additions & 0 deletions tests/SharpCompress.Test/Tar/TarReaderTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
using System.Collections.Generic;
using System.IO;
using SharpCompress.Common;
using SharpCompress.Compressors.BZip2;
using SharpCompress.Factories;
using SharpCompress.Readers;
using SharpCompress.Readers.Tar;
using SharpCompress.Test.Mocks;
Expand Down Expand Up @@ -58,6 +60,52 @@ public void Tar_Skip()
[Fact]
public void Tar_GZip_OldGnu_Reader() => Read("Tar.oldgnu.tar.gz", CompressionType.GZip);

[Fact]
public void Tar_BZip2_Reader_NonSeekable()
{
    // Regression test (dynamic default ring buffer for BZip2): a .tar.bz2 read
    // through a forward-only stream must open successfully, which requires the
    // ring buffer to hold the BZip2 block consumed by the tar-header probe.
    var archivePath = Path.Combine(TEST_ARCHIVES_PATH, "Tar.tar.bz2");
    using var archive = File.OpenRead(archivePath);
    using var forwardOnly = new ForwardOnlyStream(archive);
    using var reader = ReaderFactory.OpenReader(forwardOnly);

    // Count only file entries; directories do not prove that content was readable.
    var fileEntries = 0;
    while (reader.MoveToNextEntry())
    {
        if (reader.Entry.IsDirectory)
        {
            continue;
        }

        fileEntries++;
    }

    Assert.True(fileEntries > 0);
}

[Fact]
public void TarWrapper_BZip2_MinimumRewindBufferSize_IsMaxBZip2BlockSize()
{
    // The BZip2 wrapper is expected to reserve room for one maximum-size block:
    // nine times the base block size (9 × 100 000 bytes).
    var expectedSize = BZip2Constants.baseBlockSize * 9;

    var wrapper = Array.Find(
        TarWrapper.Wrappers,
        candidate => candidate.CompressionType == CompressionType.BZip2
    );

    Assert.NotNull(wrapper);
    Assert.Equal(expectedSize, wrapper.MinimumRewindBufferSize);
}

[Fact]
public void TarWrapper_Default_MinimumRewindBufferSize_Is_DefaultRewindableBufferSize()
{
    // A wrapper that declares no custom minimum falls back to
    // Constants.RewindableBufferSize, so existing behaviour is unchanged for it.
    // The uncompressed (CompressionType.None) wrapper is used as the representative case.
    var wrapper = Array.Find(
        TarWrapper.Wrappers,
        candidate => candidate.CompressionType == CompressionType.None
    );

    Assert.NotNull(wrapper);
    Assert.Equal(Common.Constants.RewindableBufferSize, wrapper.MinimumRewindBufferSize);
}

[Fact]
public void Tar_BZip2_Entry_Stream()
{
Expand Down
Loading