diff --git a/src/SharpCompress/Common/Constants.cs b/src/SharpCompress/Common/Constants.cs index 9812501da..edd81e083 100644 --- a/src/SharpCompress/Common/Constants.cs +++ b/src/SharpCompress/Common/Constants.cs @@ -22,9 +22,11 @@ public static class Constants /// by rewinding and re-reading the same data. /// /// - /// Default: 163840 bytes (160KB) - sized to cover ZStandard's worst-case - /// first block on a tar archive (~131KB including frame header overhead). - /// ZStandard blocks can be up to 128KB, exceeding the previous 81KB default. + /// Default: 81920 bytes (80KB) — sufficient for most formats. + /// Formats that require larger buffers (e.g. BZip2, ZStandard) declare their + /// own minimum via TarWrapper.MinimumRewindBufferSize, and + /// TarWrapper.MaximumRewindBufferSize is used at stream construction + /// to ensure the correct capacity is allocated upfront. /// /// /// Typical usage: 500-1000 bytes for most archives @@ -41,7 +43,7 @@ public static class Constants /// /// /// - public static int RewindableBufferSize { get; set; } = 163840; + public static int RewindableBufferSize { get; set; } = 81920; public static CultureInfo DefaultCultureInfo { get; set; } = CultureInfo.InvariantCulture; } diff --git a/src/SharpCompress/Compressors/ZStandard/ZstandardConstants.cs b/src/SharpCompress/Compressors/ZStandard/ZstandardConstants.cs index ac4756d48..a43b2ef11 100644 --- a/src/SharpCompress/Compressors/ZStandard/ZstandardConstants.cs +++ b/src/SharpCompress/Compressors/ZStandard/ZstandardConstants.cs @@ -6,4 +6,17 @@ internal class ZstandardConstants /// Magic number found at start of ZStandard frame: 0xFD 0x2F 0xB5 0x28 /// public const uint MAGIC = 0xFD2FB528; + + /// + /// Maximum uncompressed size of a single ZStandard block: ZSTD_BLOCKSIZE_MAX = 128 KB. 
+ /// + public const int BlockSizeMax = 1 << 17; // 131072 bytes + + /// + /// Recommended input (compressed) buffer size for streaming decompression: + /// ZSTD_DStreamInSize = ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize (3 bytes). + /// The ring buffer must be at least this large to hold the compressed bytes read + /// during format detection before the first rewind. + /// + public const int DStreamInSize = BlockSizeMax + 3; } diff --git a/src/SharpCompress/Factories/TarFactory.cs b/src/SharpCompress/Factories/TarFactory.cs index 28380587b..0a47d3b75 100644 --- a/src/SharpCompress/Factories/TarFactory.cs +++ b/src/SharpCompress/Factories/TarFactory.cs @@ -52,7 +52,7 @@ public override bool IsArchive(Stream stream, string? password = null) { var providers = CompressionProviderRegistry.Default; var sharpCompressStream = new SharpCompressStream(stream); - sharpCompressStream.StartRecording(); + sharpCompressStream.StartRecording(TarWrapper.MaximumRewindBufferSize); foreach (var wrapper in TarWrapper.Wrappers) { sharpCompressStream.Rewind(); @@ -84,7 +84,7 @@ public override async ValueTask IsArchiveAsync( { var providers = CompressionProviderRegistry.Default; var sharpCompressStream = new SharpCompressStream(stream); - sharpCompressStream.StartRecording(); + sharpCompressStream.StartRecording(TarWrapper.MaximumRewindBufferSize); foreach (var wrapper in TarWrapper.Wrappers) { sharpCompressStream.Rewind(); @@ -319,7 +319,7 @@ public IReader OpenReader(Stream stream, ReaderOptions? 
options) { options ??= new ReaderOptions(); var sharpCompressStream = new SharpCompressStream(stream); - sharpCompressStream.StartRecording(); + sharpCompressStream.StartRecording(TarWrapper.MaximumRewindBufferSize); foreach (var wrapper in TarWrapper.Wrappers) { sharpCompressStream.Rewind(); @@ -352,7 +352,7 @@ public async ValueTask OpenAsyncReader( cancellationToken.ThrowIfCancellationRequested(); options ??= new ReaderOptions(); var sharpCompressStream = new SharpCompressStream(stream); - sharpCompressStream.StartRecording(); + sharpCompressStream.StartRecording(TarWrapper.MaximumRewindBufferSize); foreach (var wrapper in TarWrapper.Wrappers) { sharpCompressStream.Rewind(); diff --git a/src/SharpCompress/Factories/TarWrapper.cs b/src/SharpCompress/Factories/TarWrapper.cs index fee9707e1..465cf836e 100644 --- a/src/SharpCompress/Factories/TarWrapper.cs +++ b/src/SharpCompress/Factories/TarWrapper.cs @@ -22,7 +22,8 @@ public class TarWrapper( Func createStream, Func> createStreamAsync, IEnumerable knownExtensions, - bool wrapInSharpCompressStream = true + bool wrapInSharpCompressStream = true, + int? minimumRewindBufferSize = null ) { public CompressionType CompressionType { get; } = type; @@ -30,6 +31,15 @@ public class TarWrapper( public Func> IsMatchAsync { get; } = canHandleAsync; public bool WrapInSharpCompressStream { get; } = wrapInSharpCompressStream; + /// + /// The minimum ring buffer size required to detect and probe this format. + /// Format detection reads a decompressed block to check the tar header, so + /// the ring buffer must be large enough to hold the compressed bytes consumed + /// during that probe. Defaults to <see cref="Common.Constants.RewindableBufferSize"/>. + /// + public int MinimumRewindBufferSize { get; } = + minimumRewindBufferSize ?? 
Common.Constants.RewindableBufferSize; + public Func CreateStream { get; } = createStream; public Func> CreateStreamAsync { get; } = createStreamAsync; @@ -57,7 +67,11 @@ public class TarWrapper( await BZip2Stream .CreateAsync(stream, CompressionMode.Decompress, false) .ConfigureAwait(false), - ["tar.bz2", "tb2", "tbz", "tbz2", "tz2"] + ["tar.bz2", "tb2", "tbz", "tbz2", "tz2"], + // BZip2 decompresses in whole blocks; the compressed size of the first block + // can be close to the uncompressed maximum (9 × 100 000 = 900 000 bytes). + // The ring buffer must hold all compressed bytes read during format detection. + minimumRewindBufferSize: BZip2Constants.baseBlockSize * 9 ), new( CompressionType.GZip, @@ -74,7 +88,11 @@ await BZip2Stream ZStandardStream.IsZStandardAsync, (stream) => new ZStandardStream(stream), (stream, _) => new ValueTask(new ZStandardStream(stream)), - ["tar.zst", "tar.zstd", "tzst", "tzstd"] + ["tar.zst", "tar.zstd", "tzst", "tzstd"], + // ZStandard decompresses in blocks; the compressed size of the first block + // can be up to ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize = 131075 bytes. + // The ring buffer must hold all compressed bytes read during format detection. + minimumRewindBufferSize: ZstandardConstants.DStreamInSize ), new( CompressionType.LZip, @@ -104,4 +122,25 @@ await BZip2Stream false ), ]; + + /// + /// The largest <see cref="MinimumRewindBufferSize"/> across all registered wrappers. + /// Use this as the ring buffer size when creating a stream for Tar format detection so + /// that the buffer is sized correctly at construction and never needs to be reallocated. + /// + public static int MaximumRewindBufferSize { get; } = GetMaximumRewindBufferSize(); + + // Computed after Wrappers is initialised so the static initialisation order is safe. 
+ private static int GetMaximumRewindBufferSize() + { + var max = 0; + foreach (var w in Wrappers) + { + if (w.MinimumRewindBufferSize > max) + { + max = w.MinimumRewindBufferSize; + } + } + return max; + } } diff --git a/src/SharpCompress/IO/SeekableSharpCompressStream.cs b/src/SharpCompress/IO/SeekableSharpCompressStream.cs index 0aba9c124..bad123bb2 100644 --- a/src/SharpCompress/IO/SeekableSharpCompressStream.cs +++ b/src/SharpCompress/IO/SeekableSharpCompressStream.cs @@ -80,7 +80,8 @@ public override void Rewind(bool stopRecording = false) } } - public override void StartRecording() => _recordedPosition = _stream.Position; + public override void StartRecording(int? minBufferSize = null) => + _recordedPosition = _stream.Position; public override void StopRecording() => _recordedPosition = null; diff --git a/src/SharpCompress/IO/SharpCompressStream.cs b/src/SharpCompress/IO/SharpCompressStream.cs index 53e2b864f..26560024f 100644 --- a/src/SharpCompress/IO/SharpCompressStream.cs +++ b/src/SharpCompress/IO/SharpCompressStream.cs @@ -175,7 +175,17 @@ public virtual void StopRecording() // (frozen recording mode) until Rewind(stopRecording: true) is called } - public virtual void StartRecording() + /// + /// Begins recording reads so that <see cref="Rewind"/> can replay them. + /// + /// + /// Minimum ring buffer capacity in bytes. When provided and larger than + /// <see cref="Constants.RewindableBufferSize"/>, the ring buffer is allocated + /// with this size. Pass the largest amount of compressed data that may be consumed + /// during format detection before the first rewind. Defaults to + /// <see cref="Constants.RewindableBufferSize"/> when null or not supplied. + /// + public virtual void StartRecording(int? minBufferSize = null) { if (_isPassthrough) { @@ -190,10 +200,14 @@ public virtual void StartRecording() ); } - // Ensure ring buffer exists + // Allocate ring buffer with the requested minimum size (at least the global default). 
if (_ringBuffer is null) { - _ringBuffer = new RingBuffer(Constants.RewindableBufferSize); + var size = + minBufferSize.GetValueOrDefault() > Constants.RewindableBufferSize + ? minBufferSize.GetValueOrDefault() + : Constants.RewindableBufferSize; + _ringBuffer = new RingBuffer(size); } // Mark current position as recording anchor diff --git a/src/SharpCompress/Readers/Tar/TarReader.Factory.cs b/src/SharpCompress/Readers/Tar/TarReader.Factory.cs index f94a89dc7..e6e5a7032 100644 --- a/src/SharpCompress/Readers/Tar/TarReader.Factory.cs +++ b/src/SharpCompress/Readers/Tar/TarReader.Factory.cs @@ -1,3 +1,4 @@ +using System; using System.IO; using System.Threading; using System.Threading.Tasks; @@ -91,7 +92,10 @@ public static async ValueTask OpenAsyncReader( readerOptions ??= new ReaderOptions(); var sharpCompressStream = SharpCompressStream.Create( stream, - bufferSize: readerOptions.RewindableBufferSize + bufferSize: Math.Max( + readerOptions.RewindableBufferSize ?? 0, + TarWrapper.MaximumRewindBufferSize + ) ); long pos = sharpCompressStream.Position; foreach (var wrapper in TarWrapper.Wrappers) @@ -170,7 +174,10 @@ public static IReader OpenReader(Stream stream, ReaderOptions? readerOptions = n readerOptions ??= new ReaderOptions(); var sharpCompressStream = SharpCompressStream.Create( stream, - bufferSize: readerOptions.RewindableBufferSize + bufferSize: Math.Max( + readerOptions.RewindableBufferSize ?? 
0, + TarWrapper.MaximumRewindBufferSize + ) ); long pos = sharpCompressStream.Position; foreach (var wrapper in TarWrapper.Wrappers) diff --git a/src/SharpCompress/packages.lock.json b/src/SharpCompress/packages.lock.json index b44de28be..7d5f04481 100644 --- a/src/SharpCompress/packages.lock.json +++ b/src/SharpCompress/packages.lock.json @@ -268,9 +268,9 @@ "net10.0": { "Microsoft.NET.ILLink.Tasks": { "type": "Direct", - "requested": "[10.0.2, )", - "resolved": "10.0.2", - "contentHash": "sXdDtMf2qcnbygw9OdE535c2lxSxrZP8gO4UhDJ0xiJbl1wIqXS1OTcTDFTIJPOFd6Mhcm8gPEthqWGUxBsTqw==" + "requested": "[10.0.5, )", + "resolved": "10.0.5", + "contentHash": "A+5ZuQ0f449tM+MQrhf6R9ZX7lYpjk/ODEwLYKrnF6111rtARx8fVsm4YznUnQiKnnXfaXNBqgxmil6RW3L3SA==" }, "Microsoft.NETFramework.ReferenceAssemblies": { "type": "Direct", @@ -442,9 +442,9 @@ "net8.0": { "Microsoft.NET.ILLink.Tasks": { "type": "Direct", - "requested": "[8.0.23, )", - "resolved": "8.0.23", - "contentHash": "GqHiB1HbbODWPbY/lc5xLQH8siEEhNA0ptpJCC6X6adtAYNEzu5ZlqV3YHA3Gh7fuEwgA8XqVwMtH2KNtuQM1Q==" + "requested": "[8.0.25, )", + "resolved": "8.0.25", + "contentHash": "sqX4nmBft05ivqKvUT4nxaN8rT3apCLt9SWFkfRrQPwra1zPwFknQAw1lleuMCKOCLvVmOWwrC2iPSm9RiXZUg==" }, "Microsoft.NETFramework.ReferenceAssemblies": { "type": "Direct", diff --git a/tests/SharpCompress.Test/Streams/SharpCompressStreamSeekTest.cs b/tests/SharpCompress.Test/Streams/SharpCompressStreamSeekTest.cs index 05d3dc21d..8840a2ad4 100644 --- a/tests/SharpCompress.Test/Streams/SharpCompressStreamSeekTest.cs +++ b/tests/SharpCompress.Test/Streams/SharpCompressStreamSeekTest.cs @@ -127,4 +127,58 @@ public void Position_SetWithinRecordedRange_Succeeds() Assert.Equal(3, readBuffer[0]); Assert.Equal(4, readBuffer[1]); } + + [Fact] + public void StartRecording_WithLargerMinBufferSize_AllowsLargeRewind() + { + // Simulates the BZip2 scenario: the ring buffer must be large enough + // from the moment StartRecording is called so that a large probe read + // (up to 900 KB 
for BZip2) can be rewound without buffer overflow. + const int largeSize = 100; + const int largeReadSize = 80; + + var data = new byte[100]; + for (var i = 0; i < data.Length; i++) + { + data[i] = (byte)(i + 1); + } + + var ms = new MemoryStream(data); + var nonSeekableMs = new NonSeekableStreamWrapper(ms); + var stream = SharpCompressStream.Create(nonSeekableMs, largeSize); + + // Pass the required size upfront — no expansion needed later + stream.StartRecording(largeSize); + + // Read a large amount (simulating BZip2 block decompression during IsTarFile probe) + var largeBuffer = new byte[largeReadSize]; + stream.Read(largeBuffer, 0, largeReadSize); + + // Rewind must succeed because the buffer was large enough from the start + stream.Rewind(); + + var verifyBuffer = new byte[largeReadSize]; + stream.Read(verifyBuffer, 0, largeReadSize); + Assert.Equal(data[0], verifyBuffer[0]); + Assert.Equal(data[largeReadSize - 1], verifyBuffer[largeReadSize - 1]); + } + + [Fact] + public void StartRecording_DefaultSize_UsesConstantsRewindableBufferSize() + { + // When no minimum is specified StartRecording uses the global default. 
+ var ms = new MemoryStream(new byte[] { 1, 2, 3, 4, 5 }); + var nonSeekableMs = new NonSeekableStreamWrapper(ms); + var stream = SharpCompressStream.Create(nonSeekableMs); + stream.StartRecording(); + + var buffer = new byte[5]; + stream.Read(buffer, 0, 5); + stream.Rewind(); + + var readBuffer = new byte[5]; + stream.Read(readBuffer, 0, 5); + Assert.Equal(1, readBuffer[0]); + Assert.Equal(5, readBuffer[4]); + } } diff --git a/tests/SharpCompress.Test/Tar/TarReaderTests.cs b/tests/SharpCompress.Test/Tar/TarReaderTests.cs index 21c62f837..8cc0b6a38 100644 --- a/tests/SharpCompress.Test/Tar/TarReaderTests.cs +++ b/tests/SharpCompress.Test/Tar/TarReaderTests.cs @@ -2,6 +2,8 @@ using System.Collections.Generic; using System.IO; using SharpCompress.Common; +using SharpCompress.Compressors.BZip2; +using SharpCompress.Factories; using SharpCompress.Readers; using SharpCompress.Readers.Tar; using SharpCompress.Test.Mocks; @@ -58,6 +60,53 @@ public void Tar_Skip() [Fact] public void Tar_GZip_OldGnu_Reader() => Read("Tar.oldgnu.tar.gz", CompressionType.GZip); + [Fact] + public void Tar_BZip2_Reader_NonSeekable() + { + // Regression test for: Dynamic default RingBuffer for BZip2 + // Opening a .tar.bz2 from a non-seekable stream should succeed + // because the ring buffer is sized to TarWrapper.MaximumRewindBufferSize + // up front, so it can hold the BZip2 block read by the IsTarFile probe. + using var fs = File.OpenRead(Path.Combine(TEST_ARCHIVES_PATH, "Tar.tar.bz2")); + using var nonSeekable = new ForwardOnlyStream(fs); + using var reader = ReaderFactory.OpenReader(nonSeekable); + var entryCount = 0; + while (reader.MoveToNextEntry()) + { + if (!reader.Entry.IsDirectory) + { + entryCount++; + } + } + Assert.True(entryCount > 0); + } + + [Fact] + public void TarWrapper_BZip2_MinimumRewindBufferSize_IsMaxBZip2BlockSize() + { + // The BZip2 TarWrapper must declare a MinimumRewindBufferSize large enough + // to hold an entire maximum-size compressed BZip2 block (9 × 100 000 bytes). 
+ var bzip2Wrapper = Array.Find( + TarWrapper.Wrappers, + w => w.CompressionType == CompressionType.BZip2 + ); + Assert.NotNull(bzip2Wrapper); + Assert.Equal(BZip2Constants.baseBlockSize * 9, bzip2Wrapper.MinimumRewindBufferSize); + } + + [Fact] + public void TarWrapper_Default_MinimumRewindBufferSize_Is_DefaultRewindableBufferSize() + { + // Wrappers that don't specify a custom size (unlike BZip2 and ZStandard) default to + // Constants.RewindableBufferSize so existing behaviour is unchanged. + var noneWrapper = Array.Find( + TarWrapper.Wrappers, + w => w.CompressionType == CompressionType.None + ); + Assert.NotNull(noneWrapper); + Assert.Equal(Common.Constants.RewindableBufferSize, noneWrapper.MinimumRewindBufferSize); + } + [Fact] public void Tar_BZip2_Entry_Stream() {