-
Notifications
You must be signed in to change notification settings - Fork 510
Dynamic ring buffer sizing for BZip2 and ZStandard on non-seekable streams #1269
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ddcbb3e
ca52cec
97c4ad2
d278cfb
456a7df
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -175,7 +175,17 @@ public virtual void StopRecording() | |
| // (frozen recording mode) until Rewind(stopRecording: true) is called | ||
| } | ||
|
|
||
| public virtual void StartRecording() | ||
| /// <summary> | ||
| /// Begins recording reads so that <see cref="Rewind()"/> can replay them. | ||
| /// </summary> | ||
| /// <param name="minBufferSize"> | ||
| /// Minimum ring buffer capacity in bytes. When provided and larger than | ||
| /// <see cref="Common.Constants.RewindableBufferSize"/>, the ring buffer is allocated | ||
| /// with this size. Pass the largest amount of compressed data that may be consumed | ||
| /// during format detection before the first rewind. Defaults to | ||
| /// <see cref="Common.Constants.RewindableBufferSize"/> when null or not supplied. | ||
| /// </param> | ||
| public virtual void StartRecording(int? minBufferSize = null) | ||
| { | ||
| if (_isPassthrough) | ||
| { | ||
|
|
@@ -190,10 +200,14 @@ public virtual void StartRecording() | |
| ); | ||
| } | ||
|
|
||
| // Ensure ring buffer exists | ||
| // Allocate ring buffer with the requested minimum size (at least the global default). | ||
| if (_ringBuffer is null) | ||
| { | ||
| _ringBuffer = new RingBuffer(Constants.RewindableBufferSize); | ||
| var size = | ||
| minBufferSize.GetValueOrDefault() > Constants.RewindableBufferSize | ||
| ? minBufferSize.GetValueOrDefault() | ||
| : Constants.RewindableBufferSize; | ||
| _ringBuffer = new RingBuffer(size); | ||
| } | ||
|
Comment on lines
188
to
211
|
||
|
|
||
| // Mark current position as recording anchor | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -1,3 +1,4 @@ | ||||||||||||||||
| using System; | ||||||||||||||||
| using System.IO; | ||||||||||||||||
| using System.Threading; | ||||||||||||||||
| using System.Threading.Tasks; | ||||||||||||||||
|
|
@@ -91,7 +92,10 @@ public static async ValueTask<IAsyncReader> OpenAsyncReader( | |||||||||||||||
| readerOptions ??= new ReaderOptions(); | ||||||||||||||||
| var sharpCompressStream = SharpCompressStream.Create( | ||||||||||||||||
| stream, | ||||||||||||||||
| bufferSize: readerOptions.RewindableBufferSize | ||||||||||||||||
| bufferSize: Math.Max( | ||||||||||||||||
| readerOptions.RewindableBufferSize ?? 0, | ||||||||||||||||
| TarWrapper.MaximumRewindBufferSize | ||||||||||||||||
| ) | ||||||||||||||||
| ); | ||||||||||||||||
|
Comment on lines
+95
to
99
|
||||||||||||||||
| bufferSize: Math.Max( | |
| readerOptions.RewindableBufferSize ?? 0, | |
| TarWrapper.MaximumRewindBufferSize | |
| ) | |
| ); | |
| bufferSize: readerOptions.RewindableBufferSize ?? TarWrapper.MaximumRewindBufferSize | |
| ); |
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -127,4 +127,58 @@ public void Position_SetWithinRecordedRange_Succeeds() | |||||||||||||
| Assert.Equal(3, readBuffer[0]); | ||||||||||||||
| Assert.Equal(4, readBuffer[1]); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| [Fact] | ||||||||||||||
| public void StartRecording_WithLargerMinBufferSize_AllowsLargeRewind() | ||||||||||||||
| { | ||||||||||||||
| // Simulates the BZip2 scenario: the ring buffer must be large enough | ||||||||||||||
| // from the moment StartRecording is called so that a large probe read | ||||||||||||||
| // (up to 900 KB for BZip2) can be rewound without buffer overflow. | ||||||||||||||
| const int largeSize = 100; | ||||||||||||||
| const int largeReadSize = 80; | ||||||||||||||
|
|
||||||||||||||
| var data = new byte[100]; | ||||||||||||||
| for (var i = 0; i < data.Length; i++) | ||||||||||||||
| { | ||||||||||||||
| data[i] = (byte)(i + 1); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| var ms = new MemoryStream(data); | ||||||||||||||
| var nonSeekableMs = new NonSeekableStreamWrapper(ms); | ||||||||||||||
| var stream = SharpCompressStream.Create(nonSeekableMs, largeSize); | ||||||||||||||
|
|
||||||||||||||
| // Pass the required size upfront — no expansion needed later | ||||||||||||||
|
Comment on lines
+148
to
+150
|
||||||||||||||
| var stream = SharpCompressStream.Create(nonSeekableMs, largeSize); | |
| // Pass the required size upfront — no expansion needed later | |
| var stream = SharpCompressStream.Create(nonSeekableMs); | |
| // Pass the required size upfront — forces allocation to at least largeSize |
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -2,6 +2,8 @@ | |||||||||||
| using System.Collections.Generic; | ||||||||||||
| using System.IO; | ||||||||||||
| using SharpCompress.Common; | ||||||||||||
| using SharpCompress.Compressors.BZip2; | ||||||||||||
| using SharpCompress.Factories; | ||||||||||||
| using SharpCompress.Readers; | ||||||||||||
| using SharpCompress.Readers.Tar; | ||||||||||||
| using SharpCompress.Test.Mocks; | ||||||||||||
|
|
@@ -58,6 +60,53 @@ public void Tar_Skip() | |||||||||||
| [Fact] | ||||||||||||
| public void Tar_GZip_OldGnu_Reader() => Read("Tar.oldgnu.tar.gz", CompressionType.GZip); | ||||||||||||
|
|
||||||||||||
| [Fact] | ||||||||||||
| public void Tar_BZip2_Reader_NonSeekable() | ||||||||||||
| { | ||||||||||||
| // Regression test for: Dynamic default RingBuffer for BZip2 | ||||||||||||
| // Opening a .tar.bz2 from a non-seekable stream should succeed | ||||||||||||
| // because EnsureMinimumRewindBufferSize expands the ring buffer | ||||||||||||
| // to hold the BZip2 block before calling IsTarFile. | ||||||||||||
|
Comment on lines
+68
to
+69
|
||||||||||||
| // because EnsureMinimumRewindBufferSize expands the ring buffer | |
| // to hold the BZip2 block before calling IsTarFile. | |
| // because the rewind ring buffer is allocated with enough capacity | |
| // up front (e.g. via MaximumRewindBufferSize / StartRecording) to hold | |
| // the BZip2 block before calling IsTarFile. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@copilot are there more kinds than just BZip2 to worry about? Zstandard?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Only BZip2 needs the enlarged buffer. The other formats all decompress incrementally:
ZSTD_DStreamInSize() ≈ 128 KBand produces output from each chunk before reading more. The default 160 KB ring buffer is larger, so no special treatment is needed.BZip2 is unique in that it must buffer and decode an entire compressed block before producing any output, and at compression level 9 that block can be up to 900 KB of compressed data.