From 987bfdddfa3bf814040a90749f10ed05f72e378c Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Thu, 28 Nov 2019 18:43:34 +0100 Subject: [PATCH] S2: limit max repeat length When encoding a single, extremely large block containing extremely long repeats, we could store the wrong length. Does not affect streaming mode. --- s2/encode_other.go | 9 +++++++++ s2/s2_test.go | 26 ++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/s2/encode_other.go b/s2/encode_other.go index 0f38305208..08cb5b8540 100644 --- a/s2/encode_other.go +++ b/s2/encode_other.go @@ -161,12 +161,21 @@ func emitRepeat(dst []byte, offset, length int) int { dst[0] = 6<<2 | tagCopy1 return 4 } + const maxRepeat = (1 << 24) - 1 length -= 1 << 16 + left := 0 + if length > maxRepeat { + left = length - maxRepeat + length = maxRepeat + } dst[4] = uint8(length >> 16) dst[3] = uint8(length >> 8) dst[2] = uint8(length >> 0) dst[1] = 0 dst[0] = 7<<2 | tagCopy1 + if left > 0 { + return 5 + emitRepeat(dst[5:], offset, left) + } return 5 } diff --git a/s2/s2_test.go b/s2/s2_test.go index 010e934713..db679978a7 100644 --- a/s2/s2_test.go +++ b/s2/s2_test.go @@ -1531,6 +1531,32 @@ func testFile(t *testing.T, i, repeat int) { }) } +func TestDataRoundtrips(t *testing.T) { + test := func(t *testing.T, data []byte) { + t.Run("s2", func(t *testing.T) { + testWriterRoundtrip(t, data) + }) + t.Run("s2-better", func(t *testing.T) { + testWriterRoundtrip(t, data, WriterBetterCompression()) + }) + t.Run("block", func(t *testing.T) { + d := data + testBlockRoundtrip(t, d) + }) + t.Run("block-better", func(t *testing.T) { + d := data + testBetterBlockRoundtrip(t, d) + }) + t.Run("snappy", func(t *testing.T) { + testSnappyDecode(t, data) + }) + } + t.Run("longblock", func(t *testing.T) { + data := make([]byte, 1<<25) + test(t, data) + }) +} + // Naming convention is kept similar to what snappy's C++ implementation uses. 
func Benchmark_UFlat0(b *testing.B) { benchFile(b, 0, true) } func Benchmark_UFlat1(b *testing.B) { benchFile(b, 1, true) }