Skip to content

Commit

Permalink
VAULT-20405 chunk decompression to prevent loading full decompressed …
Browse files Browse the repository at this point in the history
…data into memory at once (#26464)

* VAULT-20405 chunk decompression to prevent loading full decompressed data into memory at once

* Add changelog
  • Loading branch information
VioletHynes authored Apr 18, 2024
1 parent ade585a commit 85ed817
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 8 deletions.
3 changes: 3 additions & 0 deletions changelog/26464.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
sdk/decompression: DecompressWithCanary will now chunk the decompression in memory to prevent loading it all at once.
```
25 changes: 17 additions & 8 deletions sdk/helper/compressutil/compress.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import (
"io"

"github.com/golang/snappy"
"github.com/hashicorp/errwrap"
"github.com/pierrec/lz4"
)

Expand All @@ -34,7 +33,7 @@ const (
CompressionCanaryLZ4 byte = '4'
)

// SnappyReadCloser embeds the snappy reader which implements the io.Reader
// CompressUtilReadCloser embeds the snappy reader which implements the io.Reader
// interface. The decompress procedure in this utility expects an
// io.ReadCloser. This type implements the io.Closer interface to retain the
// generic way of decompression.
Expand Down Expand Up @@ -98,7 +97,7 @@ func Compress(data []byte, config *CompressionConfig) ([]byte, error) {
// These are valid compression levels
default:
// If compression level is set to NoCompression or to
// any invalid value, fallback to Defaultcompression
// any invalid value, fallback to DefaultCompression
config.GzipCompressionLevel = gzip.DefaultCompression
}
writer, err = gzip.NewWriterLevel(&buf, config.GzipCompressionLevel)
Expand All @@ -116,7 +115,7 @@ func Compress(data []byte, config *CompressionConfig) ([]byte, error) {
}

if err != nil {
return nil, errwrap.Wrapf("failed to create a compression writer: {{err}}", err)
return nil, fmt.Errorf("failed to create a compression writer: %w", err)
}

if writer == nil {
Expand All @@ -126,7 +125,7 @@ func Compress(data []byte, config *CompressionConfig) ([]byte, error) {
// Compress the input and place it in the same buffer containing the
// canary byte.
if _, err = writer.Write(data); err != nil {
return nil, errwrap.Wrapf("failed to compress input data: err: {{err}}", err)
return nil, fmt.Errorf("failed to compress input data: err: %w", err)
}

// Close the io.WriteCloser
Expand Down Expand Up @@ -206,7 +205,7 @@ func DecompressWithCanary(data []byte) ([]byte, string, bool, error) {
return nil, "", true, nil
}
if err != nil {
return nil, "", false, errwrap.Wrapf("failed to create a compression reader: {{err}}", err)
return nil, "", false, fmt.Errorf("failed to create a compression reader: %w", err)
}
if reader == nil {
return nil, "", false, fmt.Errorf("failed to create a compression reader")
Expand All @@ -217,8 +216,18 @@ func DecompressWithCanary(data []byte) ([]byte, string, bool, error) {

// Read all the compressed data into a buffer
var buf bytes.Buffer
if _, err = io.Copy(&buf, reader); err != nil {
return nil, "", false, err

// Read the compressed data into a buffer, but do so
// slowly to prevent reading all the data into memory
// at once (protecting against e.g. zip bombs).
for {
_, err := io.CopyN(&buf, reader, 1024)
if err != nil {
if err == io.EOF {
break
}
return nil, "", false, err
}
}

return buf.Bytes(), compressionType, false, nil
Expand Down
37 changes: 37 additions & 0 deletions sdk/helper/compressutil/compress_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,40 @@ func TestCompressUtil_InvalidConfigurations(t *testing.T) {
t.Fatal("expected an error")
}
}

// TestDecompressWithCanaryLargeInput tests that DecompressWithCanary works
// as expected even with large values.
// TestDecompressWithCanaryLargeInput tests that DecompressWithCanary works
// as expected even with large values, exercising the chunked decompression
// path (decompression is read in 1 KiB chunks rather than all at once).
func TestDecompressWithCanaryLargeInput(t *testing.T) {
	t.Parallel()

	// Build a large, highly compressible JSON payload. Use a bytes.Buffer
	// with bytes.Repeat instead of string concatenation in a loop, which
	// would perform ~100k quadratic copies.
	var input bytes.Buffer
	input.WriteString(`{"sample":"data`)
	input.Write(bytes.Repeat([]byte(" and data"), 100000))
	input.WriteString(`"}`)
	inputJSONBytes := input.Bytes()

	compressedJSONBytes, err := Compress(inputJSONBytes, &CompressionConfig{Type: CompressionTypeGzip, GzipCompressionLevel: gzip.BestCompression})
	if err != nil {
		t.Fatal(err)
	}

	decompressedJSONBytes, wasNotCompressed, err := Decompress(compressedJSONBytes)
	if err != nil {
		t.Fatal(err)
	}

	// Check if the input for decompress was not compressed in the first place
	if wasNotCompressed {
		t.Fatalf("bytes were not compressed as expected")
	}

	if len(decompressedJSONBytes) == 0 {
		t.Fatalf("decompressed bytes were unexpectedly empty")
	}

	// Compare the value after decompression
	if !bytes.Equal(inputJSONBytes, decompressedJSONBytes) {
		t.Fatalf("decompressed value differs: decompressed value;\nexpected: %q\nactual: %q", string(inputJSONBytes), string(decompressedJSONBytes))
	}
}

0 comments on commit 85ed817

Please sign in to comment.