From 9ba467608b1844e2de0fb0f4c5f8eda50da401e3 Mon Sep 17 00:00:00 2001 From: Sean Gosiaco Date: Sun, 2 Jul 2023 02:38:16 -0700 Subject: [PATCH] Fixed index out of range issue when loading large snapshot (#88) I recently found an issue when trying to load a snapshot created with a large number of rows (in my case 10 million). It would result in an index out of range panic. I was able to reproduce this in the unit tests with around 3 million players. ``` Running tool: /usr/local/go/bin/go test -timeout 30s -run ^TestLargeSnapshot$ github.com/kelindar/column --- FAIL: TestLargeSnapshot (4.20s) panic: runtime error: index out of range [128] with length 128 [recovered] panic: runtime error: index out of range [128] with length 128 goroutine 4 [running]: testing.tRunner.func1.2({0x1053a25c0, 0x14059d74930}) /usr/local/go/src/testing/testing.go:1389 +0x1c8 testing.tRunner.func1() /usr/local/go/src/testing/testing.go:1392 +0x384 panic({0x1053a25c0, 0x14059d74930}) /usr/local/go/src/runtime/panic.go:838 +0x204 github.com/kelindar/column.(*Collection).readState.func1.1(0x14010b50090) /Users/sgosiaco/repos/column/snapshot.go:188 +0x274 github.com/kelindar/column.(*Collection).Query(0x1400012c370, 0x1400005bd18) /Users/sgosiaco/repos/column/collection.go:354 +0x48 github.com/kelindar/column.(*Collection).readState.func1(0x1400005bd88?, 0x105250968?) /Users/sgosiaco/repos/column/snapshot.go:184 +0x78 github.com/kelindar/iostream.(*Reader).ReadRange(0x1402a07f020, 0x1400010fe10) /Users/sgosiaco/go/pkg/mod/github.com/kelindar/iostream@v1.3.0/reader.go:227 +0x8c github.com/kelindar/column.(*Collection).readState(0x1400012c370, {0x1053c3380?, 0x14000163900?}) /Users/sgosiaco/repos/column/snapshot.go:183 +0x1d4 github.com/kelindar/column.(*Collection).Restore(0x1400012c370, {0x1053c3080, 0x140001014a0}) /Users/sgosiaco/repos/column/snapshot.go:44 +0x54 github.com/kelindar/column.TestLargeSnapshot(0x140001151e0) /Users/sgosiaco/repos/column/snapshot_test.go:229 +0x174 testing.tRunner(0x140001151e0, 0x1053bf558) /usr/local/go/src/testing/testing.go:1439 +0x110 created by testing.(*T).Run /usr/local/go/src/testing/testing.go:1486 +0x300 FAIL github.com/kelindar/column 4.339s FAIL ``` Looking at the code I saw that currently the maximum number of chunks able to be read is 128 due to the initialized size of the commits array. I've changed this to a map to allow for an unknown number of chunks/commits to be loaded since when saving the snapshot there doesn't seem to be a limit on the potential number of chunks that can be saved. --- snapshot.go | 6 +++--- snapshot_test.go | 28 ++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/snapshot.go b/snapshot.go index a7de7f3..7003d14 100644 --- a/snapshot.go +++ b/snapshot.go @@ -163,9 +163,9 @@ func (c *Collection) writeState(dst io.Writer) (int64, error) { // readState reads a collection snapshotted state from the underlying reader. It // returns the last commit IDs for each chunk. -func (c *Collection) readState(src io.Reader) ([]uint64, error) { +func (c *Collection) readState(src io.Reader) (map[commit.Chunk]uint64, error) { r := iostream.NewReader(src) - commits := make([]uint64, 128) + commits := make(map[commit.Chunk]uint64) // Read the version and make sure it matches version, err := r.ReadUvarint() @@ -185,7 +185,7 @@ func (c *Collection) readState(src io.Reader) ([]uint64, error) { txn.dirty.Set(uint32(chunk)) // Read the last written commit ID for the chunk - if commits[chunk], err = r.ReadUvarint(); err != nil { + if commits[commit.Chunk(chunk)], err = r.ReadUvarint(); err != nil { return err } diff --git a/snapshot_test.go b/snapshot_test.go index a0f8206..abcc3e0 100644 --- a/snapshot_test.go +++ b/snapshot_test.go @@ -202,6 +202,34 @@ func TestSnapshot(t *testing.T) { assert.Equal(t, amount, output.Count()) } +func TestLargeSnapshot(t *testing.T) { + amount := 3_000_000 + buffer := bytes.NewBuffer(nil) + input := loadPlayers(amount) + + var wg sync.WaitGroup + wg.Add(amount) + go func() { + for i := 0; i < amount; i++ { + assert.NoError(t, input.QueryAt(uint32(i), func(r Row) error { + r.SetString("name", "Roman") + return nil + })) + wg.Done() + } + }() + + // Start snapshotting + assert.NoError(t, input.Snapshot(buffer)) + assert.NotZero(t, buffer.Len()) + + // Restore the snapshot + wg.Wait() + output := newEmpty(amount) + assert.NoError(t, output.Restore(buffer)) + assert.Equal(t, amount, output.Count()) +} + func TestSnapshotFailures(t *testing.T) { input := NewCollection() input.CreateColumn("name", ForString())