Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix issue where JSON diff would fail under specific circumstances. #8534

Merged
merged 4 commits into from
Nov 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 59 additions & 40 deletions go/store/prolly/tree/indexed_json_diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,61 +44,74 @@ func NewIndexedJsonDiffer(ctx context.Context, from, to IndexedJsonDocument) (*I
differ.fromStop = differ.fromStop.parent
differ.toStop = differ.toStop.parent

if differ.from == nil || differ.to == nil {
// This can happen when either document fits in a single chunk.
// We don't use the chunk differ in this case, and instead we create the cursors without it.
var currentFromCursor, currentToCursor *JsonCursor
if differ.from == nil {
// The "from" document fits inside a single chunk.
// We can't create the "from" cursor from the differ, so we create it here instead.
diffKey := []byte{byte(startOfValue)}
currentFromCursor, err := newJsonCursorAtStartOfChunk(ctx, from.m.NodeStore, from.m.Root, diffKey)
currentFromCursor, err = newJsonCursorAtStartOfChunk(ctx, from.m.NodeStore, from.m.Root, diffKey)
if err != nil {
return nil, err
}
currentToCursor, err := newJsonCursorAtStartOfChunk(ctx, to.m.NodeStore, to.m.Root, diffKey)
// Advance the cursor past the "beginning of document" location, so that it aligns with the "to" cursor no matter what.
err = advanceCursor(ctx, &currentFromCursor)
if err != nil {
return nil, err
}
}

if differ.to == nil {
// The "to" document fits inside a single chunk.
// We can't create the "from" cursor from the differ, so we create it here instead.
diffKey := []byte{byte(startOfValue)}
currentToCursor, err = newJsonCursorAtStartOfChunk(ctx, to.m.NodeStore, to.m.Root, diffKey)
if err != nil {
return nil, err
}
// Advance the cursor past the "beginning of document" location, so that it aligns with the "from" cursor no matter what.
err = advanceCursor(ctx, &currentToCursor)
if err != nil {
return nil, err
}
return &IndexedJsonDiffer{
differ: differ,
from: from,
to: to,
currentFromCursor: currentFromCursor,
currentToCursor: currentToCursor,
}, nil
}

return &IndexedJsonDiffer{
differ: differ,
from: from,
to: to,
differ: differ,
from: from,
to: to,
currentFromCursor: currentFromCursor,
currentToCursor: currentToCursor,
}, nil
}

func advanceCursor(ctx context.Context, jCur **JsonCursor) (err error) {
if (*jCur).jsonScanner.atEndOfChunk() {
err = (*jCur).cur.advance(ctx)
if err != nil {
return err
}
*jCur = nil
} else {
err = (*jCur).jsonScanner.AdvanceToNextLocation()
if err != nil {
return err
}
}
return nil
}

// Next computes the next diff between the two JSON documents.
// To accomplish this, it uses the underlying Differ to find chunks that have changed, and uses a pair of JsonCursors
// to walk corresponding chunks.
func (jd *IndexedJsonDiffer) Next(ctx context.Context) (diff JsonDiff, err error) {
// helper function to advance a JsonCursor and set it to nil if it reaches the end of a chunk
advanceCursor := func(jCur **JsonCursor) (err error) {
if (*jCur).jsonScanner.atEndOfChunk() {
err = (*jCur).cur.advance(ctx)
if err != nil {
return err
}
*jCur = nil
} else {
err = (*jCur).jsonScanner.AdvanceToNextLocation()
if err != nil {
return err
}
}
return nil
}

newAddedDiff := func(key []byte) (JsonDiff, error) {
addedValue, err := jd.currentToCursor.NextValue(ctx)
if err != nil {
return JsonDiff{}, err
}
err = advanceCursor(&jd.currentToCursor)
err = advanceCursor(ctx, &jd.currentToCursor)
if err != nil {
return JsonDiff{}, err
}
Expand All @@ -114,7 +127,7 @@ func (jd *IndexedJsonDiffer) Next(ctx context.Context) (diff JsonDiff, err error
if err != nil {
return JsonDiff{}, err
}
err = advanceCursor(&jd.currentFromCursor)
err = advanceCursor(ctx, &jd.currentFromCursor)
if err != nil {
return JsonDiff{}, err
}
Expand Down Expand Up @@ -150,29 +163,35 @@ func (jd *IndexedJsonDiffer) Next(ctx context.Context) (diff JsonDiff, err error
return JsonDiff{}, err
}
} else if jd.currentFromCursor == nil {
// We exhausted the current `from` chunk but not the `to` chunk. Since the chunk boundaries don't align on
// We exhausted the current `from` chunk but not the current `to` chunk. Since the chunk boundaries don't align on
// the same key, we need to continue into the next chunk.

// Alternatively, the "to" cursor was created during construction because the "to" document fit in a single chunk,
// and the "from" cursor hasn't been created yet.

jd.currentFromCursor, err = newJsonCursorFromCursor(ctx, jd.differ.from)
if err != nil {
return JsonDiff{}, err
}

err = advanceCursor(&jd.currentFromCursor)
err = advanceCursor(ctx, &jd.currentFromCursor)
if err != nil {
return JsonDiff{}, err
}
continue
} else if jd.currentToCursor == nil {
// We exhausted the current `to` chunk but not the `from` chunk. Since the chunk boundaries don't align on
// We exhausted the current `to` chunk but not the current `from` chunk. Since the chunk boundaries don't align on
// the same key, we need to continue into the next chunk.

// Alternatively, the "from" cursor was created during construction because the "from" document fit in a single chunk,
// and the "to" cursor hasn't been created yet.

jd.currentToCursor, err = newJsonCursorFromCursor(ctx, jd.differ.to)
if err != nil {
return JsonDiff{}, err
}

err = advanceCursor(&jd.currentToCursor)
err = advanceCursor(ctx, &jd.currentToCursor)
if err != nil {
return JsonDiff{}, err
}
Expand Down Expand Up @@ -209,11 +228,11 @@ func (jd *IndexedJsonDiffer) Next(ctx context.Context) (diff JsonDiff, err error
if compareJsonLocations(fromCurrentLocation, toCurrentLocation) != 0 {
return JsonDiff{}, jsonParseError
}
err = advanceCursor(&jd.currentFromCursor)
err = advanceCursor(ctx, &jd.currentFromCursor)
if err != nil {
return JsonDiff{}, err
}
err = advanceCursor(&jd.currentToCursor)
err = advanceCursor(ctx, &jd.currentToCursor)
if err != nil {
return JsonDiff{}, err
}
Expand All @@ -230,11 +249,11 @@ func (jd *IndexedJsonDiffer) Next(ctx context.Context) (diff JsonDiff, err error
// Otherwise, compare them and possibly return a modification.
if (fromScanner.current() == '{' && toScanner.current() == '{') ||
(fromScanner.current() == '[' && toScanner.current() == '[') {
err = advanceCursor(&jd.currentFromCursor)
err = advanceCursor(ctx, &jd.currentFromCursor)
if err != nil {
return JsonDiff{}, err
}
err = advanceCursor(&jd.currentToCursor)
err = advanceCursor(ctx, &jd.currentToCursor)
if err != nil {
return JsonDiff{}, err
}
Expand Down
25 changes: 21 additions & 4 deletions go/store/prolly/tree/json_diff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -287,10 +287,13 @@ func largeJsonDiffTests(t *testing.T) []jsonDiffTest {
ctx := sql.NewEmptyContext()
ns := NewTestNodeStore()

emptyDocument := types.JSONDocument{Val: types.JsonObject{}}

insert := func(document types.MutableJSON, path string, val interface{}) types.MutableJSON {
jsonVal, inRange, err := types.JSON.Convert(val)
require.NoError(t, err)
require.True(t, (bool)(inRange))
document = document.Clone(ctx).(types.MutableJSON)
newDoc, changed, err := document.Insert(ctx, path, jsonVal.(sql.JSONWrapper))
require.NoError(t, err)
require.True(t, changed)
Expand Down Expand Up @@ -400,6 +403,18 @@ func largeJsonDiffTests(t *testing.T) []jsonDiffTest {
},
},
},
{
// This is a regression test.
// If:
// - One document fits in a single chunk and the other doesn't
// - The location of the chunk boundary in the larger document is also present in the smaller document
// - The chunk boundary doesn't fall at the beginning of value.
// Then the differ would fail to advance the prolly tree cursor and would incorrectly see the larger document as corrupt.
// The values in this test case are specifically chosen to meet these conditions.
name: "no error when diffing large doc with small doc",
from: largeObject,
to: insert(emptyDocument, "$.level6", insert(emptyDocument, "$.level4", lookup(largeObject, "$.level6.level4"))),
},
}
}

Expand Down Expand Up @@ -473,9 +488,11 @@ func runTest(t *testing.T, test jsonDiffTest) {

return cmp == 0
}
require.Equal(t, len(test.expectedDiffs), len(actualDiffs))
for i, expected := range test.expectedDiffs {
actual := actualDiffs[i]
require.True(t, diffsEqual(expected, actual), fmt.Sprintf("Expected: %v\nActual: %v", expected, actual))
if test.expectedDiffs != nil {
require.Equal(t, len(test.expectedDiffs), len(actualDiffs))
for i, expected := range test.expectedDiffs {
actual := actualDiffs[i]
require.True(t, diffsEqual(expected, actual), fmt.Sprintf("Expected: %v\nActual: %v", expected, actual))
}
}
}
2 changes: 1 addition & 1 deletion go/store/prolly/tree/json_scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ type JsonScanner struct {
valueOffset int
}

var jsonParseError = fmt.Errorf("an error occurred while reading or writing JSON to/from the database. This is most likely a bug, but could indicate database corruption")
var jsonParseError = fmt.Errorf("encountered invalid JSON while reading JSON from the database, or while preparing to write JSON to the database. This is most likely a bug in JSON diffing")

func (j JsonScanner) Clone() JsonScanner {
return JsonScanner{
Expand Down
Loading