Skip to content

Commit

Permalink
feat: add LCS (Longest Common Subsequence) based array comparison
Browse files Browse the repository at this point in the history
The default comparison behavior doesn't change. Use the option LCS()
to enable the LCS-based behavior.
  • Loading branch information
wI2L committed Nov 4, 2023
1 parent 5303ffa commit a434103
Show file tree
Hide file tree
Showing 11 changed files with 265 additions and 32 deletions.
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,12 @@ _testmain.go
vendor/
Godeps/

# markdown-spellcheck
# markdown-spellcheck
.spelling

# CI/CD
benchstats
.benchruns
coverage.txt
dist/
*.txt
*.txt
6 changes: 2 additions & 4 deletions .golangci.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
run:
go: 1.21
timeout: 10m
linters:
disable-all: true
enable:
- asciicheck
- bodyclose
- deadcode
- depguard
- dogsled
- dupl
Expand All @@ -29,18 +29,16 @@ linters:
- revive
- rowserrcheck
- staticcheck
- structcheck
- stylecheck
- typecheck
- unconvert
- unparam
- unused
- varcheck
- whitespace
linters-settings:
gofmt:
simplify: true
dupl:
threshold: 400
funlen:
lines: 120
lines: 120
101 changes: 81 additions & 20 deletions differ.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ type options struct {
rationalize bool
invertible bool
equivalent bool
lcs bool
}

type jsonNode struct {
Expand Down Expand Up @@ -130,7 +131,11 @@ func (d *Differ) diff(ptr pointer, src, tgt interface{}, doc string) {
// equivalent.
switch val := src.(type) {
case []interface{}:
d.compareArrays(ptr, val, tgt.([]interface{}), doc)
if d.opts.lcs {
d.compareArraysLCS(ptr, val, tgt.([]interface{}), doc)
} else {
d.compareArrays(ptr, val, tgt.([]interface{}), doc)
}
case map[string]interface{}:
d.compareObjects(ptr, val, tgt.(map[string]interface{}), doc)
default:
Expand Down Expand Up @@ -271,17 +276,17 @@ func (d *Differ) compareObjects(ptr pointer, src, tgt map[string]interface{}, do
func (d *Differ) compareArrays(ptr pointer, src, tgt []interface{}, doc string) {
ptr.snapshot()
sl, tl := len(src), len(tgt)
min := min(sl, tl)
ml := min(sl, tl)

// When the source array contains more elements
// than the target, the surplus entries are removed.
// Every removal shifts the following elements down
// by one, so the removal index is always equal to
// the length of the target array.
if tl < sl {
np := ptr.clone()
np.appendIndex(min) // "removal" path
np.appendIndex(ml) // "removal" path
p := np.copy()
for i := min; i < sl; i++ {
for i := ml; i < sl; i++ {
ptr.appendIndex(i)

if !d.isIgnored(ptr) {
Expand All @@ -297,7 +302,7 @@ func (d *Differ) compareArrays(ptr pointer, src, tgt []interface{}, doc string)
comparisons:
// Compare the elements at each index present in
// both the source and destination arrays.
for i := 0; i < min; i++ {
for i := 0; i < ml; i++ {
ptr.appendIndex(i)
if d.opts.rationalize {
d.diff(ptr, src[i], tgt[i], findIndex(doc, ptr.base.idx))
Expand All @@ -313,7 +318,7 @@ comparisons:
np := ptr.clone()
np.appendKey("-") // "append" path
p := np.copy()
for i := min; i < tl; i++ {
for i := ml; i < tl; i++ {
ptr.appendIndex(i)
if !d.isIgnored(ptr) {
d.add(p, tgt[i], doc)
Expand All @@ -323,6 +328,76 @@ comparisons:
}
}

// compareArraysLCS compares the arrays src and tgt, using the index pairs
// returned by lcs as anchors. The elements referenced by a pair are equal
// on both sides and are skipped. The elements located between two
// consecutive anchors, and after the last one, are processed as follows:
//
//   - while both sides still have an element, src and tgt elements are
//     compared pairwise with d.diff, at the source index;
//   - surplus source elements are handed to d.remove, at the source index;
//   - surplus target elements are handed to d.add, at the target index.
//
// Elements whose pointer is reported by d.isIgnored are not removed/added.
//
// NOTE(review): the index appended to ptr for removals is the position in
// src and is not adjusted for removals/additions already emitted for this
// array. JSON Patch operations apply sequentially, so confirm that arrays
// needing several removals or insertions produce a patch that applies
// correctly.
func (d *Differ) compareArraysLCS(ptr pointer, src, tgt []interface{}, doc string) {
	ptr.snapshot()
	anchors := lcs(src, tgt)

	// si and ti are the cursors into src and tgt respectively.
	var si, ti int

	// The last iteration (k == len(anchors)) uses the lengths of both
	// arrays as a virtual anchor, so that trailing elements go through
	// the same logic as the gaps between two real anchors.
	for k := 0; k <= len(anchors); k++ {
		stopS, stopT := len(src), len(tgt)
		if k < len(anchors) {
			stopS, stopT = anchors[k][0], anchors[k][1]
		}
		for si < stopS || ti < stopT {
			switch {
			case si < stopS && ti < stopT:
				// Unmatched element on both sides: compare them.
				ptr.appendIndex(si)
				elemDoc := doc
				if d.opts.rationalize {
					elemDoc = findIndex(doc, ptr.base.idx)
				}
				d.diff(ptr, src[si], tgt[ti], elemDoc)
				ptr.rewind()
				si++
				ti++
			case si < stopS:
				// Only the source has elements left in this gap.
				ptr.appendIndex(si)
				if !d.isIgnored(ptr) {
					d.remove(ptr.copy(), src[si])
				}
				ptr.rewind()
				si++
			default:
				// Only the target has elements left in this gap.
				ptr.appendIndex(ti)
				if !d.isIgnored(ptr) {
					d.add(ptr.copy(), tgt[ti], doc)
				}
				ptr.rewind()
				ti++
			}
		}
		// Both cursors now sit on the anchor itself, where the
		// elements are equal: step over it.
		si++
		ti++
	}
}

func (d *Differ) unorderedDeepEqualSlice(src, tgt []interface{}) bool {
if len(src) != len(tgt) {
return false
Expand Down Expand Up @@ -439,17 +514,3 @@ func insertionSort(v []string) {
func b2s(b []byte) string {
return *(*string)(unsafe.Pointer(&b))
}

// min returns the smaller of the two integers i and j.
func min(i, j int) int {
	if j < i {
		return j
	}
	return i
}

// max returns the larger of the two integers i and j.
func max(i, j int) int {
	if j > i {
		return j
	}
	return i
}
62 changes: 60 additions & 2 deletions differ_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@ func TestRootCases(t *testing.T) { runCasesFromFile(t, "testdata/tests/root.js
func TestDiffer_Reset(t *testing.T) {
d := &Differ{
ptr: pointer{
buf: make([]byte, 15, 15),
buf: make([]byte, 15),
sep: 15,
},
hashmap: map[uint64]jsonNode{
1: {},
},
patch: make([]Operation, 42, 42),
patch: make([]Operation, 42),
}
d.Reset()

Expand Down Expand Up @@ -197,6 +197,64 @@ func TestDiffer_unorderedDeepEqualSlice(t *testing.T) {
}
}

// Test_issue17 checks that, with the LCS option enabled, comparing an array
// against a copy of itself that lacks one element in the middle produces a
// patch made of exactly one operation. The resulting patch is logged as
// JSON to ease debugging.
func Test_issue17(t *testing.T) {
	type (
		VolumeMount struct {
			Name      string `json:"name"`
			MountPath string `json:"mountPath"`
		}
		Container struct {
			VolumeMounts []VolumeMount `json:"volumeMounts,omitempty"`
		}
	)
	mounts := []VolumeMount{
		{Name: "name1", MountPath: "/foo/bar/1"},
		{Name: "name2", MountPath: "/foo/bar/2"},
		{Name: "name3", MountPath: "/foo/bar/3"},
		{Name: "name4", MountPath: "/foo/bar/4"},
		{Name: "name5", MountPath: "/foo/bar/5"},
		{Name: "name6", MountPath: "/foo/bar/6"},
	}
	src := Container{VolumeMounts: mounts}

	// The target is the source without its third element ("name3").
	// A fresh slice is used so that the source fixture is not altered.
	kept := make([]VolumeMount, 0, len(mounts)-1)
	kept = append(kept, mounts[:2]...)
	kept = append(kept, mounts[3:]...)
	tgt := Container{VolumeMounts: kept}

	patch, err := Compare(src, tgt, LCS())
	if err != nil {
		// Without this check, a comparison failure would only show
		// up as a misleading "got 0 operations" message below.
		t.Fatalf("comparing source and target: %v", err)
	}
	if len(patch) != 1 {
		t.Errorf("expected a patch with 1 operation, got %d", len(patch))
	}
	b, err := json.Marshal(patch)
	if err != nil {
		t.Fatalf("marshaling patch: %v", err)
	}
	t.Logf("%s", b)
}

func Benchmark_sortStrings(b *testing.B) {
if testing.Short() {
b.Skip()
Expand Down
2 changes: 1 addition & 1 deletion example_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ func ExampleCompareJSON() {
Age int `json:"age"`
Phones []Phone `json:"phoneNumbers"`
}
source, err := os.ReadFile("testdata/examples/john.json")
source, err := os.ReadFile("testdata/examples/person.json")
if err != nil {
log.Fatal(err)
}
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
module github.com/wI2L/jsondiff

go 1.18
go 1.21
39 changes: 39 additions & 0 deletions lcs.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
package jsondiff

// lcs returns the index pairs of a longest common subsequence of
// src and tgt. Each pair holds the position of a matched element
// in src (first value) and in tgt (second value), and the pairs
// are sorted in ascending order of both positions. Elements are
// compared with deepEqual.
func lcs(src, tgt []interface{}) [][2]int {
	rows, cols := len(src)+1, len(tgt)+1

	// table[r][c] is the length of the LCS of src[:r] and tgt[:c].
	// Row 0 and column 0 represent an empty prefix and stay zero.
	table := make([][]int, rows)
	for r := range table {
		table[r] = make([]int, cols)
	}
	for r := 1; r < rows; r++ {
		for c := 1; c < cols; c++ {
			if deepEqual(src[r-1], tgt[c-1]) {
				table[r][c] = table[r-1][c-1] + 1
				continue
			}
			up, left := table[r-1][c], table[r][c-1]
			if up > left {
				table[r][c] = up
			} else {
				table[r][c] = left
			}
		}
	}
	// Walk back from the bottom-right corner of the table. Every
	// diagonal step lowers the table value by exactly one and the
	// other steps keep it unchanged, so the walk yields exactly
	// table[len(src)][len(tgt)] pairs, from last to first: store
	// them back to front to get the result in ascending order.
	r, c := len(src), len(tgt)
	pairs := make([][2]int, table[r][c])

	for k := len(pairs); r > 0 && c > 0; {
		if deepEqual(src[r-1], tgt[c-1]) {
			k--
			r--
			c--
			pairs[k] = [2]int{r, c}
		} else if table[r-1][c] > table[r][c-1] {
			r--
		} else {
			c--
		}
	}
	return pairs
}
Loading

0 comments on commit a434103

Please sign in to comment.