Skip to content

Commit 8547e7d

Browse files
committed
initial commit: implement depth-first visiting of a JSON tree
0 parents  commit 8547e7d

9 files changed

+661
-0
lines changed

LICENSE

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
Copyright (c) 2017 Tyler Christensen. All rights reserved.
2+
3+
Redistribution and use in source and binary forms, with or without
4+
modification, are permitted provided that the following conditions are
5+
met:
6+
7+
* Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
* Redistributions in binary form must reproduce the above
10+
copyright notice, this list of conditions and the following disclaimer
11+
in the documentation and/or other materials provided with the
12+
distribution.
13+
* Neither the name of Google Inc. nor the names of its
14+
contributors may be used to endorse or promote products derived from
15+
this software without specific prior written permission.
16+
17+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

README.md

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# jsonoscope [![GoDoc](https://godoc.org/github.com/tylerchr/jsonoscope?status.svg)](https://godoc.org/github.com/tylerchr/jsonoscope)
2+
3+
Package jsonoscope contains tooling for structural analysis of JSON documents. It implements depth-first tree traversal of JSON trees and invokes visitor methods for the entry and exit of each node.

doc.go

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
// Package jsonoscope contains tooling for structural analysis of JSON documents.
2+
package jsonoscope

equal.go

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
package jsonoscope
2+
3+
import (
4+
"bytes"
5+
"io"
6+
)
7+
8+
// Equal indicates whether the two Readers contain semantically
9+
// identical JSON data by comparing the signatures of their root
10+
// JSON objects.
11+
func Equal(r1, r2 io.Reader) (bool, error) {
12+
13+
var sig1, sig2 []byte
14+
15+
_ = Recurse(r1, CustomVisitor{
16+
OnExit: func(path string, token Token, sig []byte) {
17+
if path == "." {
18+
sig1 = sig
19+
}
20+
},
21+
})
22+
23+
_ = Recurse(r2, CustomVisitor{
24+
OnExit: func(path string, token Token, sig []byte) {
25+
if path == "." {
26+
sig2 = sig
27+
}
28+
},
29+
})
30+
31+
return bytes.Equal(sig1, sig2), nil
32+
33+
}

equal_test.go

+78
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
package jsonoscope
2+
3+
import (
4+
"bytes"
5+
"encoding/json"
6+
"reflect"
7+
"testing"
8+
)
9+
10+
func TestEqual(t *testing.T) {
11+
12+
cases := []struct {
13+
First, Second []byte
14+
Equal bool
15+
}{
16+
// formatting does not matter
17+
{
18+
First: []byte(`[1,2,3]`),
19+
Second: []byte(`[ 1, 2, 3 ]`),
20+
Equal: true,
21+
},
22+
23+
// order matters in arrays
24+
{
25+
First: []byte(`[1, 2, 3]`),
26+
Second: []byte(`[1, 3, 2]`),
27+
Equal: false,
28+
},
29+
30+
// order does not matter in objects
31+
{
32+
First: []byte(`{ "Planet": "Earth", "Index": 3 }`),
33+
Second: []byte(`{ "Index": 3, "Planet": "Earth" }`),
34+
Equal: true,
35+
},
36+
}
37+
38+
for i, c := range cases {
39+
40+
eq, err := Equal(bytes.NewReader(c.First), bytes.NewReader(c.Second))
41+
if err != nil {
42+
panic(err)
43+
}
44+
45+
if eq != c.Equal {
46+
t.Errorf("[case %d] Unexpected equality: expected %t but got %t\n", i, c.Equal, eq)
47+
}
48+
49+
}
50+
51+
}
52+
53+
func BenchmarkEqual(b *testing.B) {
54+
55+
for i := 0; i < b.N; i++ {
56+
eq, _ := Equal(bytes.NewReader(SampleJSON), bytes.NewReader(SampleJSON))
57+
if !eq {
58+
b.Fatalf("not equal")
59+
}
60+
}
61+
62+
}
63+
64+
func BenchmarkDeepEqual(b *testing.B) {
65+
66+
for i := 0; i < b.N; i++ {
67+
68+
var json1, json2 map[string]interface{}
69+
70+
_ = json.NewDecoder(bytes.NewReader(SampleJSON)).Decode(&json1)
71+
_ = json.NewDecoder(bytes.NewReader(SampleJSON)).Decode(&json2)
72+
73+
eq := reflect.DeepEqual(json1, json2)
74+
if !eq {
75+
b.Fatalf("not equal")
76+
}
77+
}
78+
}

recurse.go

+250
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,250 @@
1+
package jsonoscope
2+
3+
import (
4+
"crypto/sha1"
5+
"encoding/json"
6+
"hash"
7+
"io"
8+
"strconv"
9+
"strings"
10+
"sync"
11+
)
12+
13+
const (
14+
Null Token = 1 + iota
15+
Number
16+
Boolean
17+
String
18+
Array
19+
Object
20+
)
21+
22+
type (
23+
recurser struct {
24+
dec *json.Decoder // the source of JSON tokens
25+
visitor Visitor // visitors to invoke as we traverse the JSON tree
26+
hasher hash.Hash // a hash function used to calculate signatures
27+
28+
// sigpool pools buffers used for generating an object signature
29+
sigpool *sync.Pool
30+
31+
// we can precompute signatures for constant JSON values as soon
32+
// as we have a hash function, instead of recalculating them each
33+
// time they occur in the JSON data
34+
trueSig []byte
35+
falseSig []byte
36+
nullSig []byte
37+
}
38+
39+
// Token indicates the type of the value at a given JSON path. It is always
40+
// one of: Null, Number, Boolean, String, Array, or Object.
41+
Token int
42+
)
43+
44+
// String returns a string in the set {Null, Number, Boolean, String,
45+
// Array, Object}, or "<unknown>" if the value of Token t is invalid.
46+
func (t Token) String() string {
47+
switch t {
48+
case Null:
49+
return "Null"
50+
case Number:
51+
return "Number"
52+
case Boolean:
53+
return "Boolean"
54+
case String:
55+
return "String"
56+
case Array:
57+
return "Array"
58+
case Object:
59+
return "Object"
60+
default:
61+
return "<unknown>"
62+
}
63+
}
64+
65+
// Recurse performs a depth-first search over a JSON tree and invokes the methods
66+
// of the provided Visitor for each value in the tree.
67+
func Recurse(r io.Reader, vis Visitor) error {
68+
69+
h := sha1.New()
70+
71+
rec := &recurser{
72+
dec: json.NewDecoder(r),
73+
visitor: vis,
74+
hasher: h,
75+
sigpool: &sync.Pool{
76+
New: func() interface{} {
77+
return make([]byte, h.Size())
78+
},
79+
},
80+
}
81+
82+
rec.precompute()
83+
84+
rec.dec.UseNumber()
85+
86+
_, err := rec.recurse()
87+
return err
88+
}
89+
90+
// recurse recurses through the JSON from r.dec
91+
func (r *recurser) recurse() ([]byte, error) {
92+
t, err := r.dec.Token()
93+
if err != nil {
94+
return nil, err
95+
}
96+
return r.recurseToken(".", t)
97+
}
98+
99+
// precompute generates hashes for true, false, and null.
100+
func (r *recurser) precompute() {
101+
102+
r.hasher.Reset()
103+
r.hasher.Write([]byte("true"))
104+
r.trueSig = r.hasher.Sum(nil)
105+
106+
r.hasher.Reset()
107+
r.hasher.Write([]byte("false"))
108+
r.falseSig = r.hasher.Sum(nil)
109+
110+
r.hasher.Reset()
111+
r.hasher.Write([]byte("null"))
112+
r.nullSig = r.hasher.Sum(nil)
113+
114+
}
115+
116+
// recurseToken generates the hash of any JSON token.
117+
func (r *recurser) recurseToken(path string, t json.Token) (sig []byte, err error) {
118+
119+
switch tt := t.(type) {
120+
121+
case json.Delim: // for the four JSON delimiters [ ] { }
122+
if tt == '[' {
123+
r.visitor.Enter(path, Array)
124+
sig, err = r.recurseArray(path)
125+
r.visitor.Exit(path, Array, sig)
126+
} else if tt == '{' {
127+
r.visitor.Enter(path, Object)
128+
sig, err = r.recurseObject(path)
129+
r.visitor.Exit(path, Object, sig)
130+
}
131+
132+
case bool: // for JSON booleans
133+
r.visitor.Enter(path, Boolean)
134+
if tt {
135+
sig = r.trueSig[:]
136+
} else {
137+
sig = r.falseSig[:]
138+
}
139+
r.visitor.Exit(path, Boolean, sig)
140+
141+
case json.Number: // for JSON numbers
142+
r.visitor.Enter(path, Number)
143+
r.hasher.Reset()
144+
r.hasher.Write([]byte(tt))
145+
sig = r.hasher.Sum(nil)
146+
r.visitor.Exit(path, Number, sig)
147+
148+
case string: // for JSON string literals
149+
r.visitor.Enter(path, String)
150+
r.hasher.Reset()
151+
r.hasher.Write([]byte(`"` + tt + `"`))
152+
sig = r.hasher.Sum(nil)
153+
r.visitor.Exit(path, String, sig)
154+
155+
case nil: // for JSON null
156+
r.visitor.Enter(path, Null)
157+
sig = r.nullSig[:]
158+
r.visitor.Exit(path, Null, sig)
159+
160+
}
161+
162+
return
163+
164+
}
165+
166+
// recurseArray generates the hash of an array.
167+
func (r *recurser) recurseArray(path string) ([]byte, error) {
168+
169+
hh := sha1.New()
170+
var idx int64
171+
172+
for r.dec.More() {
173+
174+
t, err := r.dec.Token()
175+
176+
h, err := r.recurseToken(path+"["+strconv.FormatInt(idx, 10)+"]", t)
177+
if err != nil {
178+
return nil, err
179+
}
180+
181+
hh.Write(h[:])
182+
183+
idx++
184+
185+
}
186+
187+
r.dec.Token() // consume final ']'
188+
189+
return hh.Sum(nil), nil
190+
191+
}
192+
193+
// recurseObject generates the hash of an object.
194+
func (r *recurser) recurseObject(path string) ([]byte, error) {
195+
196+
// obtain a buffer to hold the object signature
197+
sig := r.sigpool.Get().([]byte)
198+
199+
// reset the signature
200+
for i := range sig {
201+
sig[i] = 0
202+
}
203+
204+
for r.dec.More() {
205+
206+
// read the key from the object
207+
t, err := r.dec.Token()
208+
if err != nil {
209+
return nil, err
210+
}
211+
212+
key := t.(string) // we know it is valid since r.dec.Token didn't error
213+
214+
// figure out the subpath for this key
215+
var subpath string
216+
if strings.HasSuffix(path, ".") {
217+
subpath = path + key
218+
} else {
219+
subpath = path + "." + key
220+
}
221+
222+
// recursively read the key's value
223+
t, err = r.dec.Token()
224+
if err != nil {
225+
return nil, err
226+
}
227+
228+
valueSignature, err := r.recurseToken(subpath, t)
229+
if err != nil {
230+
return nil, err
231+
}
232+
233+
// generate a signature for this KV pair
234+
r.hasher.Reset()
235+
r.hasher.Write([]byte(key))
236+
r.hasher.Write(valueSignature)
237+
238+
// xor this KV hash into our final KV hash
239+
for i, v := range r.hasher.Sum(nil) {
240+
sig[i] = sig[i] ^ v
241+
}
242+
243+
}
244+
245+
// consume the final '}'
246+
r.dec.Token()
247+
248+
return sig, nil
249+
250+
}

0 commit comments

Comments
 (0)