-
Notifications
You must be signed in to change notification settings - Fork 2
/
iterator.go
284 lines (251 loc) · 5.62 KB
/
iterator.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
package jzon
import (
"bytes"
"fmt"
"io"
"runtime"
)
// bufferSize is the length, in bytes, of the temporary array that readMore
// reads into on each pass when the iterator is in capture mode.
const bufferSize = 512
// iteratorEmbedded groups the per-run state of an Iterator so that the reset
// methods can clear all of it with a single assignment
// (it.iteratorEmbedded = iteratorEmbedded{}).
type iteratorEmbedded struct {
/*
* The following attributes must be resettable to zero by memset
*/
// capture switches readMore to its appending path: freshly read data is
// appended after tail instead of overwriting the buffer from index 0.
capture bool
// offset accumulates the tail of every buffer that readMore has
// overwritten (updated in non-capture mode only).
offset int
// the current index position
head int
// eface checkpoint
lastEfaceOffset int
lastEfacePtr uintptr
// TODO: 1. type of context?
// TODO: 2. should context be reset as well?
// NOTE: as a member of this struct, Context is currently cleared by every
// reset method together with the other fields.
Context interface{} // custom iteration context
}
// Iterator is designed for one-shot use, each reuse must call reset first
type Iterator struct {
// cfg is the config the iterator is handed back to by Release.
cfg *DecoderConfig
// reader is the data source; it is nil when iterating over a byte slice
// (see ResetBytes), in which case readMore reports io.EOF.
reader io.Reader
// buffer holds the data being parsed; the unread part is buffer[head:tail].
buffer []byte
// fixbuf is the iterator-owned buffer that Reset installs as buffer when
// reading from an io.Reader; readMore stores the grown buffer back here in
// capture mode so it can be reused.
fixbuf []byte
// a temp buffer is needed for string reading
// which include utf8 conversion
tmpBuffer []byte
// the current tail position in buffer
tail int
iteratorEmbedded
// NOTE(review): the two flags below are not read or written in this part
// of the file; presumably they mirror the matching DecoderConfig options —
// confirm where the iterator is created.
useNumber bool
disallowUnknownFields bool
}
// NewIterator creates an iterator through DefaultDecoderConfig.
func NewIterator() *Iterator {
	it := DefaultDecoderConfig.NewIterator()
	return it
}
// Release hands the iterator back to the config it belongs to.
// The iterator should not be reused after this call.
func (it *Iterator) Release() {
	owner := it.cfg
	owner.returnIterator(it)
}
// reset detaches the iterator from its input: the embedded per-run state is
// zeroed and reader, buffer and tail are cleared. fixbuf and tmpBuffer are
// left untouched.
func (it *Iterator) reset() {
	// fast reset of the embedded state
	it.iteratorEmbedded = iteratorEmbedded{}
	it.reader, it.buffer, it.tail = nil, nil, 0
}
// Reset the iterator with an io.Reader.
// If the reader is nil, the iterator is reset to its initial state. A nil
// *bytes.Buffer stored in the interface is treated as a nil reader as well.
//
// A non-nil *bytes.Buffer is special-cased: the iterator works directly on
// the slice returned by its Bytes method (via ResetBytes) instead of reading
// through the io.Reader interface.
//
// In reset methods, explicit assignment is faster than the following
// *it = Iterator{ ... }
// When the above code is used, runtime.duffcopy and runtime.duffzero will be used
// which will slow down our code (correct me if I am wrong)
func (it *Iterator) Reset(r io.Reader) {
	switch v := r.(type) {
	case nil:
		it.reset()
		return
	case *bytes.Buffer:
		if v == nil {
			// A typed nil pointer makes the interface itself non-nil, so
			// it does not match `case nil`; calling Bytes on it would
			// dereference a nil pointer.
			it.reset()
			return
		}
		it.ResetBytes(v.Bytes())
		return
	}
	it.reader = r
	// expose the full capacity of the internal buffer to the reader
	it.buffer = it.fixbuf[:cap(it.fixbuf)]
	it.tail = 0
	// fast reset
	it.iteratorEmbedded = iteratorEmbedded{}
}
// ResetBytes points the iterator at data. The slice is used as the buffer
// directly (no copy is made) and no reader is attached, so the input ends
// at len(data).
func (it *Iterator) ResetBytes(data []byte) {
	// fast reset of the embedded state
	it.iteratorEmbedded = iteratorEmbedded{}
	it.tail = len(data)
	it.buffer = data
	it.reader = nil
}
// Buffer returns the part of the iterator's buffer between the current head
// and tail positions. The returned slice shares memory with the buffer.
func (it *Iterator) Buffer() []byte {
	start, end := it.head, it.tail
	return it.buffer[start:end]
}
// errWidth is the maximum number of bytes shown on each side of the head
// position in an error location.
const errWidth = 20

// errorLocation returns the slice of the buffer surrounding the current head:
// up to errWidth bytes before it and up to errWidth bytes from it onwards,
// clipped to the start of the buffer and to tail.
func (it *Iterator) errorLocation() []byte {
	start := 0
	if it.head > errWidth {
		start = it.head - errWidth
	}
	end := it.head + errWidth
	if it.tail-it.head < errWidth {
		// fewer than errWidth bytes remain: stop at the tail
		end = it.tail
	}
	return it.buffer[start:end]
}
// WrapError attaches the current iterator location to err.
// An err that already is a *DecodeError is returned unchanged; anything else
// becomes the reason of a new DecodeError whose location is the text around
// the current head.
func (it *Iterator) WrapError(err error) *DecodeError {
	decErr, isDecodeError := err.(*DecodeError)
	if !isDecodeError {
		decErr = &DecodeError{
			reason:   err,
			location: string(it.errorLocation()),
		}
	}
	return decErr
}
// readMore pulls the next chunk of data from the reader.
//
// Outside capture mode the buffer is overwritten from index 0, so the caller
// must have consumed everything first (it.head == it.tail); the old tail is
// added to offset and head restarts at 0. In capture mode the new data is
// appended after tail and the (possibly grown) buffer is kept in fixbuf.
//
// It returns io.EOF when no reader is attached, nil as soon as at least one
// byte was read with a nil or io.EOF error, and the reader's error otherwise.
func (it *Iterator) readMore() error {
	if it.reader == nil {
		return io.EOF
	}
	for {
		var (
			n   int
			err error
		)
		if it.capture {
			var chunk [bufferSize]byte
			n, err = it.reader.Read(chunk[:])
			it.buffer = append(it.buffer[:it.tail], chunk[:n]...)
			it.tail += n
			// save internal buffer for reuse
			it.fixbuf = it.buffer
		} else {
			if jzonDebug && it.head != it.tail {
				panic(fmt.Errorf("head %d, tail %d", it.head, it.tail))
			}
			n, err = it.reader.Read(it.buffer)
			it.offset += it.tail
			it.head = 0
			it.tail = n
		}
		switch {
		case n > 0 && (err == nil || err == io.EOF):
			// data arrived; an accompanying io.EOF is not reported here
			return nil
		case err != nil:
			return err
		}
		// n == 0 && err == nil
		// the implementation of the reader is wrong; yield and retry
		runtime.Gosched()
	}
}
// nextByte peeks at the byte at the head position.
// It does not skip whitespace and does not consume the byte. When the buffer
// is exhausted it asks readMore for more data and returns its error
// (including io.EOF) if that fails.
func (it *Iterator) nextByte() (ret byte, err error) {
	if it.head != it.tail {
		return it.buffer[it.head], nil
	}
	if err = it.readMore(); err != nil {
		return 0, err
	}
	return it.buffer[it.head], nil
}
// expectBytes checks that the upcoming input is exactly s and consumes it,
// calling readMore whenever the buffer runs out before s is complete.
//
// An empty s matches trivially: nothing is consumed and nil is returned.
// On the first mismatching byte an UnexpectedByteError carrying the expected
// and the actual byte is returned; head is then left where the scan of the
// current buffer started. An error from readMore is returned as is.
func (it *Iterator) expectBytes(s string) error {
	if len(s) == 0 {
		// without this guard last would be -1 and s[j] below would panic
		return nil
	}
	last := len(s) - 1
	j := 0 // index of the next byte of s to match
	for {
		i := it.head
		for ; i < it.tail; i++ {
			c := it.buffer[i]
			if c != s[j] {
				return UnexpectedByteError{exp: s[j], got: c}
			}
			if j == last {
				it.head = i + 1
				return nil
			}
			j++
		}
		// buffer exhausted in the middle of s: mark it consumed and refill
		it.head = i
		if err := it.readMore(); err != nil {
			return err
		}
	}
}
// nextToken advances past whitespace and returns the first other byte
// without consuming it (head is left pointing at that byte). More data is
// requested through readMore as needed; its error is returned together with
// a zero byte.
func (it *Iterator) nextToken() (ret byte, err error) {
	for {
		pos := it.head
		for pos < it.tail {
			b := it.buffer[pos]
			// only bytes <= ' ' can be whitespace, so the table lookup is
			// skipped for everything above
			if b > ' ' || valueTypeMap[b] != WhiteSpaceValue {
				it.head = pos
				return b, nil
			}
			pos++
		}
		// the head and tail will be reset by readMore
		it.head = pos
		if err = it.readMore(); err != nil {
			return 0, err
		}
	}
}
// NextValueType skips whitespace and reports the value type that
// valueTypeMap assigns to the first other byte; that byte is not consumed.
// The error from the underlying token read is returned alongside.
func (it *Iterator) NextValueType() (ValueType, error) {
	tok, err := it.nextToken()
	vt := valueTypeMap[tok]
	return vt, err
}
// unmarshal reads a single value into obj and then requires the input to be
// finished: only whitespace may follow. It returns ErrDataRemained when
// another token is found, and any read error other than io.EOF as is.
func (it *Iterator) unmarshal(obj interface{}) error {
	if err := it.ReadVal(obj); err != nil {
		return err
	}
	switch _, err := it.nextToken(); err {
	case nil:
		// a further token exists after the value
		return ErrDataRemained
	case io.EOF:
		return nil
	default:
		return err
	}
}
// Unmarshal behaves like the standard json.Unmarshal: the iterator is reset
// onto data and a single value is decoded into obj.
func (it *Iterator) Unmarshal(data []byte, obj interface{}) error {
	it.ResetBytes(data)
	err := it.unmarshal(obj)
	return err
}
// Valid behaves like the standard json.Valid: it reports whether data is
// one value that Skip accepts, followed by nothing but whitespace.
func (it *Iterator) Valid(data []byte) bool {
	it.ResetBytes(data)
	if err := it.Skip(); err != nil {
		return false
	}
	// anything other than a clean end of input after the value is invalid
	if _, err := it.nextToken(); err != io.EOF {
		return false
	}
	return true
}
// UnmarshalFromReader behaves like the standard json.Unmarshal but takes its
// input from an io.Reader: the iterator is reset onto r and a single value
// is decoded into obj.
func (it *Iterator) UnmarshalFromReader(r io.Reader, obj interface{}) error {
	it.Reset(r)
	err := it.unmarshal(obj)
	return err
}