From 5c69908047fdda180f4c81b52bb06156a7986c2d Mon Sep 17 00:00:00 2001 From: Denys Misko Date: Wed, 17 Aug 2022 11:04:40 +1000 Subject: [PATCH] Implement EachRawKey --- parser.go | 252 ++++++++++++++++++++++++++++++++++++++++++++++++- parser_test.go | 81 ++++++++++++++++ 2 files changed, 329 insertions(+), 4 deletions(-) diff --git a/parser.go b/parser.go index 76ec02d..0f30c82 100644 --- a/parser.go +++ b/parser.go @@ -380,6 +380,254 @@ func sameTree(p1, p2 []string) bool { const stackArraySize = 128 +func getRawValue(data []byte, offset int) ([]byte, int, error) { + endOffset := offset + + if data[offset] == '"' { + if idx, _ := stringEnd(data[offset+1:]); idx != -1 { + endOffset += idx + 1 + } else { + return nil, offset, MalformedStringError + } + } else if data[offset] == '[' { // if array value + // blockEnd scans data[offset:], so its result is made absolute by adding offset below; -1 means malformed + endOffset = blockEnd(data[offset:], '[', ']') + + if endOffset == -1 { + return nil, offset, MalformedArrayError + } + + endOffset += offset + } else if data[offset] == '{' { // if object value + // blockEnd scans data[offset:], so its result is made absolute by adding offset below; -1 means malformed + endOffset = blockEnd(data[offset:], '{', '}') + + if endOffset == -1 { + return nil, offset, MalformedObjectError + } + + endOffset += offset + } else { + // Any other value (number, boolean, null, ...): its length is taken from tokenEnd + end := tokenEnd(data[endOffset:]) + + if end == -1 { + return nil, offset, MalformedValueError + } + + endOffset += end + } + return data[offset:endOffset], endOffset, nil +} + +func internalRawGet(data []byte) (value []byte, err error) { + // Skip to the first token; nextToken returning -1 is reported as malformed JSON + nO := nextToken(data) + if nO == -1 { + return nil, MalformedJsonError + } + + value, _, err = getRawValue(data, nO) + if err != nil { + return value, err + } + + return value[:len(value):len(value)], nil // full slice expression: capacity is clamped to len(value) +} + +func EachRawKey(data []byte, cb func(int, []byte, error), paths ...[]string) int { // cb gets (index into paths, raw value, error) for each match, or (-1, nil, MalformedJsonError) when the nesting level is invalid + var x struct{} + var level, pathsMatched, i int + ln := len(data) + + pathFlags := make([]bool, stackArraySize)[:] + if len(paths) > cap(pathFlags) { + 
pathFlags = make([]bool, len(paths))[:] + } + pathFlags = pathFlags[0:len(paths)] + + var maxPath int + for _, p := range paths { + if len(p) > maxPath { + maxPath = len(p) + } + } + + pathsBuf := make([]string, stackArraySize)[:] + if maxPath > cap(pathsBuf) { + pathsBuf = make([]string, maxPath)[:] + } + pathsBuf = pathsBuf[0:maxPath] + + for i < ln { + switch data[i] { + case '"': + i++ + keyBegin := i + + strEnd, keyEscaped := stringEnd(data[i:]) + if strEnd == -1 { + return -1 + } + i += strEnd + + keyEnd := i - 1 + + valueOffset := nextToken(data[i:]) + if valueOffset == -1 { + return -1 + } + + i += valueOffset + + // a string followed by ':' is an object key; compare it against the requested paths at this level + if data[i] == ':' { + match := -1 + key := data[keyBegin:keyEnd] + + // for unescape: if there are no escape sequences, this is cheap; if there are, it is a + // bit more expensive, but causes no allocations unless len(key) > unescapeStackBufSize + var keyUnesc []byte + if !keyEscaped { + keyUnesc = key + } else { + var stackbuf [unescapeStackBufSize]byte + if ku, err := Unescape(key, stackbuf[:]); err != nil { + return -1 + } else { + keyUnesc = ku + } + } + + if maxPath >= level { + if level < 1 { + cb(-1, nil, MalformedJsonError) + return -1 + } + + pathsBuf[level-1] = bytesToString(&keyUnesc) + for pi, p := range paths { + if len(p) != level || pathFlags[pi] || !equalStr(&keyUnesc, p[level-1]) || !sameTree(p, pathsBuf[:level]) { + continue + } + + match = pi + + pathsMatched++ + pathFlags[pi] = true + + v, e := internalRawGet(data[i+1:]) + cb(pi, v, e) + + if pathsMatched == len(paths) { + break + } + } + if pathsMatched == len(paths) { + return i + } + } + + if match == -1 { + tokenOffset := nextToken(data[i+1:]) + i += tokenOffset + + if data[i] == '{' { + blockSkip := blockEnd(data[i:], '{', '}') + i += blockSkip + 1 + } + } + + if i < ln { + switch data[i] { + case '{', '}', '[', '"': + i-- + } + } + } else { + i-- + } + case '{': + level++ + case '}': + level-- + case '[': + var ok bool + 
arrIdxFlags := make(map[int]struct{}) + + pIdxFlags := make([]bool, stackArraySize)[:] + if len(paths) > cap(pIdxFlags) { + pIdxFlags = make([]bool, len(paths))[:] + } + pIdxFlags = pIdxFlags[0:len(paths)] + + if level < 0 { + cb(-1, nil, MalformedJsonError) + return -1 + } + + for pi, p := range paths { + if len(p) < level+1 || pathFlags[pi] || p[level][0] != '[' || !sameTree(p, pathsBuf[:level]) { + continue + } + if len(p[level]) >= 2 { + aIdx, _ := strconv.Atoi(p[level][1 : len(p[level])-1]) + arrIdxFlags[aIdx] = x + pIdxFlags[pi] = true + } + } + + if len(arrIdxFlags) > 0 { + level++ + + var curIdx int + arrOff, _ := ArrayEach(data[i:], func(value []byte, dataType ValueType, offset int, err error) { + if _, ok = arrIdxFlags[curIdx]; ok { + for pi, p := range paths { + if pIdxFlags[pi] { + aIdx, _ := strconv.Atoi(p[level-1][1 : len(p[level-1])-1]) + + if curIdx == aIdx { + of := searchKeys(value, p[level:]...) + + pathsMatched++ + pathFlags[pi] = true + + if of != -1 { + v, er := internalRawGet(value[of:]) + cb(pi, v, er) + } + } + } + } + } + + curIdx += 1 + }) + + if pathsMatched == len(paths) { + return i + } + + i += arrOff - 1 + } else { + // No remaining path selects an index of this array, so skip the whole array block + if arraySkip := blockEnd(data[i:], '[', ']'); arraySkip == -1 { + return -1 + } else { + i += arraySkip - 1 + } + } + case ']': + level-- + } + + i++ + } + + return -1 +} + func EachKey(data []byte, cb func(int, []byte, ValueType, error), paths ...[]string) int { var x struct{} var level, pathsMatched, i int @@ -707,12 +955,10 @@ func WriteToBuffer(buffer []byte, str string) int { } /* - Del - Receives existing data structure, path to delete. Returns: `data` - return modified data - */ func Delete(data []byte, keys ...string) []byte { lk := len(keys) @@ -793,13 +1039,11 @@ func Delete(data []byte, keys ...string) []byte { } /* - Set - Receives existing data structure, path to set, and data to set at that key. 
Returns: `value` - modified byte array `err` - On any parsing error - */ func Set(data []byte, setValue []byte, keys ...string) (value []byte, err error) { // ensure keys are set diff --git a/parser_test.go b/parser_test.go index 7036feb..df3b89f 100644 --- a/parser_test.go +++ b/parser_test.go @@ -1787,6 +1787,87 @@ func TestEachKey(t *testing.T) { } } +func TestEachRawKey(t *testing.T) { + paths := [][]string{ + {"name"}, + {"order"}, + {"nested", "a"}, + {"nested", "b"}, + {"nested2", "a"}, + {"nested", "nested3", "b"}, + {"arr", "[1]", "b"}, + {"arrInt", "[3]"}, + {"arrInt", "[5]"}, // Should not find last key + {"nested"}, + {"arr", "["}, // issue#177 Invalid arguments + {"a\n", "b\n"}, // issue#165 + {"nested", "b"}, // Should find repeated key + } + + keysFound := 0 + + EachRawKey(testJson, func(idx int, value []byte, err error) { + keysFound++ + + switch idx { + case 0: + if string(value) != `"Name"` { + t.Error("Should find 1 key", string(value)) + } + case 1: + if string(value) != `"Order"` { + t.Errorf("Should find 2 key") + } + case 2: + if string(value) != `"test"` { + t.Errorf("Should find 3 key") + } + case 3: + if string(value) != "2" { + t.Errorf("Should find 4 key") + } + case 4: + if string(value) != `"test2"` { + t.Error("Should find 5 key", string(value)) + } + case 5: + if string(value) != "4" { + t.Errorf("Should find 6 key") + } + case 6: + if string(value) != "2" { + t.Errorf("Should find 7 key") + } + case 7: + if string(value) != "4" { + t.Error("Should find 8 key", string(value)) + } + case 8: + t.Errorf("Found key #8 that should not be found") + case 9: + if string(value) != `{"a":"test", "b":2, "nested3":{"a":"test3","b":4}, "c": "unknown"}` { + t.Error("Should find 9 key", string(value)) + } + case 10: + t.Errorf("Found key #10 that should not be found") + case 11: + if string(value) != "99" { + t.Error("Should find 10 key", string(value)) + } + case 12: + if string(value) != "2" { + t.Errorf("Should find 11 key") + } + default: + 
t.Errorf("Should find only 10 keys, got %v key", idx) + } + }, paths...) + + if keysFound != 11 { + t.Errorf("Should find 11 keys: %d", keysFound) + } +} + type ParseTest struct { in string intype ValueType