Skip to content

Commit 895d7f7

Browse files
committed
feat: add get_by_keypath
1 parent 6b3f03e commit 895d7f7

File tree

3 files changed

+236
-61
lines changed

3 files changed

+236
-61
lines changed

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ byteorder = "1.5.0"
3030
fast-float = "0.2.0"
3131
nom = "7.1.3"
3232
ordered-float = { version = "4.1.1", default-features = false }
33+
3334
rand = { version = "0.8.5", features = ["small_rng"] }
3435
serde_json = { version = "1.0.107", default-features = false, features = [
3536
"preserve_order",
@@ -40,6 +41,7 @@ goldenfile = "1.5.2"
4041
serde_json = "1.0.107"
4142
json-deserializer = "0.4.4"
4243
simd-json = {version = "0.11.1", features = ["allow-non-simd"]}
44+
4345
mockalloc = "0.1.2"
4446
criterion = "0.5.1"
4547

src/functions.rs

Lines changed: 174 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ use core::convert::TryInto;
1616
use std::borrow::Cow;
1717
use std::cmp::Ordering;
1818
use std::collections::VecDeque;
19+
use std::str::from_utf8;
1920

2021
use crate::constants::*;
2122
use crate::error::*;
@@ -232,22 +233,13 @@ pub fn get_by_index(value: &[u8], index: usize) -> Option<Vec<u8>> {
232233
let header = read_u32(value, 0).unwrap();
233234
match header & CONTAINER_HEADER_TYPE_MASK {
234235
ARRAY_CONTAINER_TAG => {
235-
let length = (header & CONTAINER_HEADER_LEN_MASK) as usize;
236-
if index >= length {
237-
return None;
238-
}
239-
let mut jentry_offset = 4;
240-
let mut val_offset = 4 * length + 4;
241-
for i in 0..length {
236+
let offsets = get_offsets_by_index(value, 0, header, index);
237+
238+
offsets.map(|(jentry_offset, val_offset)| {
242239
let encoded = read_u32(value, jentry_offset).unwrap();
243240
let jentry = JEntry::decode_jentry(encoded);
244241
let val_length = jentry.length as usize;
245-
if i < index {
246-
jentry_offset += 4;
247-
val_offset += val_length;
248-
continue;
249-
}
250-
let val = match jentry.type_code {
242+
match jentry.type_code {
251243
CONTAINER_TAG => value[val_offset..val_offset + val_length].to_vec(),
252244
_ => {
253245
let mut buf = Vec::with_capacity(8 + val_length);
@@ -258,10 +250,8 @@ pub fn get_by_index(value: &[u8], index: usize) -> Option<Vec<u8>> {
258250
}
259251
buf
260252
}
261-
};
262-
return Some(val);
263-
}
264-
None
253+
}
254+
})
265255
}
266256
_ => None,
267257
}
@@ -289,40 +279,8 @@ pub fn get_by_name(value: &[u8], name: &str, ignore_case: bool) -> Option<Vec<u8
289279
let header = read_u32(value, 0).unwrap();
290280
match header & CONTAINER_HEADER_TYPE_MASK {
291281
OBJECT_CONTAINER_TAG => {
292-
let length = (header & CONTAINER_HEADER_LEN_MASK) as usize;
293-
let mut jentry_offset = 4;
294-
let mut val_offset = 8 * length + 4;
295-
296-
let mut key_jentries: VecDeque<JEntry> = VecDeque::with_capacity(length);
297-
for _ in 0..length {
298-
let encoded = read_u32(value, jentry_offset).unwrap();
299-
let key_jentry = JEntry::decode_jentry(encoded);
300-
301-
jentry_offset += 4;
302-
val_offset += key_jentry.length as usize;
303-
key_jentries.push_back(key_jentry);
304-
}
282+
let offsets = get_offsets_by_name(value, 0, header, name, ignore_case);
305283

306-
let mut offsets = None;
307-
let mut key_offset = 8 * length + 4;
308-
while let Some(key_jentry) = key_jentries.pop_front() {
309-
let prev_key_offset = key_offset;
310-
key_offset += key_jentry.length as usize;
311-
let key =
312-
unsafe { std::str::from_utf8_unchecked(&value[prev_key_offset..key_offset]) };
313-
// first match the value with the same name, if not found,
314-
// then match the value with the ignoring case name.
315-
if name.eq(key) {
316-
offsets = Some((jentry_offset, val_offset));
317-
break;
318-
} else if ignore_case && name.eq_ignore_ascii_case(key) && offsets.is_none() {
319-
offsets = Some((jentry_offset, val_offset));
320-
}
321-
let val_encoded = read_u32(value, jentry_offset).unwrap();
322-
let val_jentry = JEntry::decode_jentry(val_encoded);
323-
jentry_offset += 4;
324-
val_offset += val_jentry.length as usize;
325-
}
326284
if let Some((jentry_offset, val_offset)) = offsets {
327285
let encoded = read_u32(value, jentry_offset).unwrap();
328286
let jentry = JEntry::decode_jentry(encoded);
@@ -348,6 +306,172 @@ pub fn get_by_name(value: &[u8], name: &str, ignore_case: bool) -> Option<Vec<u8
348306
}
349307
}
350308

309+
/// Extracts JSON sub-object at the specified path,
310+
/// where path elements can be either field keys or array indexes encoded in utf-8 string.
311+
pub fn get_by_keypath<'a, I: Iterator<Item = &'a [u8]>>(
312+
value: &[u8],
313+
keypath: I,
314+
) -> Option<Vec<u8>> {
315+
if !is_jsonb(value) {
316+
return match parse_value(value) {
317+
Ok(val) => {
318+
let mut current_val = &val;
319+
for key in keypath {
320+
match from_utf8(key) {
321+
Ok(k) => {
322+
let res = match current_val {
323+
Value::Array(arr) => match k.parse::<usize>() {
324+
Ok(idx) => arr.get(idx),
325+
Err(_) => None,
326+
},
327+
Value::Object(obj) => obj.get(k),
328+
_ => None,
329+
};
330+
match res {
331+
Some(v) => current_val = v,
332+
None => return None,
333+
};
334+
}
335+
Err(_) => return None,
336+
}
337+
}
338+
Some(current_val.to_vec())
339+
}
340+
Err(_) => None,
341+
};
342+
}
343+
344+
let mut curr_val_offset = 0;
345+
let mut curr_jentry_encoded = 0;
346+
let mut curr_jentry: Option<JEntry> = None;
347+
348+
for key in keypath {
349+
match from_utf8(key) {
350+
Ok(k) => {
351+
if let Some(ref jentry) = curr_jentry {
352+
if jentry.type_code != CONTAINER_TAG {
353+
return None;
354+
}
355+
};
356+
let header = read_u32(value, curr_val_offset).unwrap();
357+
match header & CONTAINER_HEADER_TYPE_MASK {
358+
OBJECT_CONTAINER_TAG => {
359+
match get_offsets_by_name(value, curr_val_offset, header, k, false) {
360+
Some((jentry_offset, value_offset)) => {
361+
curr_jentry_encoded = read_u32(value, jentry_offset).unwrap();
362+
curr_jentry = Some(JEntry::decode_jentry(curr_jentry_encoded));
363+
curr_val_offset = value_offset;
364+
}
365+
None => return None,
366+
};
367+
}
368+
ARRAY_CONTAINER_TAG => match k.parse::<usize>() {
369+
Ok(idx) => {
370+
match get_offsets_by_index(value, curr_val_offset, header, idx) {
371+
Some((jentry_offset, value_offset)) => {
372+
curr_jentry_encoded = read_u32(value, jentry_offset).unwrap();
373+
curr_jentry = Some(JEntry::decode_jentry(curr_jentry_encoded));
374+
curr_val_offset = value_offset;
375+
}
376+
None => return None,
377+
}
378+
}
379+
Err(_) => return None,
380+
},
381+
_ => return None,
382+
}
383+
}
384+
Err(_) => return None,
385+
}
386+
}
387+
curr_jentry.map(|jentry| {
388+
let val_length = jentry.length as usize;
389+
match jentry.type_code {
390+
CONTAINER_TAG => value[curr_val_offset..curr_val_offset + val_length].to_vec(),
391+
_ => {
392+
let mut buf: Vec<u8> = Vec::with_capacity(val_length + 8);
393+
let scalar_header = SCALAR_CONTAINER_TAG;
394+
buf.extend_from_slice(&scalar_header.to_be_bytes());
395+
buf.extend_from_slice(&curr_jentry_encoded.to_be_bytes());
396+
if val_length > 0 {
397+
buf.extend_from_slice(&value[curr_val_offset..curr_val_offset + val_length]);
398+
}
399+
buf
400+
}
401+
}
402+
})
403+
}
404+
405+
fn get_offsets_by_name(
406+
value: &[u8],
407+
offset: usize,
408+
header: u32,
409+
name: &str,
410+
ignore_case: bool,
411+
) -> Option<(usize, usize)> {
412+
let length = (header & CONTAINER_HEADER_LEN_MASK) as usize;
413+
let mut jentry_offset = offset + 4;
414+
let mut val_offset = offset + 8 * length + 4;
415+
416+
let mut key_jentries: VecDeque<JEntry> = VecDeque::with_capacity(length);
417+
for _ in 0..length {
418+
let encoded = read_u32(value, jentry_offset).unwrap();
419+
let key_jentry = JEntry::decode_jentry(encoded);
420+
421+
jentry_offset += 4;
422+
val_offset += key_jentry.length as usize;
423+
key_jentries.push_back(key_jentry);
424+
}
425+
426+
let mut offsets = None;
427+
let mut key_offset = offset + 8 * length + 4;
428+
while let Some(key_jentry) = key_jentries.pop_front() {
429+
let prev_key_offset = key_offset;
430+
key_offset += key_jentry.length as usize;
431+
let key = unsafe { std::str::from_utf8_unchecked(&value[prev_key_offset..key_offset]) };
432+
// first match the value with the same name, if not found,
433+
// then match the value with the ignoring case name.
434+
if name.eq(key) {
435+
offsets = Some((jentry_offset, val_offset));
436+
break;
437+
} else if ignore_case && name.eq_ignore_ascii_case(key) && offsets.is_none() {
438+
offsets = Some((jentry_offset, val_offset));
439+
}
440+
let val_encoded = read_u32(value, jentry_offset).unwrap();
441+
let val_jentry = JEntry::decode_jentry(val_encoded);
442+
jentry_offset += 4;
443+
val_offset += val_jentry.length as usize;
444+
}
445+
offsets
446+
}
447+
448+
fn get_offsets_by_index(
449+
value: &[u8],
450+
offset: usize,
451+
header: u32,
452+
index: usize,
453+
) -> Option<(usize, usize)> {
454+
let length = (header & CONTAINER_HEADER_LEN_MASK) as usize;
455+
if index >= length {
456+
return None;
457+
}
458+
let mut jentry_offset = offset + 4;
459+
let mut val_offset = offset + 4 * length + 4;
460+
461+
for i in 0..length {
462+
let encoded = read_u32(value, jentry_offset).unwrap();
463+
let jentry = JEntry::decode_jentry(encoded);
464+
let val_length = jentry.length as usize;
465+
if i < index {
466+
jentry_offset += 4;
467+
val_offset += val_length;
468+
continue;
469+
}
470+
return Some((jentry_offset, val_offset));
471+
}
472+
None
473+
}
474+
351475
/// Get the keys of a `JSONB` object.
352476
pub fn object_keys(value: &[u8]) -> Option<Vec<u8>> {
353477
if !is_jsonb(value) {

tests/it/functions.rs

Lines changed: 60 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,14 @@ use std::collections::BTreeMap;
1818

1919
use jsonb::{
2020
array_length, array_values, as_bool, as_null, as_number, as_str, build_array, build_object,
21-
compare, convert_to_comparable, from_slice, get_by_index, get_by_name, get_by_path, is_array,
22-
is_object, object_each, object_keys, parse_value, path_exists, strip_nulls, to_bool, to_f64,
23-
to_i64, to_pretty_string, to_str, to_string, to_u64, traverse_check_string, type_of, Number,
24-
Object, Value,
21+
compare, convert_to_comparable, from_slice, get_by_index, get_by_keypath, get_by_name,
22+
get_by_path, is_array, is_object, object_each, object_keys, parse_value, path_exists,
23+
strip_nulls, to_bool, to_f64, to_i64, to_pretty_string, to_str, to_string, to_u64,
24+
traverse_check_string, type_of, Number, Object, Value,
2525
};
2626

2727
use jsonb::jsonpath::parse_json_path;
28+
use nom::AsBytes;
2829

2930
#[test]
3031
fn test_build_array() {
@@ -979,13 +980,6 @@ fn test_type_of() {
979980

980981
#[test]
981982
fn test_object_each() {
982-
fn init_object<'a>(entries: Vec<(&str, Value<'a>)>) -> Value<'a> {
983-
let mut map = BTreeMap::new();
984-
for (key, val) in entries {
985-
map.insert(key.to_string(), val);
986-
}
987-
Value::Object(map)
988-
}
989983
let sources = vec![
990984
("true", None),
991985
(r#"[1,2,3]"#, None),
@@ -1044,3 +1038,58 @@ fn test_object_each() {
10441038
}
10451039
}
10461040
}
1041+
1042+
#[test]
fn test_get_by_keypath() {
    // Checks one lookup result: either it decodes to the wanted value, or the
    // lookup must have produced nothing.
    fn check(actual: Option<Vec<u8>>, wanted: Option<Value<'_>>) {
        match wanted {
            Some(wanted) => assert_eq!(wanted, from_slice(&actual.unwrap()).unwrap()),
            None => assert_eq!(actual, None),
        }
    }

    // (JSON text, key path elements, expected value at that path)
    let cases = vec![
        ("null", vec!["a", "b"], None),
        ("true", vec!["a", "b"], None),
        (r#""sdasd""#, vec!["1"], None),
        (
            "[10,20,30]",
            vec!["1"],
            Some(Value::Number(Number::UInt64(20))),
        ),
        (
            r#"[10,20,["a","b","c"]]"#,
            vec!["2", "0"],
            Some(Value::String(Cow::from("a"))),
        ),
        (r#"[10,20,["a","b","c"]]"#, vec!["2", "a"], None),
        (
            r#"[10,20,[{"k1":[1,2,3],"k2":{"w":1,"z":2}},"b","c"]]"#,
            vec!["2", "0", "k2"],
            Some(init_object(vec![
                ("w", Value::Number(Number::UInt64(1))),
                ("z", Value::Number(Number::UInt64(2))),
            ])),
        ),
    ];

    for (text, keys, wanted) in cases {
        let keypath = keys.into_iter().map(|key| key.as_bytes());

        // First against the bytes produced by `to_vec` on the parsed value ...
        let encoded = parse_value(text.as_bytes()).unwrap().to_vec();
        check(get_by_keypath(&encoded, keypath.clone()), wanted.clone());

        // ... then against the raw JSON text itself.
        check(get_by_keypath(text.as_bytes(), keypath), wanted);
    }
}
1088+
1089+
fn init_object<'a>(entries: Vec<(&str, Value<'a>)>) -> Value<'a> {
1090+
let mut map = BTreeMap::new();
1091+
for (key, val) in entries {
1092+
map.insert(key.to_string(), val);
1093+
}
1094+
Value::Object(map)
1095+
}

0 commit comments

Comments
 (0)