Skip to content

Commit 163fc30

Browse files
authored
Merge pull request #8 from b41sh/feat-jsonpath-selector
feat: support JSON path selector
2 parents 4ba079e + f8aa82d commit 163fc30

File tree

13 files changed

+1364
-775
lines changed

13 files changed

+1364
-775
lines changed

src/functions.rs

Lines changed: 57 additions & 218 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,14 @@ use std::collections::VecDeque;
2020
use super::constants::*;
2121
use super::error::*;
2222
use super::jentry::JEntry;
23-
use super::json_path::JsonPathRef;
2423
use super::number::Number;
2524
use super::parser::decode_value;
2625
use super::value::Value;
26+
use crate::jsonpath::ArrayIndex;
27+
use crate::jsonpath::Index;
28+
use crate::jsonpath::JsonPath;
29+
use crate::jsonpath::Path;
30+
use crate::jsonpath::Selector;
2731

2832
// Builtin functions for `JSONB` bytes and `JSON` strings that work without decoding all values.
2933
// The input value must be valid `JSONB` or `JSON`.
@@ -134,7 +138,58 @@ pub fn array_length(value: &[u8]) -> Option<usize> {
134138
}
135139
}
136140

137-
/// Get the inner value by ignoring case name of `JSONB` object.
141+
/// Get the inner elements of `JSONB` value by JSON path.
142+
/// The return value may contain multiple matching elements.
143+
pub fn get_by_path<'a>(value: &'a [u8], json_path: JsonPath<'a>) -> Vec<Vec<u8>> {
144+
let selector = Selector::new(json_path);
145+
if !is_jsonb(value) {
146+
let json_value = decode_value(value).unwrap();
147+
let value = json_value.to_vec();
148+
selector.select(value.as_slice())
149+
} else {
150+
selector.select(value)
151+
}
152+
}
153+
154+
/// Get the inner element of `JSONB` value by JSON path.
155+
/// If there are multiple matching elements, only the first one is returned.
156+
pub fn get_by_path_first<'a>(value: &'a [u8], json_path: JsonPath<'a>) -> Option<Vec<u8>> {
157+
let mut values = get_by_path(value, json_path);
158+
if values.is_empty() {
159+
None
160+
} else {
161+
Some(values.remove(0))
162+
}
163+
}
164+
165+
/// Get the inner elements of `JSONB` value by JSON path.
166+
/// If there are multiple matching elements, return a `JSONB` Array.
167+
pub fn get_by_path_array<'a>(value: &'a [u8], json_path: JsonPath<'a>) -> Option<Vec<u8>> {
168+
let values = get_by_path(value, json_path);
169+
let mut array_value = Vec::new();
170+
let items: Vec<_> = values.iter().map(|v| v.as_slice()).collect();
171+
build_array(items, &mut array_value).unwrap();
172+
Some(array_value)
173+
}
174+
175+
/// Get the inner element of `JSONB` Array by index.
176+
pub fn get_by_index(value: &[u8], index: i32) -> Option<Vec<u8>> {
177+
if index < 0 {
178+
return None;
179+
}
180+
let path = Path::ArrayIndices(vec![ArrayIndex::Index(Index::Index(index))]);
181+
let json_path = JsonPath { paths: vec![path] };
182+
get_by_path_first(value, json_path)
183+
}
184+
185+
/// Get the inner element of `JSONB` Object by key name.
186+
pub fn get_by_name(value: &[u8], name: &str) -> Option<Vec<u8>> {
187+
let path = Path::DotField(Cow::Borrowed(name));
188+
let json_path = JsonPath { paths: vec![path] };
189+
get_by_path_first(value, json_path)
190+
}
191+
192+
/// Get the inner element of `JSONB` Object by key name ignoring case.
138193
pub fn get_by_name_ignore_case(value: &[u8], name: &str) -> Option<Vec<u8>> {
139194
if !is_jsonb(value) {
140195
let json_value = decode_value(value).unwrap();
@@ -203,114 +258,6 @@ pub fn get_by_name_ignore_case(value: &[u8], name: &str) -> Option<Vec<u8>> {
203258
}
204259
}
205260

206-
/// Get the inner value by JSON path of `JSONB` object.
207-
/// JSON path can be a nested index or name,
208-
/// used to get inner value of array and object respectively.
209-
pub fn get_by_path<'a>(value: &'a [u8], paths: Vec<JsonPathRef<'a>>) -> Option<Vec<u8>> {
210-
if !is_jsonb(value) {
211-
let json_value = decode_value(value).unwrap();
212-
return json_value.get_by_path(&paths).map(Value::to_vec);
213-
}
214-
215-
let mut offset = 0;
216-
let mut buf: Vec<u8> = Vec::new();
217-
218-
for i in 0..paths.len() {
219-
let path = paths.get(i).unwrap();
220-
let header = read_u32(value, offset).unwrap();
221-
let (jentry_offset, val_offset) = match path {
222-
JsonPathRef::String(name) => {
223-
if header & CONTAINER_HEADER_TYPE_MASK != OBJECT_CONTAINER_TAG {
224-
return None;
225-
}
226-
let length = (header & CONTAINER_HEADER_LEN_MASK) as usize;
227-
let mut jentry_offset = offset + 4;
228-
let mut val_offset = offset + 8 * length + 4;
229-
230-
let mut key_jentries: VecDeque<JEntry> = VecDeque::with_capacity(length);
231-
for _ in 0..length {
232-
let encoded = read_u32(value, jentry_offset).unwrap();
233-
let key_jentry = JEntry::decode_jentry(encoded);
234-
235-
jentry_offset += 4;
236-
val_offset += key_jentry.length as usize;
237-
key_jentries.push_back(key_jentry);
238-
}
239-
240-
let mut found = false;
241-
let mut key_offset = offset + 8 * length + 4;
242-
while let Some(key_jentry) = key_jentries.pop_front() {
243-
let prev_key_offset = key_offset;
244-
key_offset += key_jentry.length as usize;
245-
let key = unsafe {
246-
std::str::from_utf8_unchecked(&value[prev_key_offset..key_offset])
247-
};
248-
if name.eq(key) {
249-
found = true;
250-
break;
251-
}
252-
let val_encoded = read_u32(value, jentry_offset).unwrap();
253-
let val_jentry = JEntry::decode_jentry(val_encoded);
254-
jentry_offset += 4;
255-
val_offset += val_jentry.length as usize;
256-
}
257-
if !found {
258-
return None;
259-
}
260-
(jentry_offset, val_offset)
261-
}
262-
JsonPathRef::UInt64(index) => {
263-
if header & CONTAINER_HEADER_TYPE_MASK != ARRAY_CONTAINER_TAG {
264-
return None;
265-
}
266-
let length = (header & CONTAINER_HEADER_LEN_MASK) as usize;
267-
if *index as usize >= length {
268-
return None;
269-
}
270-
let mut jentry_offset = offset + 4;
271-
let mut val_offset = offset + 4 * length + 4;
272-
273-
for _ in 0..*index {
274-
let encoded = read_u32(value, jentry_offset).unwrap();
275-
let jentry = JEntry::decode_jentry(encoded);
276-
277-
jentry_offset += 4;
278-
val_offset += jentry.length as usize;
279-
}
280-
(jentry_offset, val_offset)
281-
}
282-
};
283-
let encoded = read_u32(value, jentry_offset).unwrap();
284-
let jentry = JEntry::decode_jentry(encoded);
285-
// if the last JSON path, return the value
286-
// if the value is a container value, then continue get for next JSON path.
287-
match jentry.type_code {
288-
CONTAINER_TAG => {
289-
if i == paths.len() - 1 {
290-
buf.extend_from_slice(&value[val_offset..val_offset + jentry.length as usize]);
291-
} else {
292-
offset = val_offset;
293-
}
294-
}
295-
_ => {
296-
if i == paths.len() - 1 {
297-
let scalar_header = SCALAR_CONTAINER_TAG;
298-
buf.extend_from_slice(&scalar_header.to_be_bytes());
299-
buf.extend_from_slice(&encoded.to_be_bytes());
300-
if jentry.length > 0 {
301-
buf.extend_from_slice(
302-
&value[val_offset..val_offset + jentry.length as usize],
303-
);
304-
}
305-
} else {
306-
return None;
307-
}
308-
}
309-
}
310-
}
311-
Some(buf)
312-
}
313-
314261
/// Get the keys of a `JSONB` object.
315262
pub fn object_keys(value: &[u8]) -> Option<Vec<u8>> {
316263
if !is_jsonb(value) {
@@ -868,104 +815,6 @@ pub fn is_object(value: &[u8]) -> bool {
868815
matches!(header & CONTAINER_HEADER_TYPE_MASK, OBJECT_CONTAINER_TAG)
869816
}
870817

871-
/// Parse path string to Json path.
872-
/// Support `["<name>"]`, `[<index>]`, `:name` and `.name`.
873-
pub fn parse_json_path(path: &[u8]) -> Result<Vec<JsonPathRef>, Error> {
874-
let mut idx = 0;
875-
let mut prev_idx = 0;
876-
let mut json_paths = Vec::new();
877-
while idx < path.len() {
878-
let c = read_char(path, &mut idx)?;
879-
if c == b'[' {
880-
let c = read_char(path, &mut idx)?;
881-
if c == b'"' {
882-
prev_idx = idx;
883-
loop {
884-
let c = read_char(path, &mut idx)?;
885-
if c == b'\\' {
886-
idx += 1;
887-
} else if c == b'"' {
888-
let c = read_char(path, &mut idx)?;
889-
if c != b']' {
890-
return Err(Error::InvalidToken);
891-
}
892-
break;
893-
}
894-
}
895-
if prev_idx == idx - 2 {
896-
return Err(Error::InvalidToken);
897-
}
898-
let s = std::str::from_utf8(&path[prev_idx..idx - 2])?;
899-
let json_path = JsonPathRef::String(Cow::Borrowed(s));
900-
901-
json_paths.push(json_path);
902-
} else {
903-
prev_idx = idx - 1;
904-
loop {
905-
let c = read_char(path, &mut idx)?;
906-
if c == b']' {
907-
break;
908-
}
909-
}
910-
if prev_idx == idx - 1 {
911-
return Err(Error::InvalidToken);
912-
}
913-
let s = std::str::from_utf8(&path[prev_idx..idx - 1])?;
914-
if let Ok(v) = s.parse::<u64>() {
915-
let json_path = JsonPathRef::UInt64(v);
916-
json_paths.push(json_path);
917-
} else {
918-
return Err(Error::InvalidToken);
919-
}
920-
}
921-
} else if c == b'"' {
922-
prev_idx = idx;
923-
loop {
924-
let c = read_char(path, &mut idx)?;
925-
if c == b'\\' {
926-
idx += 1;
927-
} else if c == b'"' {
928-
if idx < path.len() {
929-
return Err(Error::InvalidToken);
930-
}
931-
break;
932-
}
933-
}
934-
let s = std::str::from_utf8(&path[prev_idx..idx - 1])?;
935-
let json_path = JsonPathRef::String(Cow::Borrowed(s));
936-
if json_paths.is_empty() {
937-
json_paths.push(json_path);
938-
} else {
939-
return Err(Error::InvalidToken);
940-
}
941-
} else {
942-
if c == b':' || c == b'.' {
943-
if idx == 1 {
944-
return Err(Error::InvalidToken);
945-
} else {
946-
prev_idx = idx;
947-
}
948-
}
949-
while idx < path.len() {
950-
let c = read_char(path, &mut idx)?;
951-
if c == b':' || c == b'.' || c == b'[' {
952-
idx -= 1;
953-
break;
954-
} else if c == b'\\' {
955-
idx += 1;
956-
}
957-
}
958-
if prev_idx == idx {
959-
return Err(Error::InvalidToken);
960-
}
961-
let s = std::str::from_utf8(&path[prev_idx..idx])?;
962-
let json_path = JsonPathRef::String(Cow::Borrowed(s));
963-
json_paths.push(json_path);
964-
}
965-
}
966-
Ok(json_paths)
967-
}
968-
969818
/// Convert `JSONB` value to String
970819
pub fn to_string(value: &[u8]) -> String {
971820
if !is_jsonb(value) {
@@ -1079,16 +928,6 @@ fn is_jsonb(value: &[u8]) -> bool {
1079928
false
1080929
}
1081930

1082-
fn read_char(buf: &[u8], idx: &mut usize) -> Result<u8, Error> {
1083-
match buf.get(*idx) {
1084-
Some(v) => {
1085-
*idx += 1;
1086-
Ok(*v)
1087-
}
1088-
None => Err(Error::InvalidEOF),
1089-
}
1090-
}
1091-
1092931
fn read_u32(buf: &[u8], idx: usize) -> Result<u32, Error> {
1093932
let bytes: [u8; 4] = buf
1094933
.get(idx..idx + 4)

src/json_path.rs

Lines changed: 0 additions & 47 deletions
This file was deleted.

src/jsonpath/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414

1515
mod parser;
1616
mod path;
17+
mod selector;
1718

1819
pub use parser::parse_json_path;
1920
pub use path::*;
21+
pub use selector::*;

0 commit comments

Comments
 (0)