diff --git a/src/functions.rs b/src/functions.rs index bf9a85d..700d15d 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -20,10 +20,14 @@ use std::collections::VecDeque; use super::constants::*; use super::error::*; use super::jentry::JEntry; -use super::json_path::JsonPathRef; use super::number::Number; use super::parser::decode_value; use super::value::Value; +use crate::jsonpath::ArrayIndex; +use crate::jsonpath::Index; +use crate::jsonpath::JsonPath; +use crate::jsonpath::Path; +use crate::jsonpath::Selector; // builtin functions for `JSONB` bytes and `JSON` strings without decode all Values. // The input value must be valid `JSONB' or `JSON`. @@ -134,7 +138,58 @@ pub fn array_length(value: &[u8]) -> Option { } } -/// Get the inner value by ignoring case name of `JSONB` object. +/// Get the inner elements of `JSONB` value by JSON path. +/// The return value may contains multiple matching elements. +pub fn get_by_path<'a>(value: &'a [u8], json_path: JsonPath<'a>) -> Vec> { + let selector = Selector::new(json_path); + if !is_jsonb(value) { + let json_value = decode_value(value).unwrap(); + let value = json_value.to_vec(); + selector.select(value.as_slice()) + } else { + selector.select(value) + } +} + +/// Get the inner element of `JSONB` value by JSON path. +/// If there are multiple matching elements, only the first one is returned +pub fn get_by_path_first<'a>(value: &'a [u8], json_path: JsonPath<'a>) -> Option> { + let mut values = get_by_path(value, json_path); + if values.is_empty() { + None + } else { + Some(values.remove(0)) + } +} + +/// Get the inner elements of `JSONB` value by JSON path. +/// If there are multiple matching elements, return an `JSONB` Array. +pub fn get_by_path_array<'a>(value: &'a [u8], json_path: JsonPath<'a>) -> Option> { + let values = get_by_path(value, json_path); + let mut array_value = Vec::new(); + let items: Vec<_> = values.iter().map(|v| v.as_slice()).collect(); + build_array(items, &mut array_value).unwrap(); + Some(array_value) +} + +/// Get the inner element of `JSONB` Array by index. +pub fn get_by_index(value: &[u8], index: i32) -> Option> { + if index < 0 { + return None; + } + let path = Path::ArrayIndices(vec![ArrayIndex::Index(Index::Index(index))]); + let json_path = JsonPath { paths: vec![path] }; + get_by_path_first(value, json_path) +} + +/// Get the inner element of `JSONB` Object by key name. +pub fn get_by_name(value: &[u8], name: &str) -> Option> { + let path = Path::DotField(Cow::Borrowed(name)); + let json_path = JsonPath { paths: vec![path] }; + get_by_path_first(value, json_path) +} + +/// Get the inner element of `JSONB` Object by key name ignoring case. pub fn get_by_name_ignore_case(value: &[u8], name: &str) -> Option> { if !is_jsonb(value) { let json_value = decode_value(value).unwrap(); @@ -203,114 +258,6 @@ pub fn get_by_name_ignore_case(value: &[u8], name: &str) -> Option> { } } -/// Get the inner value by JSON path of `JSONB` object. -/// JSON path can be a nested index or name, -/// used to get inner value of array and object respectively. -pub fn get_by_path<'a>(value: &'a [u8], paths: Vec>) -> Option> { - if !is_jsonb(value) { - let json_value = decode_value(value).unwrap(); - return json_value.get_by_path(&paths).map(Value::to_vec); - } - - let mut offset = 0; - let mut buf: Vec = Vec::new(); - - for i in 0..paths.len() { - let path = paths.get(i).unwrap(); - let header = read_u32(value, offset).unwrap(); - let (jentry_offset, val_offset) = match path { - JsonPathRef::String(name) => { - if header & CONTAINER_HEADER_TYPE_MASK != OBJECT_CONTAINER_TAG { - return None; - } - let length = (header & CONTAINER_HEADER_LEN_MASK) as usize; - let mut jentry_offset = offset + 4; - let mut val_offset = offset + 8 * length + 4; - - let mut key_jentries: VecDeque = VecDeque::with_capacity(length); - for _ in 0..length { - let encoded = read_u32(value, jentry_offset).unwrap(); - let key_jentry = JEntry::decode_jentry(encoded); - - jentry_offset += 4; - val_offset += key_jentry.length as usize; - key_jentries.push_back(key_jentry); - } - - let mut found = false; - let mut key_offset = offset + 8 * length + 4; - while let Some(key_jentry) = key_jentries.pop_front() { - let prev_key_offset = key_offset; - key_offset += key_jentry.length as usize; - let key = unsafe { - std::str::from_utf8_unchecked(&value[prev_key_offset..key_offset]) - }; - if name.eq(key) { - found = true; - break; - } - let val_encoded = read_u32(value, jentry_offset).unwrap(); - let val_jentry = JEntry::decode_jentry(val_encoded); - jentry_offset += 4; - val_offset += val_jentry.length as usize; - } - if !found { - return None; - } - (jentry_offset, val_offset) - } - JsonPathRef::UInt64(index) => { - if header & CONTAINER_HEADER_TYPE_MASK != ARRAY_CONTAINER_TAG { - return None; - } - let length = (header & CONTAINER_HEADER_LEN_MASK) as usize; - if *index as usize >= length { - return None; - } - let mut jentry_offset = offset + 4; - let mut val_offset = offset + 4 * length + 4; - - for _ in 0..*index { - let encoded = read_u32(value, jentry_offset).unwrap(); - let jentry = JEntry::decode_jentry(encoded); - - jentry_offset += 4; - val_offset += jentry.length as usize; - } - (jentry_offset, val_offset) - } - }; - let encoded = read_u32(value, jentry_offset).unwrap(); - let jentry = JEntry::decode_jentry(encoded); - // if the last JSON path, return the value - // if the value is a container value, then continue get for next JSON path. - match jentry.type_code { - CONTAINER_TAG => { - if i == paths.len() - 1 { - buf.extend_from_slice(&value[val_offset..val_offset + jentry.length as usize]); - } else { - offset = val_offset; - } - } - _ => { - if i == paths.len() - 1 { - let scalar_header = SCALAR_CONTAINER_TAG; - buf.extend_from_slice(&scalar_header.to_be_bytes()); - buf.extend_from_slice(&encoded.to_be_bytes()); - if jentry.length > 0 { - buf.extend_from_slice( - &value[val_offset..val_offset + jentry.length as usize], - ); - } - } else { - return None; - } - } - } - } - Some(buf) -} - /// Get the keys of a `JSONB` object. pub fn object_keys(value: &[u8]) -> Option> { if !is_jsonb(value) { @@ -868,104 +815,6 @@ pub fn is_object(value: &[u8]) -> bool { matches!(header & CONTAINER_HEADER_TYPE_MASK, OBJECT_CONTAINER_TAG) } -/// Parse path string to Json path. -/// Support `[""]`, `[]`, `:name` and `.name`. -pub fn parse_json_path(path: &[u8]) -> Result, Error> { - let mut idx = 0; - let mut prev_idx = 0; - let mut json_paths = Vec::new(); - while idx < path.len() { - let c = read_char(path, &mut idx)?; - if c == b'[' { - let c = read_char(path, &mut idx)?; - if c == b'"' { - prev_idx = idx; - loop { - let c = read_char(path, &mut idx)?; - if c == b'\\' { - idx += 1; - } else if c == b'"' { - let c = read_char(path, &mut idx)?; - if c != b']' { - return Err(Error::InvalidToken); - } - break; - } - } - if prev_idx == idx - 2 { - return Err(Error::InvalidToken); - } - let s = std::str::from_utf8(&path[prev_idx..idx - 2])?; - let json_path = JsonPathRef::String(Cow::Borrowed(s)); - - json_paths.push(json_path); - } else { - prev_idx = idx - 1; - loop { - let c = read_char(path, &mut idx)?; - if c == b']' { - break; - } - } - if prev_idx == idx - 1 { - return Err(Error::InvalidToken); - } - let s = std::str::from_utf8(&path[prev_idx..idx - 1])?; - if let Ok(v) = s.parse::() { - let json_path = JsonPathRef::UInt64(v); - json_paths.push(json_path); - } else { - return Err(Error::InvalidToken); - } - } - } else if c == b'"' { - prev_idx = idx; - loop { - let c = read_char(path, &mut idx)?; - if c == b'\\' { - idx += 1; - } else if c == b'"' { - if idx < path.len() { - return Err(Error::InvalidToken); - } - break; - } - } - let s = std::str::from_utf8(&path[prev_idx..idx - 1])?; - let json_path = JsonPathRef::String(Cow::Borrowed(s)); - if json_paths.is_empty() { - json_paths.push(json_path); - } else { - return Err(Error::InvalidToken); - } - } else { - if c == b':' || c == b'.' { - if idx == 1 { - return Err(Error::InvalidToken); - } else { - prev_idx = idx; - } - } - while idx < path.len() { - let c = read_char(path, &mut idx)?; - if c == b':' || c == b'.' || c == b'[' { - idx -= 1; - break; - } else if c == b'\\' { - idx += 1; - } - } - if prev_idx == idx { - return Err(Error::InvalidToken); - } - let s = std::str::from_utf8(&path[prev_idx..idx])?; - let json_path = JsonPathRef::String(Cow::Borrowed(s)); - json_paths.push(json_path); - } - } - Ok(json_paths) -} - /// Convert `JSONB` value to String pub fn to_string(value: &[u8]) -> String { if !is_jsonb(value) { @@ -1079,16 +928,6 @@ fn is_jsonb(value: &[u8]) -> bool { false } -fn read_char(buf: &[u8], idx: &mut usize) -> Result { - match buf.get(*idx) { - Some(v) => { - *idx += 1; - Ok(*v) - } - None => Err(Error::InvalidEOF), - } -} - fn read_u32(buf: &[u8], idx: usize) -> Result { let bytes: [u8; 4] = buf .get(idx..idx + 4) diff --git a/src/json_path.rs b/src/json_path.rs deleted file mode 100644 index a50092b..0000000 --- a/src/json_path.rs +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2023 Datafuse Labs. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::borrow::Cow; -use std::fmt::Debug; -use std::string::ToString; - -#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, PartialEq, Eq, Hash)] -pub enum JsonPath { - String(String), - UInt64(u64), -} - -impl ToString for JsonPath { - fn to_string(&self) -> String { - match self { - JsonPath::String(s) => format!("['{}']", s), - JsonPath::UInt64(n) => format!("[{}]", n), - } - } -} - -impl<'a> JsonPath { - pub fn as_ref(&'a self) -> JsonPathRef<'a> { - match self { - JsonPath::String(v) => JsonPathRef::String(Cow::from(v)), - JsonPath::UInt64(v) => JsonPathRef::UInt64(*v), - } - } -} - -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum JsonPathRef<'a> { - String(Cow<'a, str>), - UInt64(u64), -} diff --git a/src/jsonpath/mod.rs b/src/jsonpath/mod.rs index a81032e..6861639 100644 --- a/src/jsonpath/mod.rs +++ b/src/jsonpath/mod.rs @@ -14,6 +14,8 @@ mod parser; mod path; +mod selector; pub use parser::parse_json_path; pub use path::*; +pub use selector::*; diff --git a/src/jsonpath/parser.rs b/src/jsonpath/parser.rs index b0f7ae7..b14e204 100644 --- a/src/jsonpath/parser.rs +++ b/src/jsonpath/parser.rs @@ -15,19 +15,21 @@ use nom::{ branch::alt, bytes::complete::{escaped, tag, tag_no_case}, - character::complete::{alphanumeric1, char, i32, i64, multispace0, one_of, u32, u64}, + character::complete::{alphanumeric1, char, i32, i64, multispace0, one_of, u64}, combinator::{map, opt, value}, - multi::{many1, separated_list1}, + multi::{many0, separated_list1}, number::complete::double, - sequence::{delimited, preceded, terminated, tuple}, + sequence::{delimited, pair, preceded, separated_pair, terminated, tuple}, IResult, }; use crate::error::Error; use crate::jsonpath::path::*; +use crate::number::Number; +use std::borrow::Cow; /// Parsing the input string to JSON Path. -pub fn parse_json_path(input: &str) -> Result { +pub fn parse_json_path(input: &[u8]) -> Result, Error> { match json_path(input) { Ok((rest, json_path)) => { if !rest.is_empty() { @@ -40,24 +42,22 @@ pub fn parse_json_path(input: &str) -> Result { } } -fn json_path(input: &str) -> IResult<&str, JsonPath> { - map(delimited(multispace0, many1(path), multispace0), |paths| { +fn json_path(input: &[u8]) -> IResult<&[u8], JsonPath<'_>> { + map(delimited(multispace0, paths, multispace0), |paths| { JsonPath { paths } })(input) } -fn raw_string(input: &str) -> IResult<&str, &str> { +fn raw_string(input: &[u8]) -> IResult<&[u8], &[u8]> { escaped(alphanumeric1, '\\', one_of("\"n\\"))(input) } -fn string(input: &str) -> IResult<&str, &str> { - alt(( - delimited(char('\''), raw_string, char('\'')), - delimited(char('"'), raw_string, char('"')), - ))(input) +fn string(input: &[u8]) -> IResult<&[u8], &[u8]> { + // TODO: support special characters and unicode characters. + delimited(char('"'), raw_string, char('"'))(input) } -fn bracket_wildcard(input: &str) -> IResult<&str, ()> { +fn bracket_wildcard(input: &[u8]) -> IResult<&[u8], ()> { value( (), delimited( @@ -68,151 +68,184 @@ fn bracket_wildcard(input: &str) -> IResult<&str, ()> { )(input) } -fn dot_field(input: &str) -> IResult<&str, &str> { - preceded(char('.'), alphanumeric1)(input) +fn colon_field(input: &[u8]) -> IResult<&[u8], &[u8]> { + preceded(char(':'), alphanumeric1)(input) } -fn descent_field(input: &str) -> IResult<&str, &str> { - preceded(tag(".."), alphanumeric1)(input) +fn dot_field(input: &[u8]) -> IResult<&[u8], &[u8]> { + alt(( + preceded(char('.'), alphanumeric1), + preceded(char('.'), string), + ))(input) } -fn array_index(input: &str) -> IResult<&str, i32> { +fn object_field(input: &[u8]) -> IResult<&[u8], &[u8]> { delimited( terminated(char('['), multispace0), - i32, + string, preceded(multispace0, char(']')), )(input) } -fn array_indices(input: &str) -> IResult<&str, Vec> { - delimited( - terminated(char('['), multispace0), - separated_list1(delimited(multispace0, char(','), multispace0), i32), - preceded(multispace0, char(']')), - )(input) +fn index(input: &[u8]) -> IResult<&[u8], Index> { + alt(( + map(i32, Index::Index), + map( + preceded( + tuple((tag_no_case("last"), multispace0, char('-'), multispace0)), + i32, + ), + |v| Index::LastIndex(v.saturating_neg()), + ), + map( + preceded( + tuple((tag_no_case("last"), multispace0, char('+'), multispace0)), + i32, + ), + Index::LastIndex, + ), + map(tag_no_case("last"), |_| Index::LastIndex(0)), + ))(input) } -fn object_field(input: &str) -> IResult<&str, &str> { - delimited( - terminated(char('['), multispace0), - string, - preceded(multispace0, char(']')), - )(input) +fn array_index(input: &[u8]) -> IResult<&[u8], ArrayIndex> { + alt(( + map( + separated_pair( + index, + delimited(multispace0, tag_no_case("to"), multispace0), + index, + ), + |(s, e)| ArrayIndex::Slice((s, e)), + ), + map(index, ArrayIndex::Index), + ))(input) } -fn object_fields(input: &str) -> IResult<&str, Vec<&str>> { +fn array_indices(input: &[u8]) -> IResult<&[u8], Vec> { delimited( terminated(char('['), multispace0), - separated_list1(delimited(multispace0, char(','), multispace0), string), + separated_list1(delimited(multispace0, char(','), multispace0), array_index), preceded(multispace0, char(']')), )(input) } -fn array_slice(input: &str) -> IResult<&str, Path> { - map( - delimited( - char('['), - tuple(( - delimited(multispace0, opt(i32), multispace0), - char(':'), - delimited(multispace0, opt(i32), multispace0), - opt(preceded( - char(':'), - delimited(multispace0, u32, multispace0), - )), - )), - char(']'), - ), - |(opt_start, _, opt_end, opt_step)| Path::ArraySlice { - start: opt_start, - end: opt_end, - step: opt_step, - }, - )(input) -} - -fn path(input: &str) -> IResult<&str, Path> { +fn inner_path(input: &[u8]) -> IResult<&[u8], Path<'_>> { alt(( - value(Path::Root, char('$')), - value(Path::Current, char('@')), value(Path::DotWildcard, tag(".*")), - value(Path::DescentWildcard, tag("..*")), value(Path::BracketWildcard, bracket_wildcard), - map(dot_field, |v| Path::DotField(v.to_string())), - map(descent_field, |v| Path::DescentField(v.to_string())), - map(array_index, Path::ArrayIndex), + map(colon_field, |v| { + Path::ColonField(Cow::Borrowed(unsafe { std::str::from_utf8_unchecked(v) })) + }), + map(dot_field, |v| { + Path::DotField(Cow::Borrowed(unsafe { std::str::from_utf8_unchecked(v) })) + }), + map(object_field, |v| { + Path::ObjectField(Cow::Borrowed(unsafe { std::str::from_utf8_unchecked(v) })) + }), map(array_indices, Path::ArrayIndices), - map(object_field, |v| Path::ObjectField(v.to_string())), - map(object_fields, |v| { - let fields = v.iter().map(|s| s.to_string()).collect(); - Path::ObjectFields(fields) + ))(input) +} + +// Compatible with Snowflake query syntax, the first field name does not require the leading period +fn pre_path(input: &[u8]) -> IResult<&[u8], Path<'_>> { + alt(( + value(Path::Root, char('$')), + map(delimited(multispace0, alphanumeric1, multispace0), |v| { + Path::DotField(Cow::Borrowed(unsafe { std::str::from_utf8_unchecked(v) })) + }), + ))(input) +} + +fn path(input: &[u8]) -> IResult<&[u8], Path<'_>> { + alt(( + map(delimited(multispace0, inner_path, multispace0), |v| v), + map(delimited(multispace0, filter_expr, multispace0), |v| { + Path::FilterExpr(Box::new(v)) }), - map(array_slice, |v| v), - map(filter_expr, |v| Path::FilterExpr(Box::new(v))), ))(input) } -fn filter_expr(input: &str) -> IResult<&str, Expr> { +fn paths(input: &[u8]) -> IResult<&[u8], Vec>> { map( - delimited( - tag("[?("), - delimited(multispace0, expr, multispace0), - tag(")]"), + pair(opt(pre_path), many0(path)), + |(opt_pre_path, mut paths)| { + if let Some(pre_path) = opt_pre_path { + paths.insert(0, pre_path); + } + paths + }, + )(input) +} + +fn expr_paths(input: &[u8]) -> IResult<&[u8], Vec>> { + map( + pair( + alt(( + value(Path::Root, char('$')), + value(Path::Current, char('@')), + )), + many0(delimited(multispace0, inner_path, multispace0)), ), - |v| v, + |(pre_path, mut paths)| { + paths.insert(0, pre_path); + paths + }, )(input) } -fn paths(input: &str) -> IResult<&str, Vec> { - many1(path)(input) +fn filter_expr(input: &[u8]) -> IResult<&[u8], Expr<'_>> { + map( + delimited( + delimited(char('?'), multispace0, char('(')), + delimited(multispace0, expr_or, multispace0), + char(')'), + ), + |v| v, + )(input) } -fn op(input: &str) -> IResult<&str, BinaryOperator> { +fn op(input: &[u8]) -> IResult<&[u8], BinaryOperator> { alt(( value(BinaryOperator::Eq, tag("==")), value(BinaryOperator::NotEq, tag("!=")), - value(BinaryOperator::Lt, tag("<")), + value(BinaryOperator::NotEq, tag("<>")), + value(BinaryOperator::Lt, char('<')), value(BinaryOperator::Lte, tag("<=")), - value(BinaryOperator::Gt, tag(">")), + value(BinaryOperator::Gt, char('>')), value(BinaryOperator::Gte, tag(">=")), - value(BinaryOperator::Match, tag("=~")), - value(BinaryOperator::In, tag_no_case("in")), - value(BinaryOperator::Nin, tag_no_case("nin")), - value(BinaryOperator::Subsetof, tag_no_case("subsetof")), - value(BinaryOperator::Anyof, tag_no_case("anyof")), - value(BinaryOperator::Noneof, tag_no_case("noneof")), - value(BinaryOperator::Size, tag_no_case("size")), - value(BinaryOperator::Empty, tag_no_case("empty")), ))(input) } -fn path_value(input: &str) -> IResult<&str, PathValue> { +fn path_value(input: &[u8]) -> IResult<&[u8], PathValue<'_>> { alt(( value(PathValue::Null, tag("null")), value(PathValue::Boolean(true), tag("true")), value(PathValue::Boolean(false), tag("false")), - map(u64, PathValue::UInt64), - map(i64, PathValue::Int64), - map(double, PathValue::Float64), - map(string, |v| PathValue::String(v.to_string())), + map(u64, |v| PathValue::Number(Number::UInt64(v))), + map(i64, |v| PathValue::Number(Number::Int64(v))), + map(double, |v| PathValue::Number(Number::Float64(v))), + map(string, |v| { + PathValue::String(Cow::Borrowed(unsafe { std::str::from_utf8_unchecked(v) })) + }), ))(input) } -fn sub_expr(input: &str) -> IResult<&str, Expr> { +fn inner_expr(input: &[u8]) -> IResult<&[u8], Expr<'_>> { alt(( - map(paths, Expr::Paths), + map(expr_paths, Expr::Paths), map(path_value, |v| Expr::Value(Box::new(v))), ))(input) } -fn expr(input: &str) -> IResult<&str, Expr> { - // TODO, support more complex expressions. +fn expr_atom(input: &[u8]) -> IResult<&[u8], Expr<'_>> { + // TODO, support arithmetic expressions. alt(( map( tuple(( - delimited(multispace0, sub_expr, multispace0), + delimited(multispace0, inner_expr, multispace0), op, - delimited(multispace0, sub_expr, multispace0), + delimited(multispace0, inner_expr, multispace0), )), |(left, op, right)| Expr::BinaryOp { op, @@ -220,6 +253,47 @@ fn expr(input: &str) -> IResult<&str, Expr> { right: Box::new(right), }, ), - map(sub_expr, |v| v), + map( + delimited( + terminated(char('('), multispace0), + expr_or, + preceded(multispace0, char(')')), + ), + |expr| expr, + ), ))(input) } + +fn expr_and(input: &[u8]) -> IResult<&[u8], Expr<'_>> { + map( + separated_list1(delimited(multispace0, tag("&&"), multispace0), expr_atom), + |exprs| { + let mut expr = exprs[0].clone(); + for right in exprs.iter().skip(1) { + expr = Expr::BinaryOp { + op: BinaryOperator::And, + left: Box::new(expr), + right: Box::new(right.clone()), + }; + } + expr + }, + )(input) +} + +fn expr_or(input: &[u8]) -> IResult<&[u8], Expr<'_>> { + map( + separated_list1(delimited(multispace0, tag("||"), multispace0), expr_and), + |exprs| { + let mut expr = exprs[0].clone(); + for right in exprs.iter().skip(1) { + expr = Expr::BinaryOp { + op: BinaryOperator::Or, + left: Box::new(expr), + right: Box::new(right.clone()), + }; + } + expr + }, + )(input) +} diff --git a/src/jsonpath/path.rs b/src/jsonpath/path.rs index 28de194..8aa434a 100644 --- a/src/jsonpath/path.rs +++ b/src/jsonpath/path.rs @@ -12,76 +12,95 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::borrow::Cow; +use std::cmp::Ordering; use std::fmt::Display; use std::fmt::Formatter; +use crate::number::Number; + /// Represents a set of JSON Path chains. #[derive(Debug, Clone, PartialEq)] -pub struct JsonPath { - pub paths: Vec, +pub struct JsonPath<'a> { + pub paths: Vec>, } /// Represents a valid JSON Path. #[derive(Debug, Clone, PartialEq)] -pub enum Path { +pub enum Path<'a> { /// `$` represents the root node or element. Root, /// `@` represents the current node or element being processed in the filter expression. Current, - /// `.*` represents selecting all elements in an Array or Object. + /// `.*` represents selecting all elements in an Object. DotWildcard, - /// `..*` represents recursive selecting all elements in an Array or Object. - DescentWildcard, - /// `[*]` represents selecting all elements in an Array or Object. + /// `[*]` represents selecting all elements in an Array. BracketWildcard, - /// `. represents selecting element that matched the name in an Object, like `$.event`. - DotField(String), - /// `.. represents recursive selecting all elements that matched the name, like `$..event`. - DescentField(String), - /// `[''] represents selecting element that matched the name in an Object, like `$['event']`. - ObjectField(String), - /// `['','',..] represents selecting elements that matched one of the names in an Object, like `$['event', 'author']`. - ObjectFields(Vec), - /// `[] represents selecting element specified by the index in an Array, like `$[1]`. Index is 0-based. - ArrayIndex(i32), - /// `[,,..] represents selecting elements specified by the indices in an Array, like `$[1,2]`. - ArrayIndices(Vec), - /// `[::] represents selecting elements indexed between start and end with a step in an Array, like `$[0:4:2]`. - /// If start is omitted, selecting from the first element of the Array, like `$[:3]`. - /// If end is omitted, selecting from start until the last element of the Array, like `$[1:]`. - /// If step is not specified, the default value of 1 is used. - ArraySlice { - start: Option, - end: Option, - step: Option, - }, - /// `[?()]` represents selecting all elements in an object or array that match the filter expression, like `$.book[?(@.price < 10)]`. - FilterExpr(Box), + /// `.` represents selecting element that matched the name in an Object, like `$.event`. + /// The name can also be written as a string literal, allowing the name to contain special characters, like `$." $price"`. + DotField(Cow<'a, str>), + /// `:` represents selecting element that matched the name in an Object, like `$:event`. + ColonField(Cow<'a, str>), + /// `[""]` represents selecting element that matched the name in an Object, like `$["event"]`. + ObjectField(Cow<'a, str>), + /// `[,,..]` represents selecting elements specified by the indices in an Array. + /// There are several forms of index. + /// 1. A single number representing the 0-based `n-th` element in the Array. + /// e.g. `$[0]` represents the first element in an Array. + /// 2. The keyword `last` represents the last element in the Array, + /// and last minus a number represents the n-th element before the last element, + /// e.g. `$[last-1]` represents the penultimate element. + /// 3. The keyword `to` between two numbers represent all elements of a range in an Array, + /// e.g. `$[1 to last]` represents all the elements in the Array from the second to the last. + /// + /// There can be more than one index, e.g. `$[0, last-1 to last, 5]` represents the first, + /// the last two, and the sixth element in an Array. + ArrayIndices(Vec), + /// `?()` represents selecting all elements in an object or array that match the filter expression, like `$.book[?(@.price < 10)]`. + FilterExpr(Box>), } -/// Represents a literal value used in filter expression. +/// Represents the single index in an Array. #[derive(Debug, Clone, PartialEq)] -pub enum PathValue { +pub enum Index { + /// The 0-based index in an Array. + Index(i32), + /// The last n-th index in an Array. + LastIndex(i32), +} + +/// Represents the index in an Array. +#[derive(Debug, Clone, PartialEq)] +pub enum ArrayIndex { + /// The single number index. + Index(Index), + /// The range index between two number. + Slice((Index, Index)), +} + +/// Represents a literal value used in filter expression. +#[derive(Debug, Clone, PartialEq, PartialOrd)] +pub enum PathValue<'a> { /// Null value. Null, /// Boolean value. Boolean(bool), - /// 64-bit unsigned integer. - UInt64(u64), - /// 64-bit signed integer. - Int64(i64), - /// 64-bit floating point. - Float64(f64), + /// Number value. + Number(Number), /// UTF-8 string. - String(String), + String(Cow<'a, str>), } /// Represents the operators used in filter expression. #[derive(Debug, Clone, PartialEq, Eq)] pub enum BinaryOperator { + /// `&&` represents logical And operation. + And, + /// `||` represents logical Or operation. + Or, /// `==` represents left is equal to right. Eq, - /// `!=` represents left is not equal to right. + /// `!=` and `<>` represents left is not equal to right. NotEq, /// `<` represents left is less than right. Lt, @@ -91,40 +110,24 @@ pub enum BinaryOperator { Gt, /// `>=` represents left is greater than or equal to right. Gte, - /// `=~` represents left matches regular expression, like `[?(@.name =~ /foo.*?/i)]`. - Match, - /// `in` represents left exists in right, like `[?(@.size in ['S', 'M'])]`. - In, - /// `nin` represents left does not exists in right. - Nin, - /// `subsetof` represents left is a subset of right, like `[?(@.sizes subsetof ['S', 'M', 'L'])]`. - Subsetof, - /// `anyof` represents left has an intersection with right, like `[?(@.sizes anyof ['M', 'L'])]`. - Anyof, - /// `noneof` represents left has no intersection with right, like `[?(@.sizes noneof ['M', 'L'])]`. - Noneof, - /// `size` represents size of left (Array or String) should match right. - Size, - /// `empty` represents left (Array or String) should be empty or not empty. - Empty, } /// Represents a filter expression used to filter Array or Object. #[derive(Debug, Clone, PartialEq)] -pub enum Expr { +pub enum Expr<'a> { /// JSON Path chains. - Paths(Vec), + Paths(Vec>), /// Literal value. - Value(Box), + Value(Box>), /// Filter expression that performs a binary operation, returns a boolean value. BinaryOp { op: BinaryOperator, - left: Box, - right: Box, + left: Box>, + right: Box>, }, } -impl Display for JsonPath { +impl<'a> Display for JsonPath<'a> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { for path in &self.paths { write!(f, "{path}")?; @@ -133,7 +136,44 @@ impl Display for JsonPath { } } -impl Display for Path { +impl Display for Index { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Index::Index(idx) => { + write!(f, "{idx}")?; + } + Index::LastIndex(idx) => { + write!(f, "last")?; + match idx.cmp(&0) { + Ordering::Greater => { + write!(f, "+{idx}")?; + } + Ordering::Less => { + write!(f, "{idx}")?; + } + Ordering::Equal => {} + } + } + } + Ok(()) + } +} + +impl Display for ArrayIndex { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + ArrayIndex::Index(idx) => { + write!(f, "{idx}")?; + } + ArrayIndex::Slice((start, end)) => { + write!(f, "{start} to {end}")?; + } + } + Ok(()) + } +} + +impl<'a> Display for Path<'a> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { Path::Root => { @@ -145,33 +185,17 @@ impl Display for Path { Path::DotWildcard => { write!(f, ".*")?; } - Path::DescentWildcard => { - write!(f, "..*")?; - } Path::BracketWildcard => { write!(f, "[*]")?; } + Path::ColonField(field) => { + write!(f, ":{field}")?; + } Path::DotField(field) => { write!(f, ".{field}")?; } - Path::DescentField(field) => { - write!(f, "..{field}")?; - } Path::ObjectField(field) => { - write!(f, "['{field}']")?; - } - Path::ObjectFields(fields) => { - write!(f, "[")?; - for (i, field) in fields.iter().enumerate() { - if i > 0 { - write!(f, ", ")?; - } - write!(f, "'{field}'")?; - } - write!(f, "]")?; - } - Path::ArrayIndex(index) => { - write!(f, "[{index}]")?; + write!(f, "[\"{field}\"]")?; } Path::ArrayIndices(indices) => { write!(f, "[")?; @@ -183,30 +207,15 @@ impl Display for Path { } write!(f, "]")?; } - Path::ArraySlice { start, end, step } => { - write!(f, "[")?; - if let Some(start) = start { - write!(f, "{start}")?; - } - write!(f, ":")?; - if let Some(end) = end { - write!(f, "{end}")?; - } - if let Some(step) = step { - write!(f, ":")?; - write!(f, "{step}")?; - } - write!(f, "]")?; - } Path::FilterExpr(expr) => { - write!(f, "[?({expr})]")?; + write!(f, "?({expr})")?; } } Ok(()) } } -impl Display for PathValue { +impl<'a> Display for PathValue<'a> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { PathValue::Null => { @@ -219,17 +228,11 @@ impl Display for PathValue { write!(f, "false") } } - PathValue::UInt64(v) => { - write!(f, "{v}") - } - PathValue::Int64(v) => { - write!(f, "{v}") - } - PathValue::Float64(v) => { + PathValue::Number(v) => { write!(f, "{v}") } PathValue::String(v) => { - write!(f, "\'{v}\'") + write!(f, "\"{v}\"") } } } @@ -238,6 +241,12 @@ impl Display for PathValue { impl Display for BinaryOperator { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { + BinaryOperator::And => { + write!(f, "&&") + } + BinaryOperator::Or => { + write!(f, "||") + } BinaryOperator::Eq => { write!(f, "==") } @@ -256,35 +265,11 @@ impl Display for BinaryOperator { BinaryOperator::Gte => { write!(f, ">=") } - BinaryOperator::Match => { - write!(f, "=~") - } - BinaryOperator::In => { - write!(f, "in") - } - BinaryOperator::Nin => { - write!(f, "nin") - } - BinaryOperator::Subsetof => { - write!(f, "subsetOf") - } - BinaryOperator::Anyof => { - write!(f, "anyOf") - } - BinaryOperator::Noneof => { - write!(f, "noneOf") - } - BinaryOperator::Size => { - write!(f, "size") - } - BinaryOperator::Empty => { - write!(f, "empty") - } } } } -impl Display for Expr { +impl<'a> Display for Expr<'a> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { Expr::Paths(paths) => { @@ -296,7 +281,25 @@ impl Display for Expr { write!(f, "{v}")?; } Expr::BinaryOp { op, left, right } => { - write!(f, "{left} {op} {right}")?; + if let Expr::BinaryOp { op: left_op, .. } = &**left { + if left_op == &BinaryOperator::And || left_op == &BinaryOperator::Or { + write!(f, "({left})")?; + } else { + write!(f, "{left}")?; + } + } else { + write!(f, "{left}")?; + } + write!(f, " {op} ")?; + if let Expr::BinaryOp { op: right_op, .. } = &**right { + if right_op == &BinaryOperator::And || right_op == &BinaryOperator::Or { + write!(f, "({right})")?; + } else { + write!(f, "{right}")?; + } + } else { + write!(f, "{right}")?; + } } } Ok(()) diff --git a/src/jsonpath/selector.rs b/src/jsonpath/selector.rs new file mode 100644 index 0000000..f3945ab --- /dev/null +++ b/src/jsonpath/selector.rs @@ -0,0 +1,501 @@ +// Copyright 2023 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use byteorder::BigEndian; +use byteorder::WriteBytesExt; + +use std::borrow::Cow; +use std::cmp::Ordering; +use std::collections::VecDeque; + +use crate::constants::*; +use crate::jsonpath::ArrayIndex; +use crate::jsonpath::BinaryOperator; +use crate::jsonpath::Expr; +use crate::jsonpath::Index; +use crate::jsonpath::JsonPath; +use crate::jsonpath::Path; +use crate::jsonpath::PathValue; +use crate::number::Number; + +use nom::{ + bytes::complete::take, combinator::map, multi::count, number::complete::be_u32, IResult, +}; + +#[derive(Debug)] +enum Item<'a> { + Container(&'a [u8]), + Scalar(Vec), +} + +#[derive(Debug)] +enum ExprValue<'a> { + Values(Vec>), + Value(Box>), +} + +pub struct Selector<'a> { + json_path: JsonPath<'a>, +} + +impl<'a> Selector<'a> { + pub fn new(json_path: JsonPath<'a>) -> Self { + Self { json_path } + } + + pub fn select(&'a self, value: &'a [u8]) -> Vec> { + let root = value; + let mut items = VecDeque::new(); + items.push_back(Item::Container(value)); + + for path in self.json_path.paths.iter() { + match path { + &Path::Root => { + continue; + } + &Path::Current => unreachable!(), + Path::FilterExpr(expr) => { + let mut tmp_items = Vec::with_capacity(items.len()); + while let Some(item) = items.pop_front() { + let current = match item { + Item::Container(val) => val, + Item::Scalar(ref val) => val.as_slice(), + }; + if self.filter_expr(root, current, expr) { + tmp_items.push(item); + } + } + while let Some(item) = tmp_items.pop() { + items.push_front(item); + } + } + _ => { + let len = items.len(); + for _ in 0..len { + let item = items.pop_front().unwrap(); + match item { + Item::Container(current) => { + self.select_path(current, path, &mut items); + } + Item::Scalar(_) => { + // In lax mode, bracket wildcard allow Scalar value. + if path == &Path::BracketWildcard { + items.push_back(item); + } + } + } + } + } + } + } + let mut values = Vec::new(); + while let Some(item) = items.pop_front() { + match item { + Item::Container(val) => { + values.push(val.to_vec()); + } + Item::Scalar(val) => { + values.push(val); + } + } + } + values + } + + fn select_path(&'a self, current: &'a [u8], path: &Path<'a>, items: &mut VecDeque>) { + match path { + Path::DotWildcard => { + self.select_object_values(current, items); + } + Path::BracketWildcard => { + self.select_array_values(current, items); + } + Path::ColonField(name) | Path::DotField(name) | Path::ObjectField(name) => { + self.select_by_name(current, name, items); + } + Path::ArrayIndices(indices) => { + self.select_by_indices(current, indices, items); + } + _ => unreachable!(), + } + } + + // select all values in an Object. + fn select_object_values(&'a self, current: &'a [u8], items: &mut VecDeque>) { + let (rest, (ty, length)) = decode_header(current).unwrap(); + if ty != OBJECT_CONTAINER_TAG || length == 0 { + return; + } + let (rest, key_jentries) = decode_jentries(rest, length).unwrap(); + let (rest, val_jentries) = decode_jentries(rest, length).unwrap(); + let mut offset = 0; + for (_, length) in key_jentries.iter() { + offset += length; + } + let rest = &rest[offset..]; + offset = 0; + for (jty, jlength) in val_jentries.iter() { + let val = &rest[offset..offset + jlength]; + let item = if *jty == CONTAINER_TAG { + Item::Container(val) + } else { + let buf = Self::build_scalar_buf(*jty, *jlength, val); + Item::Scalar(buf) + }; + items.push_back(item); + offset += jlength; + } + } + + // select all values in an Array. + fn select_array_values(&'a self, current: &'a [u8], items: &mut VecDeque>) { + let (rest, (ty, length)) = decode_header(current).unwrap(); + if ty != ARRAY_CONTAINER_TAG { + // In lax mode, bracket wildcard allow Scalar value. + items.push_back(Item::Container(current)); + return; + } + let (rest, val_jentries) = decode_jentries(rest, length).unwrap(); + let mut offset = 0; + for (jty, jlength) in val_jentries.iter() { + let val = &rest[offset..offset + jlength]; + let item = if *jty == CONTAINER_TAG { + Item::Container(val) + } else { + let buf = Self::build_scalar_buf(*jty, *jlength, val); + Item::Scalar(buf) + }; + items.push_back(item); + offset += jlength; + } + } + + // select value in an Object by key name. + fn select_by_name(&'a self, current: &'a [u8], name: &str, items: &mut VecDeque>) { + let (rest, (ty, length)) = decode_header(current).unwrap(); + if ty != OBJECT_CONTAINER_TAG || length == 0 { + return; + } + let (rest, key_jentries) = decode_jentries(rest, length).unwrap(); + let (rest, val_jentries) = decode_jentries(rest, length).unwrap(); + let mut idx = 0; + let mut offset = 0; + let mut found = false; + for (i, (_, jlength)) in key_jentries.iter().enumerate() { + if name.len() != *jlength || found { + offset += jlength; + continue; + } + let (_, key) = decode_string(&rest[offset..], *jlength).unwrap(); + if name == unsafe { std::str::from_utf8_unchecked(key) } { + found = true; + idx = i; + } + offset += jlength; + } + if !found { + return; + } + let rest = &rest[offset..]; + offset = 0; + for (i, (jty, jlength)) in val_jentries.iter().enumerate() { + if i != idx { + offset += jlength; + continue; + } + let val = &rest[offset..offset + jlength]; + let item = if *jty == CONTAINER_TAG { + Item::Container(val) + } else { + let buf = Self::build_scalar_buf(*jty, *jlength, val); + Item::Scalar(buf) + }; + items.push_back(item); + break; + } + } + + // select values in an Array by indices. + fn select_by_indices( + &'a self, + current: &'a [u8], + indices: &Vec, + items: &mut VecDeque>, + ) { + let (rest, (ty, length)) = decode_header(current).unwrap(); + if ty != ARRAY_CONTAINER_TAG || length == 0 { + return; + } + let mut val_indices = Vec::new(); + for index in indices { + match index { + ArrayIndex::Index(idx) => { + if let Some(idx) = Self::convert_index(idx, length as i32) { + val_indices.push(idx); + } + } + ArrayIndex::Slice((start, end)) => { + if let Some(mut idxes) = Self::convert_slice(start, end, length as i32) { + val_indices.append(&mut idxes); + } + } + } + } + if val_indices.is_empty() { + return; + } + let (rest, jentries) = decode_jentries(rest, length).unwrap(); + let mut offset = 0; + let mut offsets = Vec::with_capacity(jentries.len()); + for (_, jlength) in jentries.iter() { + offsets.push(offset); + offset += jlength; + } + for i in val_indices { + let offset = offsets[i]; + let (jty, jlength) = jentries[i]; + let val = &rest[offset..offset + jlength]; + let item = if jty == CONTAINER_TAG { + Item::Container(val) + } else { + let buf = Self::build_scalar_buf(jty, jlength, val); + Item::Scalar(buf) + }; + items.push_back(item); + } + } + + fn build_scalar_buf(jty: u32, jlength: usize, val: &'a [u8]) -> Vec { + let mut buf = Vec::with_capacity(8 + jlength); + buf.write_u32::(SCALAR_CONTAINER_TAG).unwrap(); + let jentry = jty | jlength as u32; + buf.write_u32::(jentry).unwrap(); + buf.extend_from_slice(val); + buf + } + + // check and convert index to Array index. + fn convert_index(index: &Index, length: i32) -> Option { + let idx = match index { + Index::Index(idx) => *idx, + Index::LastIndex(idx) => length + *idx - 1, + }; + if idx >= 0 && idx < length { + Some(idx as usize) + } else { + None + } + } + + // check and convert slice to Array indices. + fn convert_slice(start: &Index, end: &Index, length: i32) -> Option> { + let start = match start { + Index::Index(idx) => *idx, + Index::LastIndex(idx) => length + *idx - 1, + }; + let end = match end { + Index::Index(idx) => *idx, + Index::LastIndex(idx) => length + *idx - 1, + }; + if start > end || start >= length || end < 0 { + None + } else { + let start = if start < 0 { 0 } else { start as usize }; + let end = if end >= length { + (length - 1) as usize + } else { + end as usize + }; + Some((start..=end).collect()) + } + } + + fn filter_expr(&'a self, root: &'a [u8], current: &'a [u8], expr: &Expr<'a>) -> bool { + match expr { + Expr::BinaryOp { op, left, right } => match op { + BinaryOperator::Or => { + let lhs = self.filter_expr(root, current, left); + let rhs = self.filter_expr(root, current, right); + lhs || rhs + } + BinaryOperator::And => { + let lhs = self.filter_expr(root, current, left); + let rhs = self.filter_expr(root, current, right); + lhs && rhs + } + _ => { + let lhs = self.convert_expr_val(root, current, *left.clone()); + let rhs = self.convert_expr_val(root, current, *right.clone()); + self.compare(op, &lhs, &rhs) + } + }, + _ => todo!(), + } + } + + fn convert_expr_val( + &'a self, + root: &'a [u8], + current: &'a [u8], + expr: Expr<'a>, + ) -> ExprValue<'a> { + match expr { + Expr::Value(value) => ExprValue::Value(value.clone()), + Expr::Paths(paths) => { + // get value from path and convert to `ExprValue`. + let mut items = VecDeque::new(); + if let Some(Path::Current) = paths.get(0) { + items.push_back(Item::Container(current)); + } else { + items.push_back(Item::Container(root)); + } + + for path in paths.iter().skip(1) { + match path { + &Path::Root | &Path::Current | &Path::FilterExpr(_) => unreachable!(), + _ => { + let len = items.len(); + for _ in 0..len { + let item = items.pop_front().unwrap(); + match item { + Item::Container(current) => { + self.select_path(current, path, &mut items); + } + Item::Scalar(_) => { + // In lax mode, bracket wildcard allow Scalar value. + if path == &Path::BracketWildcard { + items.push_back(item); + } + } + } + } + } + } + } + let mut values = Vec::with_capacity(items.len()); + while let Some(item) = items.pop_front() { + let val = match item { + Item::Container(val) => val, + Item::Scalar(ref val) => val.as_slice(), + }; + let (rest, (ty, _)) = decode_header(val).unwrap(); + if ty == SCALAR_CONTAINER_TAG { + let (rest, (jty, jlength)) = decode_jentry(rest).unwrap(); + let value = match jty { + NULL_TAG => PathValue::Null, + TRUE_TAG => PathValue::Boolean(true), + FALSE_TAG => PathValue::Boolean(false), + NUMBER_TAG => { + let n = Number::decode(&rest[0..jlength]); + PathValue::Number(n) + } + STRING_TAG => { + let v = &rest[0..jlength]; + PathValue::String(Cow::Owned(unsafe { + String::from_utf8_unchecked(v.to_vec()) + })) + } + _ => unreachable!(), + }; + values.push(value); + } + } + ExprValue::Values(values) + } + _ => unreachable!(), + } + } + + fn compare(&'a self, op: &BinaryOperator, lhs: &ExprValue<'a>, rhs: &ExprValue<'a>) -> bool { + match (lhs, rhs) { + (ExprValue::Value(lhs), ExprValue::Value(rhs)) => { + self.compare_value(op, *lhs.clone(), *rhs.clone()) + } + (ExprValue::Values(lhses), ExprValue::Value(rhs)) => { + for lhs in lhses.iter() { + if self.compare_value(op, lhs.clone(), *rhs.clone()) { + return true; + } + } + false + } + (ExprValue::Value(lhs), ExprValue::Values(rhses)) => { + for rhs in rhses.iter() { + if self.compare_value(op, *lhs.clone(), rhs.clone()) { + return true; + } + } + false + } + (ExprValue::Values(lhses), ExprValue::Values(rhses)) => { + for lhs in lhses.iter() { + for rhs in rhses.iter() { + if self.compare_value(op, lhs.clone(), rhs.clone()) { + return true; + } + } + } + false + } + } + } + + fn compare_value( + &'a self, + op: &BinaryOperator, + lhs: PathValue<'a>, + rhs: PathValue<'a>, + ) -> bool { + let order = lhs.partial_cmp(&rhs); + if let Some(order) = order { + match op { + BinaryOperator::Eq => order == Ordering::Equal, + BinaryOperator::NotEq => order != Ordering::Equal, + BinaryOperator::Lt => order == Ordering::Less, + BinaryOperator::Lte => order == Ordering::Equal || order == Ordering::Less, + BinaryOperator::Gt => order == Ordering::Greater, + BinaryOperator::Gte => order == Ordering::Equal || order == Ordering::Greater, + _ => unreachable!(), + } + } else { + false + } + } +} + +fn decode_header(input: &[u8]) -> IResult<&[u8], (u32, usize)> { + map(be_u32, |header| { + ( + header & CONTAINER_HEADER_TYPE_MASK, + (header & CONTAINER_HEADER_LEN_MASK) as usize, + ) + })(input) +} + +fn decode_jentry(input: &[u8]) -> IResult<&[u8], (u32, usize)> { + map(be_u32, |jentry| { + ( + jentry & JENTRY_TYPE_MASK, + (jentry & JENTRY_OFF_LEN_MASK) as usize, + ) + })(input) +} + +fn decode_jentries(input: &[u8], length: usize) -> IResult<&[u8], Vec<(u32, usize)>> { + count(decode_jentry, length)(input) +} + +fn decode_string(input: &[u8], length: usize) -> IResult<&[u8], &[u8]> { + take(length)(input) +} diff --git a/src/lib.rs b/src/lib.rs index 08e4178..bbf6112 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -20,7 +20,6 @@ mod error; mod from; mod functions; mod jentry; -mod json_path; pub mod jsonpath; mod number; mod parser; @@ -32,7 +31,6 @@ pub use de::from_slice; pub use error::Error; pub use from::*; pub use functions::*; -pub use json_path::*; pub use number::Number; pub use parser::parse_value; pub use value::*; diff --git a/src/number.rs b/src/number.rs index fffb092..3b1edee 100644 --- a/src/number.rs +++ b/src/number.rs @@ -23,7 +23,7 @@ use ordered_float::OrderedFloat; use super::constants::*; use super::error::Error; -#[derive(Clone, Debug)] +#[derive(Debug, Clone)] pub enum Number { Int64(i64), UInt64(u64), diff --git a/src/value.rs b/src/value.rs index 7179146..b895be8 100644 --- a/src/value.rs +++ b/src/value.rs @@ -18,7 +18,6 @@ use std::fmt::Debug; use std::fmt::Display; use std::fmt::Formatter; -use super::json_path::JsonPathRef; use super::number::Number; use super::ser::Encoder; @@ -219,40 +218,6 @@ impl<'a> Value<'a> { buf } - pub fn get_by_path(&self, paths: &[JsonPathRef<'a>]) -> Option<&Value<'a>> { - if paths.is_empty() { - return None; - } - let path = paths.get(0).unwrap(); - match path { - JsonPathRef::String(name) => { - if let Some(obj) = self.as_object() { - if let Some(val) = obj.get(name.as_ref()) { - let val = if paths.len() == 1 { - Some(val) - } else { - val.get_by_path(paths.get(1..).unwrap()) - }; - return val; - } - } - } - JsonPathRef::UInt64(index) => { - if let Some(arr) = self.as_array() { - if let Some(val) = arr.get(*index as usize) { - let val = if paths.len() == 1 { - Some(val) - } else { - val.get_by_path(paths.get(1..).unwrap()) - }; - return val; - } - } - } - } - None - } - pub fn get_by_name_ignore_case(&self, name: &str) -> Option<&Value<'a>> { match self { Value::Object(obj) => match obj.get(name) { diff --git a/tests/it/functions.rs b/tests/it/functions.rs index 55c81b8..af2ffed 100644 --- a/tests/it/functions.rs +++ b/tests/it/functions.rs @@ -17,11 +17,13 @@ use std::cmp::Ordering; use jsonb::{ array_length, as_bool, as_null, as_number, as_str, build_array, build_object, compare, - from_slice, get_by_name_ignore_case, get_by_path, is_array, is_object, object_keys, - parse_json_path, parse_value, to_bool, to_f64, to_i64, to_str, to_string, to_u64, Error, - JsonPathRef, Number, Object, Value, + from_slice, get_by_index, get_by_name, get_by_name_ignore_case, get_by_path, is_array, + is_object, object_keys, parse_value, to_bool, to_f64, to_i64, to_str, to_string, to_u64, + Number, Object, Value, }; +use jsonb::jsonpath::parse_json_path; + #[test] fn test_build_array() { let sources = vec![ @@ -135,39 +137,128 @@ fn test_array_length() { #[test] fn test_get_by_path() { - let sources = vec![ - (r#"1234"#, vec![JsonPathRef::UInt64(0)], None), - (r#"[]"#, vec![JsonPathRef::UInt64(0)], None), + let source = r#"{"name":"Fred","phones":[{"type":"home","number":3720453},{"type":"work","number":5062051}]}"#; + + let paths = vec![ + (r#"$.name"#, vec![r#""Fred""#]), ( - r#"["a","b","c"]"#, - vec![JsonPathRef::UInt64(0)], - Some(Value::String(Cow::from("a"))), + r#"$.phones"#, + vec![r#"[{"type":"home","number":3720453},{"type":"work","number":5062051}]"#], ), + (r#"$.phones.*"#, vec![]), ( - r#"{"k1":["a","b","c"], "k2":{"k3":3,"k4":4}}"#, - vec![JsonPathRef::String(Cow::from("k1")), JsonPathRef::UInt64(0)], - Some(Value::String(Cow::from("a"))), + r#"$.phones[*]"#, + vec![ + r#"{"type":"home","number":3720453}"#, + r#"{"type":"work","number":5062051}"#, + ], ), + (r#"$.phones[0].*"#, vec![r#"3720453"#, r#""home""#]), + (r#"$.phones[0].type"#, vec![r#""home""#]), + (r#"$.phones[*].type[*]"#, vec![r#""home""#, r#""work""#]), ( - r#"{"k1":["a","b","c"], "k2":{"k3":"v3","k4":"v4"}}"#, + r#"$.phones[0 to last].number"#, + vec![r#"3720453"#, r#"5062051"#], + ), + ( + r#"$.phones[0 to last]?(4 == 4)"#, vec![ - JsonPathRef::String(Cow::from("k2")), - JsonPathRef::String(Cow::from("k3")), + r#"{"type":"home","number":3720453}"#, + r#"{"type":"work","number":5062051}"#, ], + ), + ( + r#"$.phones[0 to last]?(@.type == "home")"#, + vec![r#"{"type":"home","number":3720453}"#], + ), + ( + r#"$.phones[0 to last]?(@.number == 3720453)"#, + vec![r#"{"type":"home","number":3720453}"#], + ), + ( + r#"$.phones[0 to last]?(@.number == 3720453 || @.type == "work")"#, + vec![ + r#"{"type":"home","number":3720453}"#, + r#"{"type":"work","number":5062051}"#, + ], + ), + ( + r#"$.phones[0 to last]?(@.number == 3720453 && @.type == "work")"#, + vec![], + ), + ]; + + let mut buf: Vec = Vec::new(); + let value = parse_value(source.as_bytes()).unwrap(); + value.write_to_vec(&mut buf); + for (path, expects) in paths { + let json_path = parse_json_path(path.as_bytes()).unwrap(); + let res = get_by_path(&buf, json_path); + assert_eq!(res.len(), expects.len()); + for (val, expect) in res.into_iter().zip(expects.iter()) { + let mut val_buf: Vec = Vec::new(); + let val_expect = parse_value(expect.as_bytes()).unwrap(); + val_expect.write_to_vec(&mut val_buf); + assert_eq!(val, val_buf); + } + } +} + +#[test] +fn test_get_by_index() { + let sources = vec![ + (r#"1234"#, 0, None), + (r#"[]"#, 0, None), + (r#"[1,2,3]"#, 1, Some(Value::Number(Number::UInt64(2)))), + (r#"["a","b","c"]"#, 0, Some(Value::String(Cow::from("a")))), + ]; + + let mut buf: Vec = Vec::new(); + for (s, idx, expect) in sources { + let res = get_by_index(s.as_bytes(), idx); + match expect.clone() { + Some(expect) => assert_eq!(from_slice(&res.unwrap()).unwrap(), expect), + None => assert_eq!(res, None), + } + let value = parse_value(s.as_bytes()).unwrap(); + value.write_to_vec(&mut buf); + let res = get_by_index(&buf, idx); + match expect { + Some(expect) => assert_eq!(from_slice(&res.unwrap()).unwrap(), expect), + None => assert_eq!(res, None), + } + buf.clear(); + } +} + +#[test] +fn test_get_by_name() { + let sources = vec![ + (r#"true"#, "a".to_string(), None), + (r#"[1,2,3]"#, "a".to_string(), None), + (r#"{"a":"v1","b":[1,2,3]}"#, "k".to_string(), None), + ( + r#"{"Aa":"v1", "aA":"v2", "aa":"v3"}"#, + "aa".to_string(), Some(Value::String(Cow::from("v3"))), ), + ( + r#"{"Aa":"v1", "aA":"v2", "aa":"v3"}"#, + "AA".to_string(), + None, + ), ]; let mut buf: Vec = Vec::new(); - for (s, paths, expect) in sources { - let res = get_by_path(s.as_bytes(), paths.clone()); + for (s, name, expect) in sources { + let res = get_by_name(s.as_bytes(), &name); match expect.clone() { Some(expect) => assert_eq!(from_slice(&res.unwrap()).unwrap(), expect), None => assert_eq!(res, None), } let value = parse_value(s.as_bytes()).unwrap(); value.write_to_vec(&mut buf); - let res = get_by_path(&buf, paths); + let res = get_by_name(&buf, &name); match expect { Some(expect) => assert_eq!(from_slice(&res.unwrap()).unwrap(), expect), None => assert_eq!(res, None), @@ -333,61 +424,6 @@ fn test_compare() { } } -#[test] -fn test_parse_json_path() { - let sources = vec![ - ( - r#"[1][2]"#, - vec![JsonPathRef::UInt64(1), JsonPathRef::UInt64(2)], - ), - ( - r#"["k1"]["k2"]"#, - vec![ - JsonPathRef::String(Cow::from("k1")), - JsonPathRef::String(Cow::from("k2")), - ], - ), - ( - r#"k1.k2:k3"#, - vec![ - JsonPathRef::String(Cow::from("k1")), - JsonPathRef::String(Cow::from("k2")), - JsonPathRef::String(Cow::from("k3")), - ], - ), - ("\"k1\"", vec![JsonPathRef::String(Cow::from("k1"))]), - ("\"k_1\"", vec![JsonPathRef::String(Cow::from("k_1"))]), - ("\"k_1k_2\"", vec![JsonPathRef::String(Cow::from("k_1k_2"))]), - ("\"k1k2\"", vec![JsonPathRef::String(Cow::from("k1k2"))]), - ( - r#"k1["k2"][1]"#, - vec![ - JsonPathRef::String(Cow::from("k1")), - JsonPathRef::String(Cow::from("k2")), - JsonPathRef::UInt64(1), - ], - ), - ]; - - for (s, expect) in sources { - let path = parse_json_path(s.as_bytes()).unwrap(); - assert_eq!(&path[..], &expect[..]); - } - - let wrong_sources = vec![ - (r#"\"\"\\k1\"\""#, Error::InvalidToken), - (r#"\\k1\\'"#, Error::InvalidToken), - (r#"\"kk\"1\""#, Error::InvalidToken), - ]; - for (s, expect) in wrong_sources { - let path = parse_json_path(s.as_bytes()); - match path { - Ok(_) => println!(), - Err(_) => assert_eq!(Error::InvalidToken, expect), - } - } -} - #[test] fn test_as_type() { let sources = vec![ diff --git a/tests/it/jsonpath_parser.rs b/tests/it/jsonpath_parser.rs index d6080ef..16c3da4 100644 --- a/tests/it/jsonpath_parser.rs +++ b/tests/it/jsonpath_parser.rs @@ -24,25 +24,29 @@ fn test_json_path() { let cases = &[ r#"$"#, r#"$.*"#, - r#"$..*"#, r#"$[*]"#, + r#"$.store.book[*].*"#, r#"$.store.book[0].price"#, - r#"$.store.book[-1].isbn"#, - r#"$..book[0,1].price"#, - r#"$..book[0:2]"#, - r#"$..book[:2]"#, - r#"$..book[-2:]"#, - r#"$..book[0:4:2]"#, - r#"$..book[:]['category']"#, - r#"$..book[*]['category', 'author']"#, - r#"$.store.book[?(@.isbn)].price"#, - r#"$.store.book[?(@.price > 10)].title"#, - r#"$.store.book[?(@.price < $.expensive)].price"#, - r#"$.store.book[:].price"#, + r#"$.store.book[last].isbn"#, + r#"$.store.book[0,1, last - 2].price"#, + r#"$.store.book[0,1 to last-1]"#, + r#"$."store"."book""#, + r#"$[*].book.price ? (@ == 10)"#, + r#"$.store.book?(@.price > 10).title"#, + r#"$.store.book?(@.price < $.expensive).price"#, + r#"$.store.book?(@.price < 10 && @.category == "fiction")"#, + r#"$.store.book?(@.price > 10 || @.category == "reference")"#, + r#"$.store.book?(@.price > 20 && (@.category == "reference" || @.category == "fiction"))"#, + // compatible with Snowflake style path + r#"[1][2]"#, + r#"["k1"]["k2"]"#, + r#"k1.k2:k3"#, + r#"k1["k2"][1]"#, ]; for case in cases { - let json_path = parse_json_path(case).unwrap(); + let json_path = parse_json_path(case.as_bytes()).unwrap(); + writeln!(file, "---------- Input ----------").unwrap(); writeln!(file, "{}", case).unwrap(); writeln!(file, "---------- Output ---------").unwrap(); @@ -56,7 +60,6 @@ fn test_json_path() { #[test] fn test_json_path_error() { let cases = &[ - r#"x"#, r#"$.["#, r#"$X"#, r#"$."#, @@ -71,7 +74,7 @@ fn test_json_path_error() { ]; for case in cases { - let res = parse_json_path(case); + let res = parse_json_path(case.as_bytes()); assert!(res.is_err()); } } diff --git a/tests/it/parser.rs b/tests/it/parser.rs index 7863e58..2a93635 100644 --- a/tests/it/parser.rs +++ b/tests/it/parser.rs @@ -267,6 +267,7 @@ fn test_parse_string() { ]); test_parse_ok(vec![ + ("\"abc\"", Value::String(Cow::from("abc"))), ("\"n\"", Value::String(Cow::from("n"))), ("\"\\\"\"", Value::String(Cow::from("\""))), ("\"\\\\\"", Value::String(Cow::from("\\"))), diff --git a/tests/it/testdata/json_path.txt b/tests/it/testdata/json_path.txt index 1f8ecba..d7d81fa 100644 --- a/tests/it/testdata/json_path.txt +++ b/tests/it/testdata/json_path.txt @@ -24,27 +24,34 @@ JsonPath { ---------- Input ---------- -$..* +$[*] ---------- Output --------- -$..* +$[*] ---------- AST ------------ JsonPath { paths: [ Root, - DescentWildcard, + BracketWildcard, ], } ---------- Input ---------- -$[*] +$.store.book[*].* ---------- Output --------- -$[*] +$.store.book[*].* ---------- AST ------------ JsonPath { paths: [ Root, + DotField( + "store", + ), + DotField( + "book", + ), BracketWildcard, + DotWildcard, ], } @@ -63,8 +70,14 @@ JsonPath { DotField( "book", ), - ArrayIndex( - 0, + ArrayIndices( + [ + Index( + Index( + 0, + ), + ), + ], ), DotField( "price", @@ -74,9 +87,9 @@ JsonPath { ---------- Input ---------- -$.store.book[-1].isbn +$.store.book[last].isbn ---------- Output --------- -$.store.book[-1].isbn +$.store.book[last].isbn ---------- AST ------------ JsonPath { paths: [ @@ -87,8 +100,14 @@ JsonPath { DotField( "book", ), - ArrayIndex( - -1, + ArrayIndices( + [ + Index( + LastIndex( + 0, + ), + ), + ], ), DotField( "isbn", @@ -98,20 +117,36 @@ JsonPath { ---------- Input ---------- -$..book[0,1].price +$.store.book[0,1, last - 2].price ---------- Output --------- -$..book[0, 1].price +$.store.book[0, 1, last-2].price ---------- AST ------------ JsonPath { paths: [ Root, - DescentField( + DotField( + "store", + ), + DotField( "book", ), ArrayIndices( [ - 0, - 1, + Index( + Index( + 0, + ), + ), + Index( + Index( + 1, + ), + ), + Index( + LastIndex( + -2, + ), + ), ], ), DotField( @@ -122,148 +157,240 @@ JsonPath { ---------- Input ---------- -$..book[0:2] +$.store.book[0,1 to last-1] ---------- Output --------- -$..book[0:2] +$.store.book[0, 1 to last-1] ---------- AST ------------ JsonPath { paths: [ Root, - DescentField( + DotField( + "store", + ), + DotField( "book", ), - ArraySlice { - start: Some( - 0, - ), - end: Some( - 2, - ), - step: None, - }, + ArrayIndices( + [ + Index( + Index( + 0, + ), + ), + Slice( + ( + Index( + 1, + ), + LastIndex( + -1, + ), + ), + ), + ], + ), ], } ---------- Input ---------- -$..book[:2] +$."store"."book" ---------- Output --------- -$..book[:2] +$.store.book ---------- AST ------------ JsonPath { paths: [ Root, - DescentField( + DotField( + "store", + ), + DotField( "book", ), - ArraySlice { - start: None, - end: Some( - 2, - ), - step: None, - }, ], } ---------- Input ---------- -$..book[-2:] +$[*].book.price ? (@ == 10) ---------- Output --------- -$..book[-2:] +$[*].book.price?(@ == 10) ---------- AST ------------ JsonPath { paths: [ Root, - DescentField( + BracketWildcard, + DotField( "book", ), - ArraySlice { - start: Some( - -2, - ), - end: None, - step: None, - }, + DotField( + "price", + ), + FilterExpr( + BinaryOp { + op: Eq, + left: Paths( + [ + Current, + ], + ), + right: Value( + Number( + UInt64( + 10, + ), + ), + ), + }, + ), ], } ---------- Input ---------- -$..book[0:4:2] +$.store.book?(@.price > 10).title ---------- Output --------- -$..book[0:4:2] +$.store.book?(@.price > 10).title ---------- AST ------------ JsonPath { paths: [ Root, - DescentField( + DotField( + "store", + ), + DotField( "book", ), - ArraySlice { - start: Some( - 0, - ), - end: Some( - 4, - ), - step: Some( - 2, - ), - }, + FilterExpr( + BinaryOp { + op: Gt, + left: Paths( + [ + Current, + DotField( + "price", + ), + ], + ), + right: Value( + Number( + UInt64( + 10, + ), + ), + ), + }, + ), + DotField( + "title", + ), ], } ---------- Input ---------- -$..book[:]['category'] +$.store.book?(@.price < $.expensive).price ---------- Output --------- -$..book[:]['category'] +$.store.book?(@.price < $.expensive).price ---------- AST ------------ JsonPath { paths: [ Root, - DescentField( + DotField( + "store", + ), + DotField( "book", ), - ArraySlice { - start: None, - end: None, - step: None, - }, - ObjectField( - "category", + FilterExpr( + BinaryOp { + op: Lt, + left: Paths( + [ + Current, + DotField( + "price", + ), + ], + ), + right: Paths( + [ + Root, + DotField( + "expensive", + ), + ], + ), + }, + ), + DotField( + "price", ), ], } ---------- Input ---------- -$..book[*]['category', 'author'] +$.store.book?(@.price < 10 && @.category == "fiction") ---------- Output --------- -$..book[*]['category', 'author'] +$.store.book?(@.price < 10 && @.category == "fiction") ---------- AST ------------ JsonPath { paths: [ Root, - DescentField( + DotField( + "store", + ), + DotField( "book", ), - BracketWildcard, - ObjectFields( - [ - "category", - "author", - ], + FilterExpr( + BinaryOp { + op: And, + left: BinaryOp { + op: Lt, + left: Paths( + [ + Current, + DotField( + "price", + ), + ], + ), + right: Value( + Number( + UInt64( + 10, + ), + ), + ), + }, + right: BinaryOp { + op: Eq, + left: Paths( + [ + Current, + DotField( + "category", + ), + ], + ), + right: Value( + String( + "fiction", + ), + ), + }, + }, ), ], } ---------- Input ---------- -$.store.book[?(@.isbn)].price +$.store.book?(@.price > 10 || @.category == "reference") ---------- Output --------- -$.store.book[?(@.isbn)].price +$.store.book?(@.price > 10 || @.category == "reference") ---------- AST ------------ JsonPath { paths: [ @@ -275,26 +402,52 @@ JsonPath { "book", ), FilterExpr( - Paths( - [ - Current, - DotField( - "isbn", + BinaryOp { + op: Or, + left: BinaryOp { + op: Gt, + left: Paths( + [ + Current, + DotField( + "price", + ), + ], ), - ], - ), - ), - DotField( - "price", + right: Value( + Number( + UInt64( + 10, + ), + ), + ), + }, + right: BinaryOp { + op: Eq, + left: Paths( + [ + Current, + DotField( + "category", + ), + ], + ), + right: Value( + String( + "reference", + ), + ), + }, + }, ), ], } ---------- Input ---------- -$.store.book[?(@.price > 10)].title +$.store.book?(@.price > 20 && (@.category == "reference" || @.category == "fiction")) ---------- Output --------- -$.store.book[?(@.price > 10)].title +$.store.book?(@.price > 20 && (@.category == "reference" || @.category == "fiction")) ---------- AST ------------ JsonPath { paths: [ @@ -307,92 +460,153 @@ JsonPath { ), FilterExpr( BinaryOp { - op: Gt, - left: Paths( - [ - Current, - DotField( - "price", + op: And, + left: BinaryOp { + op: Gt, + left: Paths( + [ + Current, + DotField( + "price", + ), + ], + ), + right: Value( + Number( + UInt64( + 20, + ), ), - ], + ), + }, + right: BinaryOp { + op: Or, + left: BinaryOp { + op: Eq, + left: Paths( + [ + Current, + DotField( + "category", + ), + ], + ), + right: Value( + String( + "reference", + ), + ), + }, + right: BinaryOp { + op: Eq, + left: Paths( + [ + Current, + DotField( + "category", + ), + ], + ), + right: Value( + String( + "fiction", + ), + ), + }, + }, + }, + ), + ], +} + + +---------- Input ---------- +[1][2] +---------- Output --------- +[1][2] +---------- AST ------------ +JsonPath { + paths: [ + ArrayIndices( + [ + Index( + Index( + 1, + ), ), - right: Value( - UInt64( - 10, + ], + ), + ArrayIndices( + [ + Index( + Index( + 2, ), ), - }, + ], ), - DotField( - "title", + ], +} + + +---------- Input ---------- +["k1"]["k2"] +---------- Output --------- +["k1"]["k2"] +---------- AST ------------ +JsonPath { + paths: [ + ObjectField( + "k1", + ), + ObjectField( + "k2", ), ], } ---------- Input ---------- -$.store.book[?(@.price < $.expensive)].price +k1.k2:k3 ---------- Output --------- -$.store.book[?(@.price < $.expensive)].price +.k1.k2:k3 ---------- AST ------------ JsonPath { paths: [ - Root, DotField( - "store", + "k1", ), DotField( - "book", - ), - FilterExpr( - BinaryOp { - op: Lt, - left: Paths( - [ - Current, - DotField( - "price", - ), - ], - ), - right: Paths( - [ - Root, - DotField( - "expensive", - ), - ], - ), - }, + "k2", ), - DotField( - "price", + ColonField( + "k3", ), ], } ---------- Input ---------- -$.store.book[:].price +k1["k2"][1] ---------- Output --------- -$.store.book[:].price +.k1["k2"][1] ---------- AST ------------ JsonPath { paths: [ - Root, DotField( - "store", + "k1", ), - DotField( - "book", + ObjectField( + "k2", ), - ArraySlice { - start: None, - end: None, - step: None, - }, - DotField( - "price", + ArrayIndices( + [ + Index( + Index( + 1, + ), + ), + ], ), ], }