Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ pub enum Error {
InvalidJsonbJEntry,

InvalidJsonPath,
InvalidKeyPath,

Syntax(ParseErrorCode, usize),
}
Expand Down
119 changes: 68 additions & 51 deletions src/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ use core::convert::TryInto;
use std::borrow::Cow;
use std::cmp::Ordering;
use std::collections::VecDeque;
use std::str::from_utf8;

use crate::constants::*;
use crate::error::*;
Expand All @@ -25,6 +24,7 @@ use crate::jentry::JEntry;
use crate::jsonpath::JsonPath;
use crate::jsonpath::Mode;
use crate::jsonpath::Selector;
use crate::keypath::KeyPath;
use crate::number::Number;
use crate::parser::parse_value;
use crate::value::Object;
Expand Down Expand Up @@ -271,32 +271,41 @@ pub fn get_by_name(value: &[u8], name: &str, ignore_case: bool) -> Option<Vec<u8

/// Extracts the JSON sub-object at the specified path,
/// where each path element is either an object field name or an array index (a negative index counts from the end of the array).
pub fn get_by_keypath<'a, I: Iterator<Item = &'a [u8]>>(
pub fn get_by_keypath<'a, I: Iterator<Item = &'a KeyPath<'a>>>(
value: &[u8],
keypath: I,
keypaths: I,
) -> Option<Vec<u8>> {
if !is_jsonb(value) {
return match parse_value(value) {
Ok(val) => {
let mut current_val = &val;
for key in keypath {
match from_utf8(key) {
Ok(k) => {
let res = match current_val {
Value::Array(arr) => match k.parse::<usize>() {
Ok(idx) => arr.get(idx),
Err(_) => None,
},
Value::Object(obj) => obj.get(k),
_ => None,
};
match res {
Some(v) => current_val = v,
None => return None,
};
}
Err(_) => return None,
}
for path in keypaths {
let res = match path {
KeyPath::Index(idx) => match current_val {
Value::Array(arr) => {
let length = arr.len() as i32;
if *idx > length || length + *idx < 0 {
None
} else {
let idx = if *idx >= 0 {
*idx as usize
} else {
(length + *idx) as usize
};
arr.get(idx)
}
}
_ => None,
},
KeyPath::QuotedName(name) | KeyPath::Name(name) => match current_val {
Value::Object(obj) => obj.get(name.as_ref()),
_ => None,
},
};
match res {
Some(v) => current_val = v,
None => return None,
};
}
Some(current_val.to_vec())
}
Expand All @@ -308,43 +317,51 @@ pub fn get_by_keypath<'a, I: Iterator<Item = &'a [u8]>>(
let mut curr_jentry_encoded = 0;
let mut curr_jentry: Option<JEntry> = None;

for key in keypath {
match from_utf8(key) {
Ok(k) => {
if let Some(ref jentry) = curr_jentry {
if jentry.type_code != CONTAINER_TAG {
return None;
for path in keypaths {
if let Some(ref jentry) = curr_jentry {
if jentry.type_code != CONTAINER_TAG {
return None;
}
}
let header = read_u32(value, curr_val_offset).unwrap();
let length = (header & CONTAINER_HEADER_LEN_MASK) as i32;
match (path, header & CONTAINER_HEADER_TYPE_MASK) {
(KeyPath::QuotedName(name) | KeyPath::Name(name), OBJECT_CONTAINER_TAG) => {
match get_jentry_by_name(value, curr_val_offset, header, name, false) {
Some((jentry, encoded, value_offset)) => {
curr_jentry_encoded = encoded;
curr_jentry = Some(jentry);
curr_val_offset = value_offset;
}
None => return None,
};
let header = read_u32(value, curr_val_offset).unwrap();
match header & CONTAINER_HEADER_TYPE_MASK {
OBJECT_CONTAINER_TAG => {
match get_jentry_by_name(value, curr_val_offset, header, k, false) {
Some((jentry, encoded, value_offset)) => {
curr_jentry_encoded = encoded;
curr_jentry = Some(jentry);
curr_val_offset = value_offset;
}
None => return None,
};
}
(KeyPath::Index(idx), ARRAY_CONTAINER_TAG) => {
if *idx > length || length + *idx < 0 {
return None;
} else {
let idx = if *idx >= 0 {
*idx as usize
} else {
(length + *idx) as usize
};
match get_jentry_by_index(value, curr_val_offset, header, idx) {
Some((jentry, encoded, value_offset)) => {
curr_jentry_encoded = encoded;
curr_jentry = Some(jentry);
curr_val_offset = value_offset;
}
None => return None,
}
ARRAY_CONTAINER_TAG => match k.parse::<usize>() {
Ok(idx) => match get_jentry_by_index(value, curr_val_offset, header, idx) {
Some((jentry, encoded, value_offset)) => {
curr_jentry_encoded = encoded;
curr_jentry = Some(jentry);
curr_val_offset = value_offset;
}
None => return None,
},
Err(_) => return None,
},
_ => return None,
}
}
Err(_) => return None,
(_, _) => return None,
}
}
// If the key paths is empty, return original value.
if curr_jentry_encoded == 0 {
return Some(value.to_vec());
}
curr_jentry
.map(|jentry| extract_by_jentry(&jentry, curr_jentry_encoded, curr_val_offset, value))
}
Expand Down
2 changes: 2 additions & 0 deletions src/jsonpath/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,7 @@ mod path;
mod selector;

pub use parser::parse_json_path;
pub(crate) use parser::raw_string;
pub(crate) use parser::string;
pub use path::*;
pub use selector::*;
17 changes: 9 additions & 8 deletions src/jsonpath/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ pub fn parse_json_path(input: &[u8]) -> Result<JsonPath<'_>, Error> {
}
Ok(json_path)
}
Err(nom::Err::Error(_err) | nom::Err::Failure(_err)) => Err(Error::InvalidJsonb),
Err(nom::Err::Error(_) | nom::Err::Failure(_)) => Err(Error::InvalidJsonPath),
Err(nom::Err::Incomplete(_)) => unreachable!(),
}
}
Expand Down Expand Up @@ -73,7 +73,7 @@ fn check_escaped(input: &[u8], i: &mut usize) -> bool {
true
}

fn raw_string(input: &[u8]) -> IResult<&[u8], Cow<'_, str>> {
pub(crate) fn raw_string(input: &[u8]) -> IResult<&[u8], Cow<'_, str>> {
let mut i = 0;
let mut escapes = 0;
while i < input.len() {
Expand All @@ -85,8 +85,9 @@ fn raw_string(input: &[u8]) -> IResult<&[u8], Cow<'_, str>> {
return Err(nom::Err::Error(NomError::new(input, ErrorKind::Char)));
}
}
b' ' | b'.' | b':' | b'[' | b']' | b'(' | b')' | b'?' | b'@' | b'$' | b'|' | b'<'
| b'>' | b'!' | b'=' | b'+' | b'-' | b'*' | b'/' | b'%' | b'"' | b'\'' => {
b' ' | b',' | b'.' | b':' | b'{' | b'}' | b'[' | b']' | b'(' | b')' | b'?' | b'@'
| b'$' | b'|' | b'<' | b'>' | b'!' | b'=' | b'+' | b'-' | b'*' | b'/' | b'%' | b'"'
| b'\'' => {
break;
}
_ => {
Expand All @@ -111,7 +112,7 @@ fn raw_string(input: &[u8]) -> IResult<&[u8], Cow<'_, str>> {
Err(nom::Err::Error(NomError::new(input, ErrorKind::Char)))
}

fn string(input: &[u8]) -> IResult<&[u8], Cow<'_, str>> {
pub(crate) fn string(input: &[u8]) -> IResult<&[u8], Cow<'_, str>> {
if input.is_empty() || input[0] != b'"' {
return Err(nom::Err::Error(NomError::new(input, ErrorKind::Char)));
}
Expand Down Expand Up @@ -215,9 +216,9 @@ fn array_index(input: &[u8]) -> IResult<&[u8], ArrayIndex> {

fn array_indices(input: &[u8]) -> IResult<&[u8], Vec<ArrayIndex>> {
delimited(
terminated(char('['), multispace0),
separated_list1(delimited(multispace0, char(','), multispace0), array_index),
preceded(multispace0, char(']')),
char('['),
separated_list1(char(','), delimited(multispace0, array_index, multispace0)),
char(']'),
)(input)
}

Expand Down
120 changes: 120 additions & 0 deletions src/keypath.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// Copyright 2023 Datafuse Labs.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use nom::{
branch::alt,
character::complete::{char, i32, multispace0},
combinator::map,
multi::separated_list1,
sequence::{delimited, preceded, terminated},
IResult,
};

use std::borrow::Cow;
use std::fmt::Display;
use std::fmt::Formatter;

use crate::jsonpath::raw_string;
use crate::jsonpath::string;
use crate::Error;

/// An ordered chain of key path elements, written textually as `{a,"b",0}`.
///
/// The syntax is meant to be compatible with the path syntax PostgreSQL uses
/// for extracting JSON sub-objects.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct KeyPaths<'a> {
    /// Path elements, in the order they appear in the textual form.
    pub paths: Vec<KeyPath<'a>>,
}

/// A single element of a key path.
///
/// All payloads (`i32`, `Cow<str>`) have total equality, so the type derives
/// `Eq` in addition to `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum KeyPath<'a> {
    /// Index into an array; negative values are allowed.
    Index(i32),
    /// Object field name that was written inside double quotes.
    QuotedName(Cow<'a, str>),
    /// Object field name that was written without quotes.
    Name(Cow<'a, str>),
}

impl<'a> Display for KeyPaths<'a> {
    /// Writes the elements separated by commas and wrapped in braces,
    /// e.g. `{a,"b",0}`; an empty list is written as `{}`.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.write_str("{")?;
        let mut elements = self.paths.iter();
        // The first element carries no separator; every later one is
        // prefixed with a comma.
        if let Some(first) = elements.next() {
            write!(f, "{first}")?;
            for element in elements {
                write!(f, ",{element}")?;
            }
        }
        f.write_str("}")
    }
}

impl<'a> Display for KeyPath<'a> {
    /// Writes an index as a decimal integer, a quoted name wrapped in double
    /// quotes, and a plain name as-is.
    ///
    /// The name text is written verbatim; quotes or backslashes inside it are
    /// not escaped.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        match self {
            KeyPath::Index(idx) => write!(f, "{idx}"),
            KeyPath::QuotedName(name) => write!(f, "\"{name}\""),
            KeyPath::Name(name) => write!(f, "{name}"),
        }
    }
}

/// Parsing the input string to key paths.
pub fn parse_key_paths(input: &[u8]) -> Result<KeyPaths<'_>, Error> {
match key_paths(input) {
Ok((rest, paths)) => {
if !rest.is_empty() {
return Err(Error::InvalidKeyPath);
}
let key_paths = KeyPaths { paths };
Ok(key_paths)
}
Err(nom::Err::Error(_) | nom::Err::Failure(_)) => Err(Error::InvalidKeyPath),
Err(nom::Err::Incomplete(_)) => unreachable!(),
}
}

/// Parses one key path element.
///
/// The alternatives are tried in this order: a signed 32-bit integer index,
/// a double-quoted name, then an unquoted name.
fn key_path(input: &[u8]) -> IResult<&[u8], KeyPath<'_>> {
    let index = map(i32, KeyPath::Index);
    let quoted_name = map(string, KeyPath::QuotedName);
    let plain_name = map(raw_string, KeyPath::Name);

    alt((index, quoted_name, plain_name))(input)
}

/// Parses a whole key path list: `{` elements `}`.
///
/// Whitespace is accepted before the opening brace, around every element and
/// after the closing brace. Braces that enclose only whitespace produce an
/// empty list.
fn key_paths(input: &[u8]) -> IResult<&[u8], Vec<KeyPath<'_>>> {
    // One element with optional surrounding whitespace.
    let element = delimited(multispace0, key_path, multispace0);

    // `{ a , b , ... }` with at least one element.
    let populated = delimited(
        preceded(multispace0, char('{')),
        separated_list1(char(','), element),
        terminated(char('}'), multispace0),
    );

    // `{ }` with nothing but whitespace between the braces.
    let blank = map(
        delimited(
            preceded(multispace0, char('{')),
            multispace0,
            terminated(char('}'), multispace0),
        ),
        |_| vec![],
    );

    alt((populated, blank))(input)
}
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ mod from;
mod functions;
mod jentry;
pub mod jsonpath;
pub mod keypath;
mod number;
mod parser;
mod ser;
Expand Down
Loading