Skip to content

Commit

Permalink
feat: add json_path_match & @?,@@ operators
Browse files Browse the repository at this point in the history
  • Loading branch information
akoshchiy committed Dec 3, 2023
1 parent 3111235 commit c084901
Show file tree
Hide file tree
Showing 12 changed files with 770 additions and 50 deletions.
20 changes: 10 additions & 10 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ opendal = { version = "0.42", features = [
] }
ethnum = { version = "1.3.2" }
ordered-float = { version = "3.6.0", default-features = false }
jsonb = { git = "https://github.com/datafuselabs/jsonb", rev = "1d7a3e9" }
jsonb = { git = "https://github.com/datafuselabs/jsonb", rev = "582c139" }

# openraft = { version = "0.8.2", features = ["compat-07"] }
# For debugging
Expand Down
12 changes: 12 additions & 0 deletions src/query/ast/src/ast/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,10 @@ pub enum JsonOperator {
AtArrow,
/// <@ Checks whether right json contains the left json
ArrowAt,
/// @? Checks whether the JSON path returns any item for the specified JSON value
AtQuestion,
/// @@ Returns the result of a JSON path predicate check for the specified JSON value.
AtAt,
}

impl JsonOperator {
Expand All @@ -507,6 +511,8 @@ impl JsonOperator {
JsonOperator::QuestionAnd => "json_exists_all_keys".to_string(),
JsonOperator::AtArrow => "json_contains_in_left".to_string(),
JsonOperator::ArrowAt => "json_contains_in_right".to_string(),
JsonOperator::AtQuestion => "json_path_exists".to_string(),
JsonOperator::AtAt => "json_path_match".to_string(),
}
}
}
Expand Down Expand Up @@ -777,6 +783,12 @@ impl Display for JsonOperator {
JsonOperator::ArrowAt => {
write!(f, "<@")
}
JsonOperator::AtQuestion => {
write!(f, "@?")
}
JsonOperator::AtAt => {
write!(f, "@@")
}
}
}
}
Expand Down
2 changes: 2 additions & 0 deletions src/query/ast/src/parser/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1338,6 +1338,8 @@ pub fn json_op(i: Input) -> IResult<JsonOperator> {
value(JsonOperator::QuestionAnd, rule! { "?&" }),
value(JsonOperator::AtArrow, rule! { "@>" }),
value(JsonOperator::ArrowAt, rule! { "<@" }),
value(JsonOperator::AtQuestion, rule! { "@?" }),
value(JsonOperator::AtAt, rule! { "@@" }),
))(i)
}

Expand Down
6 changes: 6 additions & 0 deletions src/query/ast/src/parser/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,12 @@ pub enum TokenKind {
/// Used as JSON operator.
#[token("@>")]
AtArrow,
/// Used as JSON operator.
#[token("@?")]
AtQuestion,
/// Used as JSON operator.
#[token("@@")]
AtAt,

// Keywords
//
Expand Down
2 changes: 1 addition & 1 deletion src/query/ast/tests/it/testdata/expr-error.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ error:
--> SQL:1:10
|
1 | CAST(col1)
| ---- ^ unexpected `)`, expecting `AS`, `,`, `(`, `IS`, `NOT`, `IN`, `EXISTS`, `BETWEEN`, `+`, `-`, `*`, `/`, `//`, `DIV`, `%`, `||`, `<->`, `>`, `<`, `>=`, `<=`, `=`, `<>`, `!=`, `^`, `AND`, `OR`, `XOR`, `LIKE`, `REGEXP`, `RLIKE`, `SOUNDS`, <BitWiseOr>, <BitWiseAnd>, <BitWiseXor>, <ShiftLeft>, <ShiftRight>, `->`, `->>`, `#>`, `#>>`, `?`, `?|`, `?&`, `@>`, `<@`, <Factorial>, <SquareRoot>, <BitWiseNot>, <CubeRoot>, <Abs>, `CAST`, `TRY_CAST`, `DATE_ADD`, `DATE_SUB`, `DATE_TRUNC`, `DATE`, `TIMESTAMP`, `INTERVAL`, `::`, or 26 more ...
| ---- ^ unexpected `)`, expecting `AS`, `,`, `(`, `IS`, `NOT`, `IN`, `EXISTS`, `BETWEEN`, `+`, `-`, `*`, `/`, `//`, `DIV`, `%`, `||`, `<->`, `>`, `<`, `>=`, `<=`, `=`, `<>`, `!=`, `^`, `AND`, `OR`, `XOR`, `LIKE`, `REGEXP`, `RLIKE`, `SOUNDS`, <BitWiseOr>, <BitWiseAnd>, <BitWiseXor>, <ShiftLeft>, <ShiftRight>, `->`, `->>`, `#>`, `#>>`, `?`, `?|`, `?&`, `@>`, `<@`, `@?`, `@@`, <Factorial>, <SquareRoot>, <BitWiseNot>, <CubeRoot>, <Abs>, `CAST`, `TRY_CAST`, `DATE_ADD`, `DATE_SUB`, `DATE_TRUNC`, `DATE`, `TIMESTAMP`, or 28 more ...
| |
| while parsing `CAST(... AS ...)`
| while parsing expression
Expand Down
2 changes: 1 addition & 1 deletion src/query/ast/tests/it/testdata/statement-error.txt
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ error:
--> SQL:1:41
|
1 | SELECT * FROM t GROUP BY GROUPING SETS ()
| ------ ^ unexpected `)`, expecting `(`, `IS`, `IN`, `EXISTS`, `BETWEEN`, `+`, `-`, `*`, `/`, `//`, `DIV`, `%`, `||`, `<->`, `>`, `<`, `>=`, `<=`, `=`, `<>`, `!=`, `^`, `AND`, `OR`, `XOR`, `LIKE`, `NOT`, `REGEXP`, `RLIKE`, `SOUNDS`, <BitWiseOr>, <BitWiseAnd>, <BitWiseXor>, <ShiftLeft>, <ShiftRight>, `->`, `->>`, `#>`, `#>>`, `?`, `?|`, `?&`, `@>`, `<@`, <Factorial>, <SquareRoot>, <BitWiseNot>, <CubeRoot>, <Abs>, `CAST`, `TRY_CAST`, `DATE_ADD`, `DATE_SUB`, `DATE_TRUNC`, `DATE`, `TIMESTAMP`, `INTERVAL`, `::`, `EXTRACT`, `DATE_PART`, or 24 more ...
| ------ ^ unexpected `)`, expecting `(`, `IS`, `IN`, `EXISTS`, `BETWEEN`, `+`, `-`, `*`, `/`, `//`, `DIV`, `%`, `||`, `<->`, `>`, `<`, `>=`, `<=`, `=`, `<>`, `!=`, `^`, `AND`, `OR`, `XOR`, `LIKE`, `NOT`, `REGEXP`, `RLIKE`, `SOUNDS`, <BitWiseOr>, <BitWiseAnd>, <BitWiseXor>, <ShiftLeft>, <ShiftRight>, `->`, `->>`, `#>`, `#>>`, `?`, `?|`, `?&`, `@>`, `<@`, `@?`, `@@`, <Factorial>, <SquareRoot>, <BitWiseNot>, <CubeRoot>, <Abs>, `CAST`, `TRY_CAST`, `DATE_ADD`, `DATE_SUB`, `DATE_TRUNC`, `DATE`, `TIMESTAMP`, `INTERVAL`, `::`, or 26 more ...
| |
| while parsing `SELECT ...`

Expand Down
168 changes: 142 additions & 26 deletions src/query/functions/src/scalars/variant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,12 @@ use jsonb::get_by_path_first;
use jsonb::is_array;
use jsonb::is_object;
use jsonb::jsonpath::parse_json_path;
use jsonb::jsonpath::JsonPath;
use jsonb::keypath::parse_key_paths;
use jsonb::object_keys;
use jsonb::parse_value;
use jsonb::path_exists;
use jsonb::path_match;
use jsonb::strip_nulls;
use jsonb::to_bool;
use jsonb::to_f64;
Expand Down Expand Up @@ -513,33 +515,53 @@ pub fn register(registry: &mut FunctionRegistry) {
),
);

registry.register_passthrough_nullable_2_arg::<VariantType, StringType, BooleanType, _, _>(
"json_path_exists",
|_, _, _| FunctionDomain::MayThrow,
vectorize_with_builder_2_arg::<VariantType, StringType, BooleanType>(
|val, path, output, ctx| {
if let Some(validity) = &ctx.validity {
if !validity.get_bit(output.len()) {
output.push(false);
return;
}
}
match parse_json_path(path) {
Ok(json_path) => {
let res = path_exists(val, json_path);
output.push(res);
}
Err(_) => {
ctx.set_error(
output.len(),
format!("Invalid JSON Path '{}'", &String::from_utf8_lossy(path),),
);
output.push(false);
}
}
registry.register_function_factory("json_path_match", |_, args_type| {
if args_type.len() != 2 {
return None;
}
if (args_type[0].remove_nullable() != DataType::Variant && args_type[0] != DataType::Null)
|| (args_type[1].remove_nullable() != DataType::String
&& args_type[1] != DataType::Null)
{
return None;
}
Some(Arc::new(Function {
signature: FunctionSignature {
name: "json_path_match".to_string(),
args_type: args_type.to_vec(),
return_type: DataType::Nullable(Box::new(DataType::Boolean)),
},
),
);
eval: FunctionEval::Scalar {
calc_domain: Box::new(|_, _| FunctionDomain::MayThrow),
eval: Box::new(|args, ctx| path_predicate_fn(args, ctx, path_match)),
},
}))
});

// Registers `json_path_exists(variant, string)`, the function the `@?` JSON
// operator maps to. The factory accepts exactly two arguments: a (nullable)
// Variant or NULL, and a (nullable) String or NULL. The result type is always
// nullable Boolean.
registry.register_function_factory("json_path_exists", |_, args_type| {
    if args_type.len() != 2 {
        return None;
    }
    if (args_type[0].remove_nullable() != DataType::Variant && args_type[0] != DataType::Null)
        || (args_type[1].remove_nullable() != DataType::String
            && args_type[1] != DataType::Null)
    {
        return None;
    }
    Some(Arc::new(Function {
        signature: FunctionSignature {
            name: "json_path_exists".to_string(),
            args_type: args_type.to_vec(),
            return_type: DataType::Nullable(Box::new(DataType::Boolean)),
        },
        eval: FunctionEval::Scalar {
            // `path_predicate_fn` reports an "Invalid JSON Path" error through
            // `ctx.set_error` whenever the path argument fails to parse, so this
            // function can fail even though `path_exists` itself is infallible.
            // The domain must therefore be `MayThrow`, matching `json_path_match`.
            calc_domain: Box::new(|_, _| FunctionDomain::MayThrow),
            eval: Box::new(|args, ctx| {
                // `path_exists` returns a plain bool; wrap it so it fits the
                // fallible predicate signature expected by `path_predicate_fn`.
                path_predicate_fn(args, ctx, |json, path| Ok(path_exists(json, path)))
            }),
        },
    }))
});

registry.register_combine_nullable_2_arg::<VariantType, StringType, VariantType, _, _>(
"get_path",
Expand Down Expand Up @@ -1470,3 +1492,97 @@ fn get_by_keypath_fn(
}
}
}

/// Evaluates a JSON path predicate row by row and returns a nullable boolean.
///
/// `args[0]` supplies the JSON value and `args[1]` the JSON path text; either
/// may be a scalar or a column. For every row:
///
/// * if the path argument is not a string, or the JSON argument is not a
///   variant, the row is NULL;
/// * if the path text cannot be parsed, an `Invalid JSON Path '...'` error is
///   recorded on `ctx` for that row and the row is NULL;
/// * if `predicate` fails, its error text is recorded on `ctx` and the row is
///   NULL;
/// * otherwise the row holds the boolean returned by `predicate`.
///
/// When no argument is a column, a single row is evaluated and a scalar
/// (`Boolean` or `Null`) is returned; otherwise a nullable boolean column is
/// returned whose length is that of the first column argument.
fn path_predicate_fn<'a, P>(
    args: &'a [ValueRef<AnyType>],
    ctx: &'a mut EvalContext,
    predicate: P,
) -> Value<AnyType>
where
    P: Fn(&'a [u8], JsonPath<'a>) -> Result<bool, jsonb::Error>,
{
    /// Parses raw path bytes, turning any parse failure into the error message
    /// that is reported to the user.
    fn parse_path(raw: &[u8]) -> Result<JsonPath<'_>, String> {
        parse_json_path(raw)
            .map_err(|_| format!("Invalid JSON Path '{}'", &String::from_utf8_lossy(raw)))
    }

    // A scalar path is parsed once here and cloned for each row below.
    let scalar_jsonpath = if let ValueRef::Scalar(ScalarRef::String(v)) = &args[1] {
        Some(parse_path(v))
    } else {
        None
    };

    // The first column argument (if any) determines the number of rows.
    let len_opt = args.iter().find_map(|arg| match arg {
        ValueRef::Column(col) => Some(col.len()),
        _ => None,
    });
    let len = len_opt.unwrap_or(1);

    let mut output = MutableBitmap::with_capacity(len);
    let mut validity = MutableBitmap::with_capacity(len);

    for row in 0..len {
        // Path for this row: `None` means the path argument is not a string.
        let parsed = match &args[1] {
            ValueRef::Scalar(_) => scalar_jsonpath.clone(),
            // SAFETY: assumes every column argument has at least `len` rows,
            // i.e. all column arguments share the same length — TODO confirm.
            ValueRef::Column(col) => match unsafe { col.index_unchecked(row) } {
                ScalarRef::String(buf) => Some(parse_path(buf)),
                _ => None,
            },
        };

        // Ok(Some(b)) => valid boolean, Ok(None) => NULL, Err(msg) => error + NULL.
        let outcome: Result<Option<bool>, String> = match parsed {
            None => Ok(None),
            Some(Err(msg)) => Err(msg),
            Some(Ok(path)) => {
                let json_row = match &args[0] {
                    ValueRef::Scalar(scalar) => scalar.clone(),
                    // SAFETY: same length assumption as above — TODO confirm.
                    ValueRef::Column(col) => unsafe { col.index_unchecked(row) },
                };
                match json_row {
                    ScalarRef::Variant(json) => predicate(json, path)
                        .map(Some)
                        .map_err(|err| err.to_string()),
                    _ => Ok(None),
                }
            }
        };

        match outcome {
            Ok(Some(flag)) => {
                output.push(flag);
                validity.push(true);
            }
            Ok(None) => {
                output.push(false);
                validity.push(false);
            }
            Err(msg) => {
                // `output.len()` is the index of the row being produced.
                ctx.set_error(output.len(), msg);
                output.push(false);
                validity.push(false);
            }
        }
    }

    let validity: Bitmap = validity.into();

    if len_opt.is_some() {
        return Value::Column(Column::Boolean(output.into())).wrap_nullable(Some(validity));
    }

    // All-scalar input: exactly one row was evaluated.
    if validity.get_bit(0) {
        Value::Scalar(Scalar::Boolean(output.get(0)))
    } else {
        Value::Scalar(Scalar::Null)
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -1870,8 +1870,8 @@ Functions overloads:
0 json_object FACTORY
0 json_object_keep_null FACTORY
0 json_object_keys(Variant NULL) :: Variant NULL
0 json_path_exists(Variant, String) :: Boolean
1 json_path_exists(Variant NULL, String NULL) :: Boolean NULL
0 json_path_exists FACTORY
0 json_path_match FACTORY
0 json_path_query FACTORY
0 json_path_query_array(Variant, String) :: Variant NULL
1 json_path_query_array(Variant NULL, String NULL) :: Variant NULL
Expand Down
Loading

0 comments on commit c084901

Please sign in to comment.