Skip to content

Commit

Permalink
feat(cubesql): Support [I]LIKE ... ESCAPE ... SQL push down
Browse files Browse the repository at this point in the history
  • Loading branch information
MazterQyou authored Sep 16, 2024
1 parent 6f8b097 commit 2bda0dd
Show file tree
Hide file tree
Showing 12 changed files with 332 additions and 6 deletions.
3 changes: 3 additions & 0 deletions packages/cubejs-schema-compiler/src/adapter/BaseQuery.js
Original file line number Diff line number Diff line change
Expand Up @@ -3225,6 +3225,9 @@ export class BaseQuery {
not: 'NOT ({{ expr }})',
true: 'TRUE',
false: 'FALSE',
like: '{{ expr }} {% if negated %}NOT {% endif %}LIKE {{ pattern }}',
ilike: '{{ expr }} {% if negated %}NOT {% endif %}ILIKE {{ pattern }}',
like_escape: '{{ like_expr }} ESCAPE {{ escape_char }}',
},
quotes: {
identifiers: '"',
Expand Down
2 changes: 2 additions & 0 deletions packages/cubejs-schema-compiler/src/adapter/BigqueryQuery.ts
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,8 @@ export class BigqueryQuery extends BaseQuery {
templates.expressions.interval = 'INTERVAL {{ interval }}';
templates.expressions.extract = 'EXTRACT({% if date_part == \'DOW\' %}DAYOFWEEK{% elif date_part == \'DOY\' %}DAYOFYEAR{% else %}{{ date_part }}{% endif %} FROM {{ expr }})';
templates.expressions.timestamp_literal = 'TIMESTAMP(\'{{ value }}\')';
delete templates.expressions.ilike;
delete templates.expressions.like_escape;
templates.types.boolean = 'BOOL';
templates.types.float = 'FLOAT64';
templates.types.double = 'FLOAT64';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ export class ClickHouseQuery extends BaseQuery {
// TODO: Introduce additional filter in jinja? or parseDateTimeBestEffort?
// https://github.com/ClickHouse/ClickHouse/issues/19351
templates.expressions.timestamp_literal = 'parseDateTimeBestEffort(\'{{ value }}\')';
delete templates.expressions.like_escape;
templates.quotes.identifiers = '`';
templates.quotes.escape = '\\`';
templates.types.boolean = 'BOOL';
Expand Down
1 change: 1 addition & 0 deletions packages/cubejs-schema-compiler/src/adapter/MssqlQuery.ts
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ export class MssqlQuery extends BaseQuery {
const templates = super.sqlTemplates();
templates.functions.LEAST = 'LEAST({{ args_concat }})';
templates.functions.GREATEST = 'GREATEST({{ args_concat }})';
delete templates.expressions.ilike;
templates.types.string = 'VARCHAR';
templates.types.boolean = 'BIT';
templates.types.integer = 'INT';
Expand Down
1 change: 1 addition & 0 deletions packages/cubejs-schema-compiler/src/adapter/MysqlQuery.ts
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ export class MysqlQuery extends BaseQuery {
const templates = super.sqlTemplates();
templates.quotes.identifiers = '`';
templates.quotes.escape = '\\`';
delete templates.expressions.ilike;
templates.types.string = 'VARCHAR';
templates.types.boolean = 'TINYINT';
templates.types.timestamp = 'DATETIME';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ export class PrestodbQuery extends BaseQuery {
templates.expressions.extract = 'EXTRACT({{ date_part }} FROM {{ expr }})';
templates.expressions.interval_single_date_part = 'INTERVAL \'{{ num }}\' {{ date_part }}';
templates.expressions.timestamp_literal = 'from_iso8601_timestamp(\'{{ value }}\')';
delete templates.expressions.ilike;
templates.types.string = 'VARCHAR';
templates.types.float = 'REAL';
// Presto intervals have a YearMonth or DayTime type variants, but no universal type
Expand Down
94 changes: 91 additions & 3 deletions rust/cubesql/cubesql/src/compile/engine/df/wrapper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use crate::{
filters::Decimal,
utils::{DecomposedDayTime, DecomposedMonthDayNano},
},
WrappedSelectType,
LikeType, WrappedSelectType,
},
},
config::ConfigObj,
Expand Down Expand Up @@ -1285,8 +1285,96 @@ impl CubeScanWrapperNode {
Ok((resulting_sql, sql_query))
}
// Expr::AnyExpr { .. } => {}
// Expr::Like(_) => {}
// Expr::ILike(_) => {}
// LIKE: render the matched expression, the pattern and the optional escape
// character to SQL one after another, then combine the pieces through the
// SQL templates of the current generator.
Expr::Like(like) => {
// Matched expression (left-hand side). Every call below takes the current
// `sql_query` and hands back an updated one, so the calls stay in this order.
let (expr, sql_query) = Self::generate_sql_for_expr(
plan.clone(),
sql_query,
sql_generator.clone(),
*like.expr,
ungrouped_scan_node.clone(),
subqueries.clone(),
)
.await?;
// Pattern (right-hand side of LIKE).
let (pattern, sql_query) = Self::generate_sql_for_expr(
plan.clone(),
sql_query,
sql_generator.clone(),
*like.pattern,
ungrouped_scan_node.clone(),
subqueries.clone(),
)
.await?;
// The escape character, when present, is wrapped into a Utf8 literal
// expression and rendered through the same `generate_sql_for_expr` path as
// any other expression; without one, `None` is passed to the template call.
let (escape_char, sql_query) = match like.escape_char {
Some(escape_char) => {
let (escape_char, sql_query) = Self::generate_sql_for_expr(
plan.clone(),
sql_query,
sql_generator.clone(),
Expr::Literal(ScalarValue::Utf8(Some(escape_char.to_string()))),
ungrouped_scan_node.clone(),
subqueries.clone(),
)
.await?;
(Some(escape_char), sql_query)
}
None => (None, sql_query),
};
// Assemble the final SQL; a failure reported by the template call is
// surfaced as an internal DataFusion error.
let resulting_sql = sql_generator
.get_sql_templates()
.like_expr(LikeType::Like, expr, like.negated, pattern, escape_char)
.map_err(|e| {
DataFusionError::Internal(format!(
"Can't generate SQL for like expr: {}",
e
))
})?;
Ok((resulting_sql, sql_query))
}
// ILIKE: same flow as the LIKE arm, but the template call is made with
// `LikeType::ILike` and the error message mentions "ilike".
Expr::ILike(ilike) => {
// Matched expression (left-hand side). Every call below takes the current
// `sql_query` and hands back an updated one, so the calls stay in this order.
let (expr, sql_query) = Self::generate_sql_for_expr(
plan.clone(),
sql_query,
sql_generator.clone(),
*ilike.expr,
ungrouped_scan_node.clone(),
subqueries.clone(),
)
.await?;
// Pattern (right-hand side of ILIKE).
let (pattern, sql_query) = Self::generate_sql_for_expr(
plan.clone(),
sql_query,
sql_generator.clone(),
*ilike.pattern,
ungrouped_scan_node.clone(),
subqueries.clone(),
)
.await?;
// The escape character, when present, is wrapped into a Utf8 literal
// expression and rendered through the same `generate_sql_for_expr` path as
// any other expression; without one, `None` is passed to the template call.
let (escape_char, sql_query) = match ilike.escape_char {
Some(escape_char) => {
let (escape_char, sql_query) = Self::generate_sql_for_expr(
plan.clone(),
sql_query,
sql_generator.clone(),
Expr::Literal(ScalarValue::Utf8(Some(escape_char.to_string()))),
ungrouped_scan_node.clone(),
subqueries.clone(),
)
.await?;
(Some(escape_char), sql_query)
}
None => (None, sql_query),
};
// Assemble the final SQL; a failure reported by the template call is
// surfaced as an internal DataFusion error.
let resulting_sql = sql_generator
.get_sql_templates()
.like_expr(LikeType::ILike, expr, ilike.negated, pattern, escape_char)
.map_err(|e| {
DataFusionError::Internal(format!(
"Can't generate SQL for ilike expr: {}",
e
))
})?;
Ok((resulting_sql, sql_query))
}
// Expr::SimilarTo(_) => {}
Expr::Not(expr) => {
let (expr, sql_query) = Self::generate_sql_for_expr(
Expand Down
52 changes: 52 additions & 0 deletions rust/cubesql/cubesql/src/compile/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18440,4 +18440,56 @@ LIMIT {{ limit }}{% endif %}"#.to_string(),

Ok(())
}

#[tokio::test]
async fn test_thoughtspot_like_escape_push_down() {
    // The assertions below inspect the wrapped SQL, which is only looked up
    // when SQL push down is enabled.
    if !Rewriter::sql_push_down_enabled() {
        return;
    }
    init_testing_logger();

    // `LIKE ... ESCAPE '!'` filter whose pattern is built from nested
    // `replace` calls.
    let thoughtspot_query = r#"
SELECT CAST("customer_gender" AS TEXT) AS "customer_gender"
FROM "public"."KibanaSampleDataEcommerce"
WHERE
"customer_gender" LIKE (
'%' || replace(
replace(
replace(
'ale',
'!',
'!!'
),
'%',
'!%'
),
'_',
'!_'
) || '%'
) ESCAPE '!'
GROUP BY 1
ORDER BY 1
LIMIT 100
"#
    .to_string();

    let query_plan =
        convert_select_to_query_plan(thoughtspot_query, DatabaseProtocol::PostgreSQL).await;

    // Both the LIKE operator and its ESCAPE clause must be present in the
    // wrapped SQL.
    let sql = query_plan
        .as_logical_plan()
        .find_cube_scan_wrapper()
        .wrapped_sql
        .unwrap()
        .sql;
    assert!(sql.contains("LIKE "));
    assert!(sql.contains("ESCAPE "));

    // Building the physical plan must succeed as well; it is printed for debugging.
    let physical_plan = query_plan.as_physical_plan().await.unwrap();
    println!(
        "Physical plan: {}",
        displayable(physical_plan.as_ref()).indent()
    );
}
}
136 changes: 136 additions & 0 deletions rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/like_expr.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
use crate::{
compile::rewrite::{
analysis::LogicalPlanAnalysis, like_expr, rewrite, rules::wrapper::WrapperRules,
transforming_rewrite, wrapper_pullup_replacer, wrapper_pushdown_replacer,
LikeExprEscapeChar, LikeExprLikeType, LikeType, LogicalPlanLanguage,
WrapperPullupReplacerAliasToCube,
},
var, var_iter,
};
use egg::{EGraph, Rewrite, Subst};

impl WrapperRules {
    /// Appends the two rewrite rules that move a `like_expr` node through the
    /// wrapper replacers:
    ///
    /// * `wrapper-push-down-like-expr` turns a push-down replacer around the
    ///   whole `like_expr` into a `like_expr` whose `?expr` and `?pattern`
    ///   are each wrapped in a push-down replacer;
    /// * `wrapper-pull-up-like-expr` does the reverse for pull-up replacers,
    ///   guarded by [`Self::transform_like_expr`].
    pub fn like_expr_rules(
        &self,
        rules: &mut Vec<Rewrite<LogicalPlanLanguage, LogicalPlanAnalysis>>,
    ) {
        // Push down: replacer(like(expr, pattern)) -> like(replacer(expr), replacer(pattern)).
        let push_down_from = wrapper_pushdown_replacer(
            like_expr(
                "?like_type",
                "?negated",
                "?expr",
                "?pattern",
                "?escape_char",
            ),
            "?alias_to_cube",
            "?ungrouped",
            "?in_projection",
            "?cube_members",
        );
        let push_down_to = like_expr(
            "?like_type",
            "?negated",
            wrapper_pushdown_replacer(
                "?expr",
                "?alias_to_cube",
                "?ungrouped",
                "?in_projection",
                "?cube_members",
            ),
            wrapper_pushdown_replacer(
                "?pattern",
                "?alias_to_cube",
                "?ungrouped",
                "?in_projection",
                "?cube_members",
            ),
            "?escape_char",
        );
        rules.push(rewrite(
            "wrapper-push-down-like-expr",
            push_down_from,
            push_down_to,
        ));

        // Pull up: like(replacer(expr), replacer(pattern)) -> replacer(like(expr, pattern)).
        let pull_up_from = like_expr(
            "?like_type",
            "?negated",
            wrapper_pullup_replacer(
                "?expr",
                "?alias_to_cube",
                "?ungrouped",
                "?in_projection",
                "?cube_members",
            ),
            wrapper_pullup_replacer(
                "?pattern",
                "?alias_to_cube",
                "?ungrouped",
                "?in_projection",
                "?cube_members",
            ),
            "?escape_char",
        );
        let pull_up_to = wrapper_pullup_replacer(
            like_expr(
                "?like_type",
                "?negated",
                "?expr",
                "?pattern",
                "?escape_char",
            ),
            "?alias_to_cube",
            "?ungrouped",
            "?in_projection",
            "?cube_members",
        );
        rules.push(transforming_rewrite(
            "wrapper-pull-up-like-expr",
            pull_up_from,
            pull_up_to,
            self.transform_like_expr("?alias_to_cube", "?like_type", "?escape_char"),
        ));
    }

    /// Builds the condition for the pull-up rule.
    ///
    /// The returned closure yields `true` as soon as it finds an alias-to-cube
    /// value with a SQL generator whose templates contain
    /// `expressions/like` or `expressions/ilike` (matching the like type) and,
    /// when an escape character is set, also `expressions/like_escape`.
    /// Like types other than `Like` and `ILike` never match. If no
    /// combination qualifies, the closure yields `false`.
    fn transform_like_expr(
        &self,
        alias_to_cube_var: &'static str,
        like_type_var: &'static str,
        escape_char_var: &'static str,
    ) -> impl Fn(&mut EGraph<LogicalPlanLanguage, LogicalPlanAnalysis>, &mut Subst) -> bool {
        let alias_var = var!(alias_to_cube_var);
        let kind_var = var!(like_type_var);
        let escape_var = var!(escape_char_var);
        let meta_context = self.meta_context.clone();
        move |egraph, subst| {
            for alias_to_cube in var_iter!(
                egraph[subst[alias_var]],
                WrapperPullupReplacerAliasToCube
            ) {
                let Some(sql_generator) =
                    meta_context.sql_generator_by_alias_to_cube(&alias_to_cube)
                else {
                    continue;
                };

                let sql_templates = sql_generator.get_sql_templates();
                let templates = &sql_templates.templates;
                // Does not depend on the loop variables below, so look it up once.
                let escape_supported = templates.contains_key("expressions/like_escape");

                for escape_char in var_iter!(egraph[subst[escape_var]], LikeExprEscapeChar) {
                    // An ESCAPE clause needs its own template.
                    if escape_char.is_some() && !escape_supported {
                        continue;
                    }

                    for like_type in var_iter!(egraph[subst[kind_var]], LikeExprLikeType) {
                        let expression_name = match like_type {
                            LikeType::Like => "like",
                            LikeType::ILike => "ilike",
                            _ => continue,
                        };
                        let template_key = format!("expressions/{}", expression_name);
                        if templates.contains_key(&template_key) {
                            return true;
                        }
                    }
                }
            }
            false
        }
    }
}
2 changes: 2 additions & 0 deletions rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ mod filter;
mod in_list_expr;
mod in_subquery_expr;
mod is_null_expr;
mod like_expr;
mod limit;
mod literal;
mod negative_expr;
Expand Down Expand Up @@ -82,6 +83,7 @@ impl RewriteRules for WrapperRules {
self.negative_expr_rules(&mut rules);
self.not_expr_rules(&mut rules);
self.distinct_rules(&mut rules);
self.like_expr_rules(&mut rules);

rules
}
Expand Down
3 changes: 3 additions & 0 deletions rust/cubesql/cubesql/src/compile/test/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,9 @@ OFFSET {{ offset }}{% endif %}"#.to_string(),
("expressions/true".to_string(), "TRUE".to_string()),
("expressions/false".to_string(), "FALSE".to_string()),
("expressions/timestamp_literal".to_string(), "timestamptz '{{ value }}'".to_string()),
("expressions/like".to_string(), "{{ expr }} {% if negated %}NOT {% endif %}LIKE {{ pattern }}".to_string()),
("expressions/ilike".to_string(), "{{ expr }} {% if negated %}NOT {% endif %}ILIKE {{ pattern }}".to_string()),
("expressions/like_escape".to_string(), "{{ like_expr }} ESCAPE {{ escape_char }}".to_string()),
("quotes/identifiers".to_string(), "\"".to_string()),
("quotes/escape".to_string(), "\"\"".to_string()),
("params/param".to_string(), "${{ param_index + 1 }}".to_string()),
Expand Down
Loading

0 comments on commit 2bda0dd

Please sign in to comment.