Skip to content

Commit

Permalink
Improve LIKE performance for Dictionary arrays
Browse files Browse the repository at this point in the history
  • Loading branch information
Lordworms committed Jun 21, 2024
1 parent c50f0dc commit 17e2ad0
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 2 deletions.
5 changes: 4 additions & 1 deletion datafusion/optimizer/src/analyzer/type_coercion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,10 @@ impl<'a> TreeNodeRewriter for TypeCoercionRewriter<'a> {
"There isn't a common type to coerce {left_type} and {right_type} in {op_name} expression"
)
})?;
let expr = Box::new(expr.cast_to(&coerced_type, self.schema)?);
let expr = match left_type {
DataType::Dictionary(_, Utf8) => expr,
_ => Box::new(expr.cast_to(&coerced_type, self.schema)?),
};
let pattern = Box::new(pattern.cast_to(&coerced_type, self.schema)?);
Ok(Transformed::yes(Expr::Like(Like::new(
negated,
Expand Down
10 changes: 9 additions & 1 deletion datafusion/physical-expr/src/expressions/like.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,14 @@ impl PartialEq<dyn Any> for LikeExpr {
}
}

/// Reports whether `from_type` is a `Dictionary` type whose second (value)
/// type is exactly `Utf8`.
///
/// Returns `false` for every other type, including dictionaries whose value
/// type is anything other than `Utf8`. The dictionary's first (key) type is
/// ignored.
fn can_like_type(from_type: &DataType) -> bool {
    matches!(
        from_type,
        DataType::Dictionary(_, value_type) if **value_type == DataType::Utf8
    )
}

/// Create a like expression, erroring if the argument types are not compatible.
pub fn like(
negated: bool,
Expand All @@ -158,7 +166,7 @@ pub fn like(
) -> Result<Arc<dyn PhysicalExpr>> {
let expr_type = &expr.data_type(input_schema)?;
let pattern_type = &pattern.data_type(input_schema)?;
if !expr_type.eq(pattern_type) {
if !expr_type.eq(pattern_type) && !can_like_type(expr_type) {
return internal_err!(
"The type of {expr_type} AND {pattern_type} of like physical should be same"
);
Expand Down
26 changes: 26 additions & 0 deletions datafusion/sqllogictest/test_files/regexp.slt
Original file line number Diff line number Diff line change
Expand Up @@ -322,3 +322,29 @@ true

statement ok
drop table t;

statement ok
create or replace table strings as values
('FooBar'),
('Foo'),
('Foo'),
('Bar'),
('FooBar'),
('Bar'),
('Baz');

statement ok
create or replace table dict_table as
select arrow_cast(column1, 'Dictionary(Int32, Utf8)') as column1
from strings;

query TT
explain select column1 from dict_table where column1 LIKE '%oo%';
----
logical_plan
01)Filter: dict_table.column1 LIKE Utf8("%oo%")
02)--TableScan: dict_table projection=[column1]
physical_plan
01)CoalesceBatchesExec: target_batch_size=8192
02)--FilterExec: column1@0 LIKE %oo%
03)----MemoryExec: partitions=1, partition_sizes=[1]

0 comments on commit 17e2ad0

Please sign in to comment.