Skip to content

Commit

Permalink
Cast Utf8View to Utf8 to support || from StringViewArray (#11796)
Browse files Browse the repository at this point in the history

* make query work

* hack string_concat_coercion

* more tests
  • Loading branch information
dharanad committed Aug 7, 2024
1 parent cd322f1 commit 60d1d3a
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 8 deletions.
23 changes: 15 additions & 8 deletions datafusion/expr/src/type_coercion/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -890,15 +890,22 @@ fn dictionary_coercion(
/// 2. Data type of the other side should be able to cast to string type
fn string_concat_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
use arrow::datatypes::DataType::*;
string_coercion(lhs_type, rhs_type).or(match (lhs_type, rhs_type) {
(Utf8, from_type) | (from_type, Utf8) => {
string_concat_internal_coercion(from_type, &Utf8)
}
(LargeUtf8, from_type) | (from_type, LargeUtf8) => {
string_concat_internal_coercion(from_type, &LargeUtf8)
match (lhs_type, rhs_type) {
// If either side is Utf8View and the other side is Utf8View, Utf8, or
// LargeUtf8, coerce to Utf8 (note: this is Utf8 even when the other
// side is LargeUtf8).
// Ref: https://github.com/apache/datafusion/pull/11796
(Utf8View, Utf8View | Utf8 | LargeUtf8) | (Utf8 | LargeUtf8, Utf8View) => {
Some(Utf8)
}
_ => None,
})
// Otherwise try `string_coercion` first; if it returns `None` and one side
// is Utf8 / LargeUtf8, defer to `string_concat_internal_coercion` with the
// other side's type and that string type as the target.
_ => string_coercion(lhs_type, rhs_type).or(match (lhs_type, rhs_type) {
(Utf8, from_type) | (from_type, Utf8) => {
string_concat_internal_coercion(from_type, &Utf8)
}
(LargeUtf8, from_type) | (from_type, LargeUtf8) => {
string_concat_internal_coercion(from_type, &LargeUtf8)
}
_ => None,
}),
}
}

fn array_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType> {
Expand Down
53 changes: 53 additions & 0 deletions datafusion/sqllogictest/test_files/string_view.slt
Original file line number Diff line number Diff line change
Expand Up @@ -447,3 +447,56 @@ select t.dt from dates t where arrow_cast('2024-01-01', 'Utf8View') < t.dt;

statement ok
drop table dates;

statement ok
create table temp as values
('value1', arrow_cast('rust', 'Utf8View'), arrow_cast('fast', 'Utf8View')),
('value2', arrow_cast('datafusion', 'Utf8View'), arrow_cast('cool', 'Utf8View'));

query T
select column2||' is fast' from temp;
----
rust is fast
datafusion is fast


query T
select column2 || ' is ' || column3 from temp;
----
rust is fast
datafusion is cool

query TT
explain select column2 || 'is' || column3 from temp;
----
logical_plan
01)Projection: CAST(temp.column2 AS Utf8) || Utf8("is") || CAST(temp.column3 AS Utf8)
02)--TableScan: temp projection=[column2, column3]


query TT
explain select column2||' is fast' from temp;
----
logical_plan
01)Projection: CAST(temp.column2 AS Utf8) || Utf8(" is fast")
02)--TableScan: temp projection=[column2]


query T
select column2||column3 from temp;
----
rustfast
datafusioncool

query TT
explain select column2||column3 from temp;
----
logical_plan
01)Projection: CAST(temp.column2 AS Utf8) || CAST(temp.column3 AS Utf8)
02)--TableScan: temp projection=[column2, column3]

query T
select column2|| ' ' ||column3 from temp;
----
rust fast
datafusion cool

0 comments on commit 60d1d3a

Please sign in to comment.