Skip to content

Commit 755b26a

Browse files
authored
Support Utf8View to numeric coercion (#14377) (#14455)
* Test for string / numeric coercion
* fix tests
* Update tests
* Add tests to stringview
* add numeric coercion
1 parent 8f10fdf commit 755b26a

File tree

7 files changed

+105
-0
lines changed

7 files changed

+105
-0
lines changed

datafusion/expr-common/src/type_coercion/binary.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -684,8 +684,10 @@ fn string_numeric_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<D
684684
match (lhs_type, rhs_type) {
685685
(Utf8, _) if rhs_type.is_numeric() => Some(Utf8),
686686
(LargeUtf8, _) if rhs_type.is_numeric() => Some(LargeUtf8),
687+
(Utf8View, _) if rhs_type.is_numeric() => Some(Utf8View),
687688
(_, Utf8) if lhs_type.is_numeric() => Some(Utf8),
688689
(_, LargeUtf8) if lhs_type.is_numeric() => Some(LargeUtf8),
690+
(_, Utf8View) if lhs_type.is_numeric() => Some(Utf8View),
689691
_ => None,
690692
}
691693
}

datafusion/sqllogictest/test_files/string/dictionary_utf8.slt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,15 @@ statement ok
3434
create table test_substr as
3535
select arrow_cast(col1, 'Dictionary(Int32, Utf8)') as c1 from test_substr_base;
3636

37+
statement ok
38+
create table test_datetime as
39+
select
40+
arrow_cast(column1, 'Dictionary(Int32, Utf8)') as ts,
41+
arrow_cast(column2, 'Dictionary(Int32, Utf8)') as d,
42+
arrow_cast(column3, 'Dictionary(Int32, Utf8)') as t
43+
from test_datetime_base;
44+
45+
3746
statement ok
3847
drop table test_source
3948

@@ -56,3 +65,6 @@ drop table test_basic_operator;
5665

5766
statement ok
5867
drop table test_substr_base;
68+
69+
statement ok
70+
drop table test_datetime_base;

datafusion/sqllogictest/test_files/string/init_data.slt.part

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,14 @@ statement ok
3737
create table test_substr_base (
3838
col1 VARCHAR
3939
) as values ('foo'), ('hello🌏世界'), ('💩'), ('ThisIsAVeryLongASCIIString'), (''), (NULL);
40+
41+
42+
# --------------------------------------
43+
# Set up test tables with date/time values to test coercion
44+
# --------------------------------------
45+
statement ok
46+
create table test_datetime_base as values
47+
('2024-08-09T12:13:14', '2024-08-09', '12:13:14'),
48+
('2024-08-09T12:13:15', '2024-09-09', '12:14:14'),
49+
(NULL, NULL, NULL)
50+
;

datafusion/sqllogictest/test_files/string/large_string.slt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,15 @@ statement ok
3434
create table test_substr as
3535
select arrow_cast(col1, 'LargeUtf8') as c1 from test_substr_base;
3636

37+
statement ok
38+
create table test_datetime as
39+
select
40+
arrow_cast(column1, 'LargeUtf8') as ts,
41+
arrow_cast(column2, 'LargeUtf8') as d,
42+
arrow_cast(column3, 'LargeUtf8') as t
43+
from test_datetime_base;
44+
45+
3746
# select
3847
query TTTT
3948
SELECT ascii_1, ascii_2, unicode_1, unicode_2 FROM test_basic_operator
@@ -64,3 +73,6 @@ drop table test_basic_operator;
6473

6574
statement ok
6675
drop table test_substr_base;
76+
77+
statement ok
78+
drop table test_datetime_base;

datafusion/sqllogictest/test_files/string/string.slt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,13 @@ statement ok
3434
create table test_substr as
3535
select arrow_cast(col1, 'Utf8') as c1 from test_substr_base;
3636

37+
statement ok
38+
create table test_datetime as
39+
select
40+
arrow_cast(column1, 'Utf8') as ts,
41+
arrow_cast(column2, 'Utf8') as d,
42+
arrow_cast(column3, 'Utf8') as t
43+
from test_datetime_base;
3744

3845

3946
#
@@ -186,3 +193,6 @@ drop table test_basic_operator;
186193

187194
statement ok
188195
drop table test_substr;
196+
197+
statement ok
198+
drop table test_datetime;

datafusion/sqllogictest/test_files/string/string_query.slt.part

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@
1919
# with standard values, but different types in string columns
2020
# (String, StringView, etc.)
2121

22+
# --------------------------------------
23+
# Show the input data
24+
# --------------------------------------
25+
2226
# select
2327
query TTTT
2428
SELECT ascii_1, ascii_2, unicode_1, unicode_2 FROM test_basic_operator
@@ -35,6 +39,49 @@ _ \_ (empty) (empty)
3539
NULL % NULL NULL
3640
NULL R NULL 🔥
3741

42+
# --------------------------------------
43+
# test type coercion (compare to int)
44+
# queries should not error
45+
# --------------------------------------
46+
47+
query BB
48+
select ascii_1 = 1 as col1, 1 = ascii_1 as col2 from test_basic_operator;
49+
----
50+
false false
51+
false false
52+
false false
53+
false false
54+
false false
55+
false false
56+
false false
57+
false false
58+
false false
59+
NULL NULL
60+
NULL NULL
61+
62+
query BB
63+
select ascii_1 <> 1 as col1, 1 <> ascii_1 as col2 from test_basic_operator;
64+
----
65+
true true
66+
true true
67+
true true
68+
true true
69+
true true
70+
true true
71+
true true
72+
true true
73+
true true
74+
NULL NULL
75+
NULL NULL
76+
77+
# Coercion to date/time
78+
query BBB
79+
select ts = '2024-08-09T12:13:14'::timestamp, d = '2024-08-08'::date, t = '12:13:14'::time from test_datetime;
80+
----
81+
true false true
82+
false false false
83+
NULL NULL NULL
84+
3885
# --------------------------------------
3986
# column comparison as filters
4087
# --------------------------------------

datafusion/sqllogictest/test_files/string/string_view.slt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,14 @@ statement ok
3434
create table test_substr as
3535
select arrow_cast(col1, 'Utf8View') as c1 from test_substr_base;
3636

37+
statement ok
38+
create table test_datetime as
39+
select
40+
arrow_cast(column1, 'Utf8View') as ts,
41+
arrow_cast(column2, 'Utf8View') as d,
42+
arrow_cast(column3, 'Utf8View') as t
43+
from test_datetime_base;
44+
3745
statement ok
3846
drop table test_source
3947

@@ -51,6 +59,9 @@ drop table test_basic_operator;
5159
statement ok
5260
drop table test_substr_base;
5361

62+
statement ok
63+
drop table test_datetime_base;
64+
5465

5566
# --------------------------------------
5667
# String_view specific tests

0 commit comments

Comments
 (0)