diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index 3be3c94770db..f8824b23d1b9 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -15,99 +15,251 @@ # specific language governing permissions and limitations # under the License. +######## +## Test setup +######## -# test StringViewArray with Utf8View columns statement ok -create table test as values (arrow_cast('Andrew', 'Utf8View'), arrow_cast('X', 'Utf8View')), - (arrow_cast('Xiangpeng', 'Utf8View'), arrow_cast('Xiangpeng', 'Utf8View')), - (arrow_cast('Raphael', 'Utf8View'), arrow_cast('R', 'Utf8View')), - (arrow_cast(NULL, 'Utf8View'), arrow_cast('R', 'Utf8View')); +create table test_source as values + ('Andrew', 'X'), + ('Xiangpeng', 'Xiangpeng'), + ('Raphael', 'R'), + (NULL, 'R') +; -query B -select arrow_cast('NULL', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'); ----- -false +# Table with the different combination of column types +statement ok +create table test as +SELECT + arrow_cast(column1, 'Utf8') as column1_utf8, + arrow_cast(column2, 'Utf8') as column2_utf8, + arrow_cast(column1, 'Utf8View') as column1_utf8view, + arrow_cast(column2, 'Utf8View') as column2_utf8view, + arrow_cast(column1, 'Dictionary(Int32, Utf8)') as column1_dict, + arrow_cast(column2, 'Dictionary(Int32, Utf8)') as column2_dict +FROM test_source; -query B -select arrow_cast('NULL', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View'); ----- -true +statement ok +drop table test_source -query B -select arrow_cast('Andrew', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'); ----- -true +######## +## StringView to StringView +######## -query B -select arrow_cast('Xiangpeng', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View'); +# StringView scalar to StringView scalar + +query BBBB +select + arrow_cast('NULL', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'), + arrow_cast('NULL', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View'), + arrow_cast('Andrew', 'Utf8View') = arrow_cast('Andrew', 'Utf8View'), + arrow_cast('Xiangpeng', 'Utf8View') <> arrow_cast('Andrew', 'Utf8View'); ---- -true +false true true true + + +# StringView column to StringView column comparison as filters -query ?? -select * from test where column1 = column2; +query TT +select column1_utf8, column2_utf8 from test where column1_utf8view = column2_utf8view; ---- Xiangpeng Xiangpeng -query ?? -select * from test where column1 <> column2; +query TT +select column1_utf8, column2_utf8 from test where column1_utf8view <> column2_utf8view; ---- Andrew X Raphael R -query ?? -select * from test where column1 = arrow_cast('Andrew', 'Utf8View'); +# StringView column to StringView column +query TTBB +select + column1_utf8, column2_utf8, + column1_utf8view = column2_utf8view, + column1_utf8view <> column2_utf8view +from test; ---- -Andrew X +Andrew X false true +Xiangpeng Xiangpeng true false +Raphael R false true +NULL R NULL NULL -query ?? -select * from test where column1 = 'Andrew'; +# StringView column to StringView scalar comparison +query TTBBBB +select + column1_utf8, column2_utf8, + column1_utf8view = arrow_cast('Andrew', 'Utf8View'), + arrow_cast('Andrew', 'Utf8View') = column1_utf8view, + column1_utf8view <> arrow_cast('Andrew', 'Utf8View'), + arrow_cast('Andrew', 'Utf8View') <> column1_utf8view +from test; ---- -Andrew X +Andrew X true true false false +Xiangpeng Xiangpeng false false true true +Raphael R false false true true +NULL R NULL NULL NULL NULL -query ?? -select * from test where column1 <> arrow_cast('Andrew', 'Utf8View'); +######## +## StringView to String +######## + +# test StringViewArray with Utf8 columns +query TTBBBB +select + column1_utf8, column2_utf8, + column1_utf8view = column2_utf8, + column2_utf8 = column1_utf8view, + column1_utf8view <> column2_utf8, + column2_utf8 <> column1_utf8view +from test; ---- -Xiangpeng Xiangpeng -Raphael R +Andrew X false false true true +Xiangpeng Xiangpeng true true false false +Raphael R false false true true +NULL R NULL NULL NULL NULL -query ?? -select * from test where column1 <> 'Andrew'; +# StringView column to String scalar +query TTBBBB +select + column1_utf8, column2_utf8, + column1_utf8view = arrow_cast('Andrew', 'Utf8'), + arrow_cast('Andrew', 'Utf8') = column1_utf8view, + column1_utf8view <> arrow_cast('Andrew', 'Utf8'), + arrow_cast('Andrew', 'Utf8') <> column1_utf8view +from test; ---- -Xiangpeng Xiangpeng -Raphael R +Andrew X true true false false +Xiangpeng Xiangpeng false false true true +Raphael R false false true true +NULL R NULL NULL NULL NULL -statement ok -drop table test; +# String column to StringView scalar +query TTBBBB +select + column1_utf8, column2_utf8, + column1_utf8 = arrow_cast('Andrew', 'Utf8View'), + arrow_cast('Andrew', 'Utf8View') = column1_utf8, + column1_utf8 <> arrow_cast('Andrew', 'Utf8View'), + arrow_cast('Andrew', 'Utf8View') <> column1_utf8 +from test; +---- +Andrew X true true false false +Xiangpeng Xiangpeng false false true true +Raphael R false false true true +NULL R NULL NULL NULL NULL + + +######## +## StringView to Dictionary +######## + +# test StringViewArray with Dictionary columns +query TTBBBB +select + column1_utf8, column2_utf8, + column1_utf8view = column2_dict, + column2_dict = column1_utf8view, + column1_utf8view <> column2_dict, + column2_dict <> column1_utf8view +from test; +---- +Andrew X false false true true +Xiangpeng Xiangpeng true true false false +Raphael R false false true true +NULL R NULL NULL NULL NULL + +# StringView column to Dict scalar +query TTBBBB +select + column1_utf8, column2_utf8, + column1_utf8view = arrow_cast('Andrew', 'Dictionary(Int32, Utf8)'), + arrow_cast('Andrew', 'Dictionary(Int32, Utf8)') = column1_utf8view, + column1_utf8view <> arrow_cast('Andrew', 'Dictionary(Int32, Utf8)'), + arrow_cast('Andrew', 'Dictionary(Int32, Utf8)') <> column1_utf8view +from test; +---- +Andrew X true true false false +Xiangpeng Xiangpeng false false true true +Raphael R false false true true +NULL R NULL NULL NULL NULL + +# Dict column to StringView scalar +query TTBBBB +select + column1_utf8, column2_utf8, + column1_dict = arrow_cast('Andrew', 'Utf8View'), + arrow_cast('Andrew', 'Utf8View') = column1_dict, + column1_dict <> arrow_cast('Andrew', 'Utf8View'), + arrow_cast('Andrew', 'Utf8View') <> column1_dict +from test; +---- +Andrew X true true false false +Xiangpeng Xiangpeng false false true true +Raphael R false false true true +NULL R NULL NULL NULL NULL + + +######## +## Coercion Rules +######## -# test StringViewArray with Utf8 and Utf8View columns statement ok -create table test as values ('Andrew', arrow_cast('X', 'Utf8View')), - ('Xiangpeng', arrow_cast('Xiangpeng', 'Utf8View')), - ('Raphael', arrow_cast('R', 'Utf8View')), - (NULL, arrow_cast('R', 'Utf8View')); +set datafusion.explain.logical_plan_only = true; -query T? -select * from test where column1 = column2; + +# Filter should have a StringView literal and no column cast +query TT +explain SELECT column1_utf8 from test where column1_utf8view = 'Andrew'; ---- -Xiangpeng Xiangpeng +logical_plan +01)Projection: test.column1_utf8 +02)--Filter: test.column1_utf8view = Utf8View("Andrew") +03)----TableScan: test projection=[column1_utf8, column1_utf8view] -query T? -select * from test where column1 <> column2; +# reverse order should be the same +query TT +explain SELECT column1_utf8 from test where 'Andrew' = column1_utf8view; ---- -Andrew X -Raphael R +logical_plan +01)Projection: test.column1_utf8 +02)--Filter: test.column1_utf8view = Utf8View("Andrew") +03)----TableScan: test projection=[column1_utf8, column1_utf8view] -query T? -select * from test where column1 = arrow_cast('Andrew', 'Utf8View'); +# should not be casting the column: https://github.com/apache/datafusion/issues/10998 +query TT +explain SELECT column1_utf8 from test where column1_utf8 = arrow_cast('Andrew', 'Utf8View'); ---- -Andrew X +logical_plan +01)Filter: CAST(test.column1_utf8 AS Utf8View) = Utf8View("Andrew") +02)--TableScan: test projection=[column1_utf8] -query T? -select * from test where column1 <> arrow_cast('Andrew', 'Utf8View'); +query TT +explain SELECT column1_utf8 from test where column1_utf8view = arrow_cast('Andrew', 'Dictionary(Int32, Utf8)'); ---- -Xiangpeng Xiangpeng -Raphael R +logical_plan +01)Projection: test.column1_utf8 +02)--Filter: test.column1_utf8view = Utf8View("Andrew") +03)----TableScan: test projection=[column1_utf8, column1_utf8view] + +# compare string / stringview +# Should cast string -> stringview (which is cheap), not stringview -> string (which is not) +query TT +explain SELECT column1_utf8 from test where column1_utf8view = column2_utf8; +---- +logical_plan +01)Projection: test.column1_utf8 +02)--Filter: test.column1_utf8view = CAST(test.column2_utf8 AS Utf8View) +03)----TableScan: test projection=[column1_utf8, column2_utf8, column1_utf8view] + +query TT +explain SELECT column1_utf8 from test where column2_utf8 = column1_utf8view; +---- +logical_plan +01)Projection: test.column1_utf8 +02)--Filter: CAST(test.column2_utf8 AS Utf8View) = test.column1_utf8view +03)----TableScan: test projection=[column1_utf8, column2_utf8, column1_utf8view] + statement ok drop table test;