diff --git a/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt b/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt
index f7a81f08456f..1b37ddcb3526 100644
--- a/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt
+++ b/datafusion/sqllogictest/test_files/parquet_sorted_statistics.slt
@@ -260,3 +260,79 @@ physical_plan
 01)SortPreservingMergeExec: [constant_col@0 ASC NULLS LAST]
 02)--SortExec: expr=[constant_col@0 ASC NULLS LAST], preserve_partitioning=[true]
 03)----ParquetExec: file_groups={2 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=A/0.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=B/1.parquet], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/test_table/partition_col=C/2.parquet]]}, projection=[constant_col]
+
+statement ok
+DROP TABLE test_table;
+
+# Create an in-memory table used as the data source for the parquet files below
+statement ok
+CREATE TABLE int_table (
+  a INT,
+  b INT,
+  c INT
+) AS VALUES
+-- rows written to 0.parquet (a <= 3)
+(3, 100, 0),
+(2, 200, 0),
+(1, 300, 0),
+-- rows written to 1.parquet (a > 6)
+(9, 700, 0),
+(8, 800, 0),
+(7, 900, 0);
+
+# Create files 0.parquet and 1.parquet, both sorted on a. Their schemas are different but compatible:
+# 0.parquet has the columns in the order a, b, c and 1.parquet has them in the order c, b, a.
+# The key ranges of a in the two files must not overlap.
+
+# Create 0.parquet (column order a, b, c); COPY reports a single row-count column
+query I
+COPY (SELECT * FROM int_table WHERE a <= 3 ORDER BY a)
+TO 'test_files/scratch/parquet_sorted_statistics/int_table/0.parquet'
+STORED AS PARQUET;
+----
+3
+
+# Create 1.parquet (column order c, b, a); COPY reports a single row-count column
+query I
+COPY (SELECT c, b, a FROM int_table WHERE a > 6 ORDER BY a)
+TO 'test_files/scratch/parquet_sorted_statistics/int_table/1.parquet'
+STORED AS PARQUET;
+----
+3
+
+# Create an external table with columns a, b, c and an explicit order on a
+statement ok
+CREATE EXTERNAL TABLE test_table (
+  partition_col TEXT NOT NULL,
+  a INT NOT NULL,
+  b INT NOT NULL,
+  c INT NOT NULL
+)
+STORED AS PARQUET
+PARTITIONED BY (partition_col)
+WITH ORDER (a ASC NULLS LAST)
+LOCATION 'test_files/scratch/parquet_sorted_statistics/int_table';
+
+# Ordering by a matches the declared order: the plan must use neither a sort nor a sort preserving merge
+query TT
+EXPLAIN SELECT *
+FROM test_table
+ORDER BY a;
+----
+logical_plan
+01)Sort: test_table.a ASC NULLS LAST
+02)--TableScan: test_table projection=[a, b, c, partition_col]
+physical_plan ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/int_table/0.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/int_table/1.parquet]]}, projection=[a, b, c, partition_col], output_ordering=[a@0 ASC NULLS LAST]
+
+# Ordering by b is not covered by the declared order on a: the plan must add a SortExec above the scan
+query TT
+EXPLAIN SELECT *
+FROM test_table
+ORDER BY b;
+----
+logical_plan
+01)Sort: test_table.b ASC NULLS LAST
+02)--TableScan: test_table projection=[a, b, c, partition_col]
+physical_plan
+01)SortExec: expr=[b@1 ASC NULLS LAST], preserve_partitioning=[false]
+02)--ParquetExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/int_table/0.parquet, WORKSPACE_ROOT/datafusion/sqllogictest/test_files/scratch/parquet_sorted_statistics/int_table/1.parquet]]}, projection=[a, b, c, partition_col], output_ordering=[a@0 ASC NULLS LAST]