From 0f8e9a8cf7c327468645818a0fba3e736b9e512a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <danielheres@gmail.com>
Date: Sat, 7 Sep 2024 09:49:21 +0200
Subject: [PATCH 01/22] Filter null keys by default

---
 datafusion/common/src/config.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs
index 19978e102cc8..1e1b601c190e 100644
--- a/datafusion/common/src/config.rs
+++ b/datafusion/common/src/config.rs
@@ -514,7 +514,7 @@ config_namespace! {
         /// a nullable and non-nullable column to filter out nulls on the nullable side. This
         /// filter can add additional overhead when the file format does not fully support
         /// predicate push down.
-        pub filter_null_join_keys: bool, default = false
+        pub filter_null_join_keys: bool, default = true
 
         /// Should DataFusion repartition data using the aggregate keys to execute aggregates
         /// in parallel using the provided `target_partitions` level

From 16247408ccfafb910af94fe37417eaf8f43efbd5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <danielheres@gmail.com>
Date: Sat, 7 Sep 2024 10:10:43 +0200
Subject: [PATCH 02/22] Skip FilterNullJoinKeys for joins with null_equals_null

---
 datafusion/optimizer/src/filter_null_join_keys.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datafusion/optimizer/src/filter_null_join_keys.rs b/datafusion/optimizer/src/filter_null_join_keys.rs
index 01e3d27c580f..c5beb3709281 100644
--- a/datafusion/optimizer/src/filter_null_join_keys.rs
+++ b/datafusion/optimizer/src/filter_null_join_keys.rs
@@ -50,7 +50,7 @@ impl OptimizerRule for FilterNullJoinKeys {
             return Ok(Transformed::no(plan));
         }
         match plan {
-            LogicalPlan::Join(mut join) if !join.on.is_empty() => {
+            LogicalPlan::Join(mut join) if !join.on.is_empty() && !join.null_equals_null() => {
                 let (left_preserved, right_preserved) =
                     on_lr_is_preserved(join.join_type);
 

From 3de10179788ee8da04fb6899f490a72f2e341c34 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <danielheres@gmail.com>
Date: Sat, 7 Sep 2024 10:11:34 +0200
Subject: [PATCH 03/22] Docs

---
 docs/source/user-guide/cli/usage.md              | 4 ++--
 docs/source/user-guide/configs.md                | 2 +-
 docs/source/user-guide/sql/information_schema.md | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/source/user-guide/cli/usage.md b/docs/source/user-guide/cli/usage.md
index 6a620fc69252..ecc8ade83327 100644
--- a/docs/source/user-guide/cli/usage.md
+++ b/docs/source/user-guide/cli/usage.md
@@ -138,7 +138,7 @@ Show configuration options
 | datafusion.execution.time_zone                  | UTC     |
 | datafusion.explain.logical_plan_only            | false   |
 | datafusion.explain.physical_plan_only           | false   |
-| datafusion.optimizer.filter_null_join_keys      | false   |
+| datafusion.optimizer.filter_null_join_keys      | true    |
 | datafusion.optimizer.skip_failed_rules          | true    |
 +-------------------------------------------------+---------+
 
@@ -191,7 +191,7 @@ DataFusion CLI v12.0.0
 | datafusion.execution.time_zone                  | UTC     |
 | datafusion.explain.logical_plan_only            | false   |
 | datafusion.explain.physical_plan_only           | false   |
-| datafusion.optimizer.filter_null_join_keys      | false   |
+| datafusion.optimizer.filter_null_join_keys      | true    |
 | datafusion.optimizer.skip_failed_rules          | true    |
 +-------------------------------------------------+---------+
 8 rows in set. Query took 0.002 seconds.
diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md
index 8514fb1fbd93..975cbd42c93b 100644
--- a/docs/source/user-guide/configs.md
+++ b/docs/source/user-guide/configs.md
@@ -94,7 +94,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus
 | datafusion.optimizer.enable_distinct_aggregation_soft_limit             | true                      | When set to true, the optimizer will push a limit operation into grouped aggregations which have no aggregate expressions, as a soft limit, emitting groups once the limit is reached, before all rows in the group are read.                                                                                                                                                                                                                                                                                                                                            |
 | datafusion.optimizer.enable_round_robin_repartition                     | true                      | When set to true, the physical plan optimizer will try to add round robin repartitioning to increase parallelism to leverage more CPU cores                                                                                                                                                                                                                                                                                                                                                                                                                              |
 | datafusion.optimizer.enable_topk_aggregation                            | true                      | When set to true, the optimizer will attempt to perform limit operations during aggregations, if possible                                                                                                                                                                                                                                                                                                                                                                                                                                                                |
-| datafusion.optimizer.filter_null_join_keys                              | false                     | When set to true, the optimizer will insert filters before a join between a nullable and non-nullable column to filter out nulls on the nullable side. This filter can add additional overhead when the file format does not fully support predicate push down.                                                                                                                                                                                                                                                                                                          |
+| datafusion.optimizer.filter_null_join_keys                              | true                      | When set to true, the optimizer will insert filters before a join between a nullable and non-nullable column to filter out nulls on the nullable side. This filter can add additional overhead when the file format does not fully support predicate push down.                                                                                                                                                                                                                                                                                                          |
 | datafusion.optimizer.repartition_aggregations                           | true                      | Should DataFusion repartition data using the aggregate keys to execute aggregates in parallel using the provided `target_partitions` level                                                                                                                                                                                                                                                                                                                                                                                                                               |
 | datafusion.optimizer.repartition_file_min_size                          | 10485760                  | Minimum total files size in bytes to perform file scan repartitioning.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   |
 | datafusion.optimizer.repartition_joins                                  | true                      | Should DataFusion repartition data using the join keys to execute joins in parallel using the provided `target_partitions` level                                                                                                                                                                                                                                                                                                                                                                                                                                         |
diff --git a/docs/source/user-guide/sql/information_schema.md b/docs/source/user-guide/sql/information_schema.md
index bf4aa00e1dde..bd70ffce6ce3 100644
--- a/docs/source/user-guide/sql/information_schema.md
+++ b/docs/source/user-guide/sql/information_schema.md
@@ -65,7 +65,7 @@ select * from information_schema.df_settings;
 | datafusion.execution.time_zone                  | UTC     |
 | datafusion.explain.logical_plan_only            | false   |
 | datafusion.explain.physical_plan_only           | false   |
-| datafusion.optimizer.filter_null_join_keys      | false   |
+| datafusion.optimizer.filter_null_join_keys      | true    |
 | datafusion.optimizer.skip_failed_rules          | true    |
 +-------------------------------------------------+---------+
 ```

From 03784e25bf99b880dbb6db93a7381c89a23ad980 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <danielheres@gmail.com>
Date: Sat, 7 Sep 2024 10:24:11 +0200
Subject: [PATCH 04/22] Access null_equals_null as a field in filter_null_join_keys.rs

---
 datafusion/optimizer/src/filter_null_join_keys.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datafusion/optimizer/src/filter_null_join_keys.rs b/datafusion/optimizer/src/filter_null_join_keys.rs
index c5beb3709281..66e24f5dee7c 100644
--- a/datafusion/optimizer/src/filter_null_join_keys.rs
+++ b/datafusion/optimizer/src/filter_null_join_keys.rs
@@ -50,7 +50,7 @@ impl OptimizerRule for FilterNullJoinKeys {
             return Ok(Transformed::no(plan));
         }
         match plan {
-            LogicalPlan::Join(mut join) if !join.on.is_empty() && !join.null_equals_null() => {
+            LogicalPlan::Join(mut join) if !join.on.is_empty() && !join.null_equals_null => {
                 let (left_preserved, right_preserved) =
                     on_lr_is_preserved(join.join_type);
 

From b45a74ffe9b8556e64d72a3fa16ad66acdd7edee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <danielheres@gmail.com>
Date: Sat, 7 Sep 2024 15:40:22 +0200
Subject: [PATCH 05/22] Update information_schema.slt for the new filter_null_join_keys default

---
 datafusion/sqllogictest/test_files/information_schema.slt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt
index f797a7a6539d..a81f196d0a7e 100644
--- a/datafusion/sqllogictest/test_files/information_schema.slt
+++ b/datafusion/sqllogictest/test_files/information_schema.slt
@@ -225,7 +225,7 @@ datafusion.optimizer.default_filter_selectivity 20
 datafusion.optimizer.enable_distinct_aggregation_soft_limit true
 datafusion.optimizer.enable_round_robin_repartition true
 datafusion.optimizer.enable_topk_aggregation true
-datafusion.optimizer.filter_null_join_keys false
+datafusion.optimizer.filter_null_join_keys true
 datafusion.optimizer.hash_join_single_partition_threshold 1048576
 datafusion.optimizer.hash_join_single_partition_threshold_rows 131072
 datafusion.optimizer.max_passes 3
@@ -314,7 +314,7 @@ datafusion.optimizer.default_filter_selectivity 20 The default filter selectivit
 datafusion.optimizer.enable_distinct_aggregation_soft_limit true When set to true, the optimizer will push a limit operation into grouped aggregations which have no aggregate expressions, as a soft limit, emitting groups once the limit is reached, before all rows in the group are read.
 datafusion.optimizer.enable_round_robin_repartition true When set to true, the physical plan optimizer will try to add round robin repartitioning to increase parallelism to leverage more CPU cores
 datafusion.optimizer.enable_topk_aggregation true When set to true, the optimizer will attempt to perform limit operations during aggregations, if possible
-datafusion.optimizer.filter_null_join_keys false When set to true, the optimizer will insert filters before a join between a nullable and non-nullable column to filter out nulls on the nullable side. This filter can add additional overhead when the file format does not fully support predicate push down.
+datafusion.optimizer.filter_null_join_keys true When set to true, the optimizer will insert filters before a join between a nullable and non-nullable column to filter out nulls on the nullable side. This filter can add additional overhead when the file format does not fully support predicate push down.
 datafusion.optimizer.hash_join_single_partition_threshold 1048576 The maximum estimated size in bytes for one input side of a HashJoin will be collected into a single partition
 datafusion.optimizer.hash_join_single_partition_threshold_rows 131072 The maximum estimated size in rows for one input side of a HashJoin will be collected into a single partition
 datafusion.optimizer.max_passes 3 Number of times that the optimizer will attempt to optimize the plan

From df9e3db4ea00f6c4b068e159173f5ade342c20c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <danielheres@gmail.com>
Date: Sat, 7 Sep 2024 15:45:53 +0200
Subject: [PATCH 06/22] WIP: reformat join guard; update group_by.slt and join.slt expected plans

---
 .../optimizer/src/filter_null_join_keys.rs    |  4 ++-
 .../sqllogictest/test_files/group_by.slt      | 36 ++++++++++---------
 datafusion/sqllogictest/test_files/join.slt   | 18 ++++++----
 3 files changed, 34 insertions(+), 24 deletions(-)

diff --git a/datafusion/optimizer/src/filter_null_join_keys.rs b/datafusion/optimizer/src/filter_null_join_keys.rs
index 66e24f5dee7c..c48012e211ec 100644
--- a/datafusion/optimizer/src/filter_null_join_keys.rs
+++ b/datafusion/optimizer/src/filter_null_join_keys.rs
@@ -50,7 +50,9 @@ impl OptimizerRule for FilterNullJoinKeys {
             return Ok(Transformed::no(plan));
         }
         match plan {
-            LogicalPlan::Join(mut join) if !join.on.is_empty() && !join.null_equals_null => {
+            LogicalPlan::Join(mut join)
+                if !join.on.is_empty() && !join.null_equals_null =>
+            {
                 let (left_preserved, right_preserved) =
                     on_lr_is_preserved(join.join_type);
 
diff --git a/datafusion/sqllogictest/test_files/group_by.slt b/datafusion/sqllogictest/test_files/group_by.slt
index 73bfd9844609..9eb7129027a7 100644
--- a/datafusion/sqllogictest/test_files/group_by.slt
+++ b/datafusion/sqllogictest/test_files/group_by.slt
@@ -2009,23 +2009,27 @@ logical_plan
 03)----Aggregate: groupBy=[[l.col0, l.col1, l.col2]], aggr=[[last_value(r.col1) ORDER BY [r.col0 ASC NULLS LAST]]]
 04)------Inner Join: l.col0 = r.col0
 05)--------SubqueryAlias: l
-06)----------TableScan: tab0 projection=[col0, col1, col2]
-07)--------SubqueryAlias: r
-08)----------TableScan: tab0 projection=[col0, col1]
+06)----------Filter: tab0.col0 IS NOT NULL
+07)------------TableScan: tab0 projection=[col0, col1, col2]
+08)--------SubqueryAlias: r
+09)----------Filter: tab0.col0 IS NOT NULL
+10)------------TableScan: tab0 projection=[col0, col1]
 physical_plan
-01)SortPreservingMergeExec: [col0@0 ASC NULLS LAST]
-02)--SortExec: expr=[col0@0 ASC NULLS LAST], preserve_partitioning=[true]
-03)----ProjectionExec: expr=[col0@0 as col0, last_value(r.col1) ORDER BY [r.col0 ASC NULLS LAST]@3 as last_col1]
-04)------AggregateExec: mode=FinalPartitioned, gby=[col0@0 as col0, col1@1 as col1, col2@2 as col2], aggr=[last_value(r.col1) ORDER BY [r.col0 ASC NULLS LAST]]
-05)--------CoalesceBatchesExec: target_batch_size=8192
-06)----------RepartitionExec: partitioning=Hash([col0@0, col1@1, col2@2], 4), input_partitions=4
-07)------------AggregateExec: mode=Partial, gby=[col0@0 as col0, col1@1 as col1, col2@2 as col2], aggr=[last_value(r.col1) ORDER BY [r.col0 ASC NULLS LAST]]
-08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
-09)----------------ProjectionExec: expr=[col0@2 as col0, col1@3 as col1, col2@4 as col2, col0@0 as col0, col1@1 as col1]
-10)------------------CoalesceBatchesExec: target_batch_size=8192
-11)--------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(col0@0, col0@0)]
-12)----------------------MemoryExec: partitions=1, partition_sizes=[3]
-13)----------------------MemoryExec: partitions=1, partition_sizes=[3]
+08)--------------ProjectionExec: expr=[col0@2 as col0, col1@3 as col1, col2@4 as col2, col0@0 as col0, col1@1 as col1]
+09)----------------CoalesceBatchesExec: target_batch_size=8192
+10)------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(col0@0, col0@0)]
+11)--------------------CoalesceBatchesExec: target_batch_size=8192
+12)----------------------RepartitionExec: partitioning=Hash([col0@0], 4), input_partitions=4
+13)------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+14)--------------------------CoalesceBatchesExec: target_batch_size=8192
+15)----------------------------FilterExec: col0@0 IS NOT NULL
+16)------------------------------MemoryExec: partitions=1, partition_sizes=[3]
+17)--------------------CoalesceBatchesExec: target_batch_size=8192
+18)----------------------RepartitionExec: partitioning=Hash([col0@0], 4), input_partitions=4
+19)------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+20)--------------------------CoalesceBatchesExec: target_batch_size=8192
+21)----------------------------FilterExec: col0@0 IS NOT NULL
+22)------------------------------MemoryExec: partitions=1, partition_sizes=[3]
 
 # Columns in the table are a,b,c,d. Source is CsvExec which is ordered by
 # a,b,c column. Column a has cardinality 2, column b has cardinality 4.
diff --git a/datafusion/sqllogictest/test_files/join.slt b/datafusion/sqllogictest/test_files/join.slt
index 21fea4ad1025..f5865f99acfe 100644
--- a/datafusion/sqllogictest/test_files/join.slt
+++ b/datafusion/sqllogictest/test_files/join.slt
@@ -750,14 +750,18 @@ WHERE t1.a=t2.a;
 ----
 logical_plan
 01)Inner Join: t1.a = t2.a
-02)--TableScan: t1 projection=[a, b]
-03)--SubqueryAlias: t2
-04)----TableScan: t1 projection=[a, b]
+02)--Filter: t1.a IS NOT NULL
+03)----TableScan: t1 projection=[a, b]
+04)--SubqueryAlias: t2
+05)----Filter: t1.a IS NOT NULL
+06)------TableScan: t1 projection=[a, b]
 physical_plan
-01)CoalesceBatchesExec: target_batch_size=8192
-02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0)]
-03)----MemoryExec: partitions=1, partition_sizes=[1]
-04)----MemoryExec: partitions=1, partition_sizes=[1]
+03)----CoalesceBatchesExec: target_batch_size=8192
+04)------FilterExec: a@0 IS NOT NULL
+05)--------MemoryExec: partitions=1, partition_sizes=[1]
+06)----CoalesceBatchesExec: target_batch_size=8192
+07)------FilterExec: a@0 IS NOT NULL
+08)--------MemoryExec: partitions=1, partition_sizes=[1]
 
 # Reset the configs to old values
 statement ok

From dc876a9c85438902b65e9c4f7da6402f1c432e07 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <danielheres@gmail.com>
Date: Sat, 7 Sep 2024 16:01:31 +0200
Subject: [PATCH 07/22] WIP: update expected plans in joins, predicates, subquery and TPC-H q10 tests

---
 datafusion/sqllogictest/test_files/joins.slt  |  4 +-
 .../sqllogictest/test_files/predicates.slt    | 12 ++--
 .../sqllogictest/test_files/subquery.slt      | 13 +++--
 .../sqllogictest/test_files/tpch/q10.slt.part | 56 ++++++++++---------
 4 files changed, 46 insertions(+), 39 deletions(-)

diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt
index 7d0262952b31..d4dd6359f259 100644
--- a/datafusion/sqllogictest/test_files/joins.slt
+++ b/datafusion/sqllogictest/test_files/joins.slt
@@ -1052,9 +1052,9 @@ WHERE join_t2.t2_id < 100
 ----
 logical_plan
 01)Inner Join: join_t1.t1_id = join_t2.t2_id
-02)--Filter: join_t1.t1_id < UInt32(100)
+02)--Filter: join_t1.t1_id IS NOT NULL AND join_t1.t1_id < UInt32(100)
 03)----TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
-04)--Filter: join_t2.t2_id < UInt32(100)
+04)--Filter: join_t2.t2_id IS NOT NULL AND join_t2.t2_id < UInt32(100)
 05)----TableScan: join_t2 projection=[t2_id, t2_name, t2_int]
 
 # Reduce left join 2 (to inner join)
diff --git a/datafusion/sqllogictest/test_files/predicates.slt b/datafusion/sqllogictest/test_files/predicates.slt
index 878d7c8a4dfb..646cd7e00f21 100644
--- a/datafusion/sqllogictest/test_files/predicates.slt
+++ b/datafusion/sqllogictest/test_files/predicates.slt
@@ -663,23 +663,23 @@ OR
 logical_plan
 01)Projection: lineitem.l_partkey
 02)--Inner Join: lineitem.l_partkey = part.p_partkey Filter: part.p_brand = Utf8("Brand#12") AND lineitem.l_quantity >= Decimal128(Some(100),15,2) AND lineitem.l_quantity <= Decimal128(Some(1100),15,2) AND part.p_size <= Int32(5) OR part.p_brand = Utf8("Brand#23") AND lineitem.l_quantity >= Decimal128(Some(1000),15,2) AND lineitem.l_quantity <= Decimal128(Some(2000),15,2) AND part.p_size <= Int32(10) OR part.p_brand = Utf8("Brand#34") AND lineitem.l_quantity >= Decimal128(Some(2000),15,2) AND lineitem.l_quantity <= Decimal128(Some(3000),15,2) AND part.p_size <= Int32(15)
-03)----Filter: lineitem.l_quantity >= Decimal128(Some(100),15,2) AND lineitem.l_quantity <= Decimal128(Some(1100),15,2) OR lineitem.l_quantity >= Decimal128(Some(1000),15,2) AND lineitem.l_quantity <= Decimal128(Some(2000),15,2) OR lineitem.l_quantity >= Decimal128(Some(2000),15,2) AND lineitem.l_quantity <= Decimal128(Some(3000),15,2)
-04)------TableScan: lineitem projection=[l_partkey, l_quantity], partial_filters=[lineitem.l_quantity >= Decimal128(Some(100),15,2) AND lineitem.l_quantity <= Decimal128(Some(1100),15,2) OR lineitem.l_quantity >= Decimal128(Some(1000),15,2) AND lineitem.l_quantity <= Decimal128(Some(2000),15,2) OR lineitem.l_quantity >= Decimal128(Some(2000),15,2) AND lineitem.l_quantity <= Decimal128(Some(3000),15,2)]
-05)----Filter: (part.p_brand = Utf8("Brand#12") AND part.p_size <= Int32(5) OR part.p_brand = Utf8("Brand#23") AND part.p_size <= Int32(10) OR part.p_brand = Utf8("Brand#34") AND part.p_size <= Int32(15)) AND part.p_size >= Int32(1)
-06)------TableScan: part projection=[p_partkey, p_brand, p_size], partial_filters=[part.p_size >= Int32(1), part.p_brand = Utf8("Brand#12") AND part.p_size <= Int32(5) OR part.p_brand = Utf8("Brand#23") AND part.p_size <= Int32(10) OR part.p_brand = Utf8("Brand#34") AND part.p_size <= Int32(15)]
+03)----Filter: (lineitem.l_quantity >= Decimal128(Some(100),15,2) AND lineitem.l_quantity <= Decimal128(Some(1100),15,2) OR lineitem.l_quantity >= Decimal128(Some(1000),15,2) AND lineitem.l_quantity <= Decimal128(Some(2000),15,2) OR lineitem.l_quantity >= Decimal128(Some(2000),15,2) AND lineitem.l_quantity <= Decimal128(Some(3000),15,2)) AND lineitem.l_partkey IS NOT NULL
+04)------TableScan: lineitem projection=[l_partkey, l_quantity], partial_filters=[lineitem.l_quantity >= Decimal128(Some(100),15,2) AND lineitem.l_quantity <= Decimal128(Some(1100),15,2) OR lineitem.l_quantity >= Decimal128(Some(1000),15,2) AND lineitem.l_quantity <= Decimal128(Some(2000),15,2) OR lineitem.l_quantity >= Decimal128(Some(2000),15,2) AND lineitem.l_quantity <= Decimal128(Some(3000),15,2), lineitem.l_partkey IS NOT NULL]
+05)----Filter: (part.p_brand = Utf8("Brand#12") AND part.p_size <= Int32(5) OR part.p_brand = Utf8("Brand#23") AND part.p_size <= Int32(10) OR part.p_brand = Utf8("Brand#34") AND part.p_size <= Int32(15)) AND part.p_partkey IS NOT NULL AND part.p_size >= Int32(1)
+06)------TableScan: part projection=[p_partkey, p_brand, p_size], partial_filters=[part.p_size >= Int32(1), part.p_brand = Utf8("Brand#12") AND part.p_size <= Int32(5) OR part.p_brand = Utf8("Brand#23") AND part.p_size <= Int32(10) OR part.p_brand = Utf8("Brand#34") AND part.p_size <= Int32(15), part.p_partkey IS NOT NULL]
 physical_plan
 01)CoalesceBatchesExec: target_batch_size=8192
 02)--HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_partkey@0, p_partkey@0)], filter=p_brand@1 = Brand#12 AND l_quantity@0 >= Some(100),15,2 AND l_quantity@0 <= Some(1100),15,2 AND p_size@2 <= 5 OR p_brand@1 = Brand#23 AND l_quantity@0 >= Some(1000),15,2 AND l_quantity@0 <= Some(2000),15,2 AND p_size@2 <= 10 OR p_brand@1 = Brand#34 AND l_quantity@0 >= Some(2000),15,2 AND l_quantity@0 <= Some(3000),15,2 AND p_size@2 <= 15, projection=[l_partkey@0]
 03)----CoalesceBatchesExec: target_batch_size=8192
 04)------RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4
 05)--------CoalesceBatchesExec: target_batch_size=8192
-06)----------FilterExec: l_quantity@1 >= Some(100),15,2 AND l_quantity@1 <= Some(1100),15,2 OR l_quantity@1 >= Some(1000),15,2 AND l_quantity@1 <= Some(2000),15,2 OR l_quantity@1 >= Some(2000),15,2 AND l_quantity@1 <= Some(3000),15,2
+06)----------FilterExec: (l_quantity@1 >= Some(100),15,2 AND l_quantity@1 <= Some(1100),15,2 OR l_quantity@1 >= Some(1000),15,2 AND l_quantity@1 <= Some(2000),15,2 OR l_quantity@1 >= Some(2000),15,2 AND l_quantity@1 <= Some(3000),15,2) AND l_partkey@0 IS NOT NULL
 07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
 08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/tpch-csv/lineitem.csv]]}, projection=[l_partkey, l_quantity], has_header=true
 09)----CoalesceBatchesExec: target_batch_size=8192
 10)------RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4
 11)--------CoalesceBatchesExec: target_batch_size=8192
-12)----------FilterExec: (p_brand@1 = Brand#12 AND p_size@2 <= 5 OR p_brand@1 = Brand#23 AND p_size@2 <= 10 OR p_brand@1 = Brand#34 AND p_size@2 <= 15) AND p_size@2 >= 1
+12)----------FilterExec: (p_brand@1 = Brand#12 AND p_size@2 <= 5 OR p_brand@1 = Brand#23 AND p_size@2 <= 10 OR p_brand@1 = Brand#34 AND p_size@2 <= 15) AND p_partkey@0 IS NOT NULL AND p_size@2 >= 1
 13)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
 14)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/tpch-csv/part.csv]]}, projection=[p_partkey, p_brand, p_size], has_header=true
 
diff --git a/datafusion/sqllogictest/test_files/subquery.slt b/datafusion/sqllogictest/test_files/subquery.slt
index 30b3631681e7..4469bebeeb84 100644
--- a/datafusion/sqllogictest/test_files/subquery.slt
+++ b/datafusion/sqllogictest/test_files/subquery.slt
@@ -198,7 +198,8 @@ logical_plan
 04)----SubqueryAlias: __scalar_sq_1
 05)------Projection: sum(t2.t2_int), t2.t2_id
 06)--------Aggregate: groupBy=[[t2.t2_id]], aggr=[[sum(CAST(t2.t2_int AS Int64))]]
-07)----------TableScan: t2 projection=[t2_id, t2_int]
+07)----------Filter: t2.t2_id IS NOT NULL
+08)------------TableScan: t2 projection=[t2_id, t2_int]
 physical_plan
 01)ProjectionExec: expr=[t1_id@1 as t1_id, sum(t2.t2_int)@0 as t2_sum]
 02)--CoalesceBatchesExec: target_batch_size=2
@@ -208,10 +209,12 @@ physical_plan
 06)----------CoalesceBatchesExec: target_batch_size=2
 07)------------RepartitionExec: partitioning=Hash([t2_id@0], 4), input_partitions=4
 08)--------------AggregateExec: mode=Partial, gby=[t2_id@0 as t2_id], aggr=[sum(t2.t2_int)]
-09)----------------MemoryExec: partitions=4, partition_sizes=[1, 0, 0, 0]
-10)------CoalesceBatchesExec: target_batch_size=2
-11)--------RepartitionExec: partitioning=Hash([t1_id@0], 4), input_partitions=4
-12)----------MemoryExec: partitions=4, partition_sizes=[1, 0, 0, 0]
+09)----------------CoalesceBatchesExec: target_batch_size=2
+10)------------------FilterExec: t2_id@0 IS NOT NULL
+11)--------------------MemoryExec: partitions=4, partition_sizes=[1, 0, 0, 0]
+12)------CoalesceBatchesExec: target_batch_size=2
+13)--------RepartitionExec: partitioning=Hash([t1_id@0], 4), input_partitions=4
+14)----------MemoryExec: partitions=4, partition_sizes=[1, 0, 0, 0]
 
 query II rowsort
 SELECT t1_id, (SELECT sum(t2_int) FROM t2 WHERE t2.t2_id = t1.t1_id) as t2_sum from t1
diff --git a/datafusion/sqllogictest/test_files/tpch/q10.slt.part b/datafusion/sqllogictest/test_files/tpch/q10.slt.part
index 73593a470c9a..d9779da8e629 100644
--- a/datafusion/sqllogictest/test_files/tpch/q10.slt.part
+++ b/datafusion/sqllogictest/test_files/tpch/q10.slt.part
@@ -60,14 +60,16 @@ logical_plan
 07)------------Inner Join: orders.o_orderkey = lineitem.l_orderkey
 08)--------------Projection: customer.c_custkey, customer.c_name, customer.c_address, customer.c_nationkey, customer.c_phone, customer.c_acctbal, customer.c_comment, orders.o_orderkey
 09)----------------Inner Join: customer.c_custkey = orders.o_custkey
-10)------------------TableScan: customer projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment]
-11)------------------Projection: orders.o_orderkey, orders.o_custkey
-12)--------------------Filter: orders.o_orderdate >= Date32("1993-10-01") AND orders.o_orderdate < Date32("1994-01-01")
-13)----------------------TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate], partial_filters=[orders.o_orderdate >= Date32("1993-10-01"), orders.o_orderdate < Date32("1994-01-01")]
-14)--------------Projection: lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount
-15)----------------Filter: lineitem.l_returnflag = Utf8("R")
-16)------------------TableScan: lineitem projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], partial_filters=[lineitem.l_returnflag = Utf8("R")]
-17)----------TableScan: nation projection=[n_nationkey, n_name]
+10)------------------Filter: customer.c_nationkey IS NOT NULL AND customer.c_custkey IS NOT NULL
+11)--------------------TableScan: customer projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment], partial_filters=[customer.c_nationkey IS NOT NULL, customer.c_custkey IS NOT NULL]
+12)------------------Projection: orders.o_orderkey, orders.o_custkey
+13)--------------------Filter: orders.o_orderkey IS NOT NULL AND orders.o_custkey IS NOT NULL AND orders.o_orderdate >= Date32("1993-10-01") AND orders.o_orderdate < Date32("1994-01-01")
+14)----------------------TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate], partial_filters=[orders.o_orderdate >= Date32("1993-10-01"), orders.o_orderdate < Date32("1994-01-01"), orders.o_orderkey IS NOT NULL, orders.o_custkey IS NOT NULL]
+15)--------------Projection: lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount
+16)----------------Filter: lineitem.l_orderkey IS NOT NULL AND lineitem.l_returnflag = Utf8("R")
+17)------------------TableScan: lineitem projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], partial_filters=[lineitem.l_returnflag = Utf8("R"), lineitem.l_orderkey IS NOT NULL]
+18)----------Filter: nation.n_nationkey IS NOT NULL
+19)------------TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_nationkey IS NOT NULL]
 physical_plan
 01)SortPreservingMergeExec: [revenue@2 DESC], fetch=10
 02)--SortExec: TopK(fetch=10), expr=[revenue@2 DESC], preserve_partitioning=[true]
@@ -88,24 +90,26 @@ physical_plan
 17)--------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7]
 18)----------------------------------CoalesceBatchesExec: target_batch_size=8192
 19)------------------------------------RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4
-20)--------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
-21)----------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/customer.tbl]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment], has_header=false
-22)----------------------------------CoalesceBatchesExec: target_batch_size=8192
-23)------------------------------------RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4
-24)--------------------------------------CoalesceBatchesExec: target_batch_size=8192
-25)----------------------------------------FilterExec: o_orderdate@2 >= 1993-10-01 AND o_orderdate@2 < 1994-01-01, projection=[o_orderkey@0, o_custkey@1]
-26)------------------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:0..4223281], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:4223281..8446562], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:8446562..12669843], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:12669843..16893122]]}, projection=[o_orderkey, o_custkey, o_orderdate], has_header=false
-27)--------------------------CoalesceBatchesExec: target_batch_size=8192
-28)----------------------------RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4
-29)------------------------------CoalesceBatchesExec: target_batch_size=8192
-30)--------------------------------FilterExec: l_returnflag@3 = R, projection=[l_orderkey@0, l_extendedprice@1, l_discount@2]
-31)----------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], has_header=false
-32)------------------CoalesceBatchesExec: target_batch_size=8192
-33)--------------------RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4
-34)----------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
-35)------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], has_header=false
-
-
+20)--------------------------------------CoalesceBatchesExec: target_batch_size=8192
+21)----------------------------------------FilterExec: c_nationkey@3 IS NOT NULL AND c_custkey@0 IS NOT NULL
+22)------------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+23)--------------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/customer.tbl]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment], has_header=false
+24)----------------------------------CoalesceBatchesExec: target_batch_size=8192
+25)------------------------------------RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4
+26)--------------------------------------CoalesceBatchesExec: target_batch_size=8192
+27)----------------------------------------FilterExec: o_orderkey@0 IS NOT NULL AND o_custkey@1 IS NOT NULL AND o_orderdate@2 >= 1993-10-01 AND o_orderdate@2 < 1994-01-01, projection=[o_orderkey@0, o_custkey@1]
+28)------------------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:0..4223281], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:4223281..8446562], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:8446562..12669843], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:12669843..16893122]]}, projection=[o_orderkey, o_custkey, o_orderdate], has_header=false
+29)--------------------------CoalesceBatchesExec: target_batch_size=8192
+30)----------------------------RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4
+31)------------------------------CoalesceBatchesExec: target_batch_size=8192
+32)--------------------------------FilterExec: l_orderkey@0 IS NOT NULL AND l_returnflag@3 = R, projection=[l_orderkey@0, l_extendedprice@1, l_discount@2]
+33)----------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], has_header=false
+34)------------------CoalesceBatchesExec: target_batch_size=8192
+35)--------------------RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4
+36)----------------------CoalesceBatchesExec: target_batch_size=8192
+37)------------------------FilterExec: n_nationkey@0 IS NOT NULL
+38)--------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+39)----------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], has_header=false
 
 query ITRRTTTT
 select

From 5132ff84915e539edd3fea388cd1cdbec41d43dc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <danielheres@gmail.com>
Date: Sat, 7 Sep 2024 16:15:05 +0200
Subject: [PATCH 08/22] WIP

---
 .../optimizer/tests/optimizer_integration.rs  | 21 +++++++------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/datafusion/optimizer/tests/optimizer_integration.rs b/datafusion/optimizer/tests/optimizer_integration.rs
index 5292b66197f6..da5e92eafd11 100644
--- a/datafusion/optimizer/tests/optimizer_integration.rs
+++ b/datafusion/optimizer/tests/optimizer_integration.rs
@@ -177,15 +177,12 @@ fn intersect() -> Result<()> {
     let plan = test_sql(sql)?;
     let expected =
         "LeftSemi Join: test.col_int32 = test.col_int32, test.col_utf8 = test.col_utf8\
-    \n  Aggregate: groupBy=[[test.col_int32, test.col_utf8]], aggr=[[]]\
-    \n    LeftSemi Join: test.col_int32 = test.col_int32, test.col_utf8 = test.col_utf8\
-    \n      Aggregate: groupBy=[[test.col_int32, test.col_utf8]], aggr=[[]]\
-    \n        Filter: test.col_int32 IS NOT NULL AND test.col_utf8 IS NOT NULL\
-    \n          TableScan: test projection=[col_int32, col_utf8]\
-    \n      Filter: test.col_int32 IS NOT NULL AND test.col_utf8 IS NOT NULL\
-    \n        TableScan: test projection=[col_int32, col_utf8]\
-    \n  Filter: test.col_int32 IS NOT NULL AND test.col_utf8 IS NOT NULL\
-    \n    TableScan: test projection=[col_int32, col_utf8]";
+        \n  Aggregate: groupBy=[[test.col_int32, test.col_utf8]], aggr=[[]]\
+        \n    LeftSemi Join: test.col_int32 = test.col_int32, test.col_utf8 = test.col_utf8\
+        \n      Aggregate: groupBy=[[test.col_int32, test.col_utf8]], aggr=[[]]\
+        \n        TableScan: test projection=[col_int32, col_utf8]\
+        \n      TableScan: test projection=[col_int32, col_utf8]\
+        \n  TableScan: test projection=[col_int32, col_utf8]";
     assert_eq!(expected, format!("{plan}"));
     Ok(())
 }
@@ -281,11 +278,9 @@ fn test_same_name_but_not_ambiguous() {
     let expected = "LeftSemi Join: t1.col_int32 = t2.col_int32\
     \n  Aggregate: groupBy=[[t1.col_int32]], aggr=[[]]\
     \n    SubqueryAlias: t1\
-    \n      Filter: test.col_int32 IS NOT NULL\
-    \n        TableScan: test projection=[col_int32]\
+    \n      TableScan: test projection=[col_int32]\
     \n  SubqueryAlias: t2\
-    \n    Filter: test.col_int32 IS NOT NULL\
-    \n      TableScan: test projection=[col_int32]";
+    \n    TableScan: test projection=[col_int32]";
     assert_eq!(expected, format!("{plan}"));
 }
 

From 245fc11a3a3d57e0cd6a2cf2e3d16a631b0fe216 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <danielheres@gmail.com>
Date: Sat, 7 Sep 2024 16:26:29 +0200
Subject: [PATCH 09/22] Add constraints

---
 .../test_files/tpch/create_tables.slt.part    | 138 +++++++++---------
 1 file changed, 69 insertions(+), 69 deletions(-)

diff --git a/datafusion/sqllogictest/test_files/tpch/create_tables.slt.part b/datafusion/sqllogictest/test_files/tpch/create_tables.slt.part
index d6249cb57990..92507aaf947f 100644
--- a/datafusion/sqllogictest/test_files/tpch/create_tables.slt.part
+++ b/datafusion/sqllogictest/test_files/tpch/create_tables.slt.part
@@ -23,102 +23,102 @@
 
 statement ok
 CREATE EXTERNAL TABLE IF NOT EXISTS supplier (
-        s_suppkey  BIGINT,
-        s_name VARCHAR,
-        s_address VARCHAR,
-        s_nationkey BIGINT,
-        s_phone VARCHAR,
-        s_acctbal DECIMAL(15, 2),
-        s_comment VARCHAR,
-        s_rev VARCHAR,
+        s_suppkey  BIGINT NOT NULL,
+        s_name VARCHAR NOT NULL,
+        s_address VARCHAR NOT NULL,
+        s_nationkey BIGINT NOT NULL,
+        s_phone VARCHAR NOT NULL,
+        s_acctbal DECIMAL(15, 2) NOT NULL,
+        s_comment VARCHAR NOT NULL,
+        s_rev VARCHAR NOT NULL,
 ) STORED AS CSV LOCATION 'test_files/tpch/data/supplier.tbl' OPTIONS ('format.delimiter' '|', 'format.has_header' 'false');
 
 statement ok
 CREATE EXTERNAL TABLE IF NOT EXISTS part (
-        p_partkey BIGINT,
-        p_name VARCHAR,
-        p_mfgr VARCHAR,
-        p_brand VARCHAR,
-        p_type VARCHAR,
-        p_size INTEGER,
-        p_container VARCHAR,
-        p_retailprice DECIMAL(15, 2),
-        p_comment VARCHAR,
-        p_rev VARCHAR,
+        p_partkey BIGINT NOT NULL,
+        p_name VARCHAR NOT NULL,
+        p_mfgr VARCHAR NOT NULL,
+        p_brand VARCHAR NOT NULL,
+        p_type VARCHAR NOT NULL,
+        p_size INTEGER NOT NULL,
+        p_container VARCHAR NOT NULL,
+        p_retailprice DECIMAL(15, 2) NOT NULL,
+        p_comment VARCHAR NOT NULL,
+        p_rev VARCHAR NOT NULL,
 ) STORED AS CSV LOCATION 'test_files/tpch/data/part.tbl' OPTIONS ('format.delimiter' '|', 'format.has_header' 'false');
 
 
 statement ok
 CREATE EXTERNAL TABLE IF NOT EXISTS partsupp (
-        ps_partkey BIGINT,
-        ps_suppkey BIGINT,
-        ps_availqty INTEGER,
-        ps_supplycost DECIMAL(15, 2),
-        ps_comment VARCHAR,
-        ps_rev VARCHAR,
+        ps_partkey BIGINT NOT NULL,
+        ps_suppkey BIGINT NOT NULL,
+        ps_availqty INTEGER NOT NULL,
+        ps_supplycost DECIMAL(15, 2) NOT NULL,
+        ps_comment VARCHAR NOT NULL,
+        ps_rev VARCHAR NOT NULL,
 ) STORED AS CSV LOCATION 'test_files/tpch/data/partsupp.tbl' OPTIONS ('format.delimiter' '|', 'format.has_header' 'false');
 
 statement ok
 CREATE EXTERNAL TABLE IF NOT EXISTS customer (
-        c_custkey BIGINT,
-        c_name VARCHAR,
-        c_address VARCHAR,
-        c_nationkey BIGINT,
-        c_phone VARCHAR,
-        c_acctbal DECIMAL(15, 2),
-        c_mktsegment VARCHAR,
-        c_comment VARCHAR,
-        c_rev VARCHAR,
+        c_custkey BIGINT  NOT NULL,
+        c_name VARCHAR NOT NULL,
+        c_address VARCHAR NOT NULL,
+        c_nationkey BIGINT NOT NULL,
+        c_phone VARCHAR NOT NULL,
+        c_acctbal DECIMAL(15, 2) NOT NULL,
+        c_mktsegment VARCHAR NOT NULL,
+        c_comment VARCHAR NOT NULL,
+        c_rev VARCHAR NOT NULL,
 ) STORED AS CSV LOCATION 'test_files/tpch/data/customer.tbl' OPTIONS ('format.delimiter' '|', 'format.has_header' 'false');
 
 statement ok
 CREATE EXTERNAL TABLE IF NOT EXISTS orders (
-        o_orderkey BIGINT,
-        o_custkey BIGINT,
-        o_orderstatus VARCHAR,
-        o_totalprice DECIMAL(15, 2),
-        o_orderdate DATE,
-        o_orderpriority VARCHAR,
-        o_clerk VARCHAR,
-        o_shippriority INTEGER,
-        o_comment VARCHAR,
-        o_rev VARCHAR,
+        o_orderkey BIGINT NOT NULL,
+        o_custkey BIGINT NOT NULL,
+        o_orderstatus VARCHAR NOT NULL,
+        o_totalprice DECIMAL(15, 2) NOT NULL,
+        o_orderdate DATE NOT NULL,
+        o_orderpriority VARCHAR NOT NULL,
+        o_clerk VARCHAR NOT NULL,
+        o_shippriority INTEGER NOT NULL,
+        o_comment VARCHAR NOT NULL,
+        o_rev VARCHAR NOT NULL,
 )  STORED AS CSV LOCATION 'test_files/tpch/data/orders.tbl' OPTIONS ('format.delimiter' '|', 'format.has_header' 'false');
 
 statement ok
 CREATE EXTERNAL TABLE IF NOT EXISTS lineitem (
-        l_orderkey BIGINT,
-        l_partkey BIGINT,
-        l_suppkey BIGINT,
-        l_linenumber INTEGER,
-        l_quantity DECIMAL(15, 2),
-        l_extendedprice DECIMAL(15, 2),
-        l_discount DECIMAL(15, 2),
-        l_tax DECIMAL(15, 2),
-        l_returnflag VARCHAR,
-        l_linestatus VARCHAR,
-        l_shipdate DATE,
-        l_commitdate DATE,
-        l_receiptdate DATE,
-        l_shipinstruct VARCHAR,
-        l_shipmode VARCHAR,
-        l_comment VARCHAR,
-        l_rev VARCHAR,
+        l_orderkey BIGINT NOT NULL,
+        l_partkey BIGINT NOT NULL,
+        l_suppkey BIGINT NOT NULL,
+        l_linenumber INTEGER NOT NULL,
+        l_quantity DECIMAL(15, 2) NOT NULL,
+        l_extendedprice DECIMAL(15, 2) NOT NULL,
+        l_discount DECIMAL(15, 2) NOT NULL,
+        l_tax DECIMAL(15, 2) NOT NULL,
+        l_returnflag VARCHAR NOT NULL,
+        l_linestatus VARCHAR NOT NULL,
+        l_shipdate DATE NOT NULL,
+        l_commitdate DATE NOT NULL,
+        l_receiptdate DATE NOT NULL,
+        l_shipinstruct VARCHAR NOT NULL,
+        l_shipmode VARCHAR NOT NULL,
+        l_comment VARCHAR NOT NULL,
+        l_rev VARCHAR NOT NULL,
 ) STORED AS CSV LOCATION 'test_files/tpch/data/lineitem.tbl' OPTIONS ('format.delimiter' '|', 'format.has_header' 'false');
 
 statement ok
 CREATE EXTERNAL TABLE IF NOT EXISTS nation (
-        n_nationkey BIGINT,
-        n_name VARCHAR,
-        n_regionkey BIGINT,
-        n_comment VARCHAR,
-        n_rev VARCHAR,
+        n_nationkey BIGINT NOT NULL,
+        n_name VARCHAR NOT NULL,
+        n_regionkey BIGINT NOT NULL,
+        n_comment VARCHAR NOT NULL,
+        n_rev VARCHAR NOT NULL,
 ) STORED AS CSV LOCATION 'test_files/tpch/data/nation.tbl' OPTIONS ('format.delimiter' '|', 'format.has_header' 'false');
 
 statement ok
 CREATE EXTERNAL TABLE IF NOT EXISTS region (
-        r_regionkey BIGINT,
-        r_name VARCHAR,
-        r_comment VARCHAR,
-        r_rev VARCHAR,
+        r_regionkey BIGINT NOT NULL,
+        r_name VARCHAR NOT NULL,
+        r_comment VARCHAR NOT NULL,
+        r_rev VARCHAR NOT NULL,
 ) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl' OPTIONS ('format.delimiter' '|', 'format.has_header' 'false');

From 6b83f1ac54ae5f9fd41b193aaeca2f15043d2fb8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <danielheres@gmail.com>
Date: Sat, 7 Sep 2024 16:42:41 +0200
Subject: [PATCH 10/22] test failures

---
 .../sqllogictest/test_files/group_by.slt      |  7 ++
 datafusion/sqllogictest/test_files/join.slt   |  2 +
 datafusion/sqllogictest/test_files/joins.slt  |  7 +-
 .../sqllogictest/test_files/predicates.slt    | 39 ++++++----
 .../test_files/sort_merge_join.slt            | 24 ++++--
 .../sqllogictest/test_files/subquery.slt      | 14 ++--
 .../sqllogictest/test_files/tpch/q10.slt.part | 73 ++++++-------------
 7 files changed, 86 insertions(+), 80 deletions(-)

diff --git a/datafusion/sqllogictest/test_files/group_by.slt b/datafusion/sqllogictest/test_files/group_by.slt
index 9eb7129027a7..4f04b75de1da 100644
--- a/datafusion/sqllogictest/test_files/group_by.slt
+++ b/datafusion/sqllogictest/test_files/group_by.slt
@@ -2015,6 +2015,13 @@ logical_plan
 09)----------Filter: tab0.col0 IS NOT NULL
 10)------------TableScan: tab0 projection=[col0, col1]
 physical_plan
+01)SortPreservingMergeExec: [col0@0 ASC NULLS LAST]
+02)--SortExec: expr=[col0@0 ASC NULLS LAST], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[col0@0 as col0, last_value(r.col1) ORDER BY [r.col0 ASC NULLS LAST]@3 as last_col1]
+04)------AggregateExec: mode=FinalPartitioned, gby=[col0@0 as col0, col1@1 as col1, col2@2 as col2], aggr=[last_value(r.col1) ORDER BY [r.col0 ASC NULLS LAST]]
+05)--------CoalesceBatchesExec: target_batch_size=8192
+06)----------RepartitionExec: partitioning=Hash([col0@0, col1@1, col2@2], 4), input_partitions=4
+07)------------AggregateExec: mode=Partial, gby=[col0@0 as col0, col1@1 as col1, col2@2 as col2], aggr=[last_value(r.col1) ORDER BY [r.col0 ASC NULLS LAST]]
 08)--------------ProjectionExec: expr=[col0@2 as col0, col1@3 as col1, col2@4 as col2, col0@0 as col0, col1@1 as col1]
 09)----------------CoalesceBatchesExec: target_batch_size=8192
 10)------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(col0@0, col0@0)]
diff --git a/datafusion/sqllogictest/test_files/join.slt b/datafusion/sqllogictest/test_files/join.slt
index f5865f99acfe..1c43c63ddbdf 100644
--- a/datafusion/sqllogictest/test_files/join.slt
+++ b/datafusion/sqllogictest/test_files/join.slt
@@ -756,6 +756,8 @@ logical_plan
 05)----Filter: t1.a IS NOT NULL
 06)------TableScan: t1 projection=[a, b]
 physical_plan
+01)CoalesceBatchesExec: target_batch_size=8192
+02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0)]
 03)----CoalesceBatchesExec: target_batch_size=8192
 04)------FilterExec: a@0 IS NOT NULL
 05)--------MemoryExec: partitions=1, partition_sizes=[1]
diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt
index d4dd6359f259..2b6444da3663 100644
--- a/datafusion/sqllogictest/test_files/joins.slt
+++ b/datafusion/sqllogictest/test_files/joins.slt
@@ -1068,9 +1068,10 @@ WHERE join_t2.t2_int < 10 or (join_t1.t1_int > 2 and join_t2.t2_name != 'w')
 ----
 logical_plan
 01)Inner Join: join_t1.t1_id = join_t2.t2_id Filter: join_t2.t2_int < UInt32(10) OR join_t1.t1_int > UInt32(2) AND join_t2.t2_name != Utf8("w")
-02)--TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
-03)--Filter: join_t2.t2_int < UInt32(10) OR join_t2.t2_name != Utf8("w")
-04)----TableScan: join_t2 projection=[t2_id, t2_name, t2_int]
+02)--Filter: join_t1.t1_id IS NOT NULL
+03)----TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
+04)--Filter: (join_t2.t2_int < UInt32(10) OR join_t2.t2_name != Utf8("w")) AND join_t2.t2_id IS NOT NULL
+05)----TableScan: join_t2 projection=[t2_id, t2_name, t2_int]
 
 # Reduce left join 3 (to inner join)
 
diff --git a/datafusion/sqllogictest/test_files/predicates.slt b/datafusion/sqllogictest/test_files/predicates.slt
index 646cd7e00f21..2edd71d88ff1 100644
--- a/datafusion/sqllogictest/test_files/predicates.slt
+++ b/datafusion/sqllogictest/test_files/predicates.slt
@@ -753,11 +753,13 @@ logical_plan
 03)----Inner Join: part.p_partkey = partsupp.ps_partkey
 04)------Projection: lineitem.l_extendedprice, lineitem.l_discount, part.p_partkey
 05)--------Inner Join: lineitem.l_partkey = part.p_partkey
-06)----------TableScan: lineitem projection=[l_partkey, l_extendedprice, l_discount]
-07)----------Projection: part.p_partkey
-08)------------Filter: part.p_brand = Utf8("Brand#12") OR part.p_brand = Utf8("Brand#23")
-09)--------------TableScan: part projection=[p_partkey, p_brand], partial_filters=[part.p_brand = Utf8("Brand#12") OR part.p_brand = Utf8("Brand#23")]
-10)------TableScan: partsupp projection=[ps_partkey, ps_suppkey]
+06)----------Filter: lineitem.l_partkey IS NOT NULL
+07)------------TableScan: lineitem projection=[l_partkey, l_extendedprice, l_discount], partial_filters=[lineitem.l_partkey IS NOT NULL]
+08)----------Projection: part.p_partkey
+09)------------Filter: part.p_partkey IS NOT NULL AND (part.p_brand = Utf8("Brand#12") OR part.p_brand = Utf8("Brand#23"))
+10)--------------TableScan: part projection=[p_partkey, p_brand], partial_filters=[part.p_brand = Utf8("Brand#12") OR part.p_brand = Utf8("Brand#23"), part.p_partkey IS NOT NULL]
+11)------Filter: partsupp.ps_partkey IS NOT NULL
+12)--------TableScan: partsupp projection=[ps_partkey, ps_suppkey]
 physical_plan
 01)AggregateExec: mode=SinglePartitioned, gby=[p_partkey@2 as p_partkey], aggr=[sum(lineitem.l_extendedprice), avg(lineitem.l_discount), count(DISTINCT partsupp.ps_suppkey)]
 02)--CoalesceBatchesExec: target_batch_size=8192
@@ -766,17 +768,22 @@ physical_plan
 05)--------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(l_partkey@0, p_partkey@0)], projection=[l_extendedprice@1, l_discount@2, p_partkey@3]
 06)----------CoalesceBatchesExec: target_batch_size=8192
 07)------------RepartitionExec: partitioning=Hash([l_partkey@0], 4), input_partitions=4
-08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
-09)----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/tpch-csv/lineitem.csv]]}, projection=[l_partkey, l_extendedprice, l_discount], has_header=true
-10)----------CoalesceBatchesExec: target_batch_size=8192
-11)------------RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4
-12)--------------CoalesceBatchesExec: target_batch_size=8192
-13)----------------FilterExec: p_brand@1 = Brand#12 OR p_brand@1 = Brand#23, projection=[p_partkey@0]
-14)------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
-15)--------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/tpch-csv/part.csv]]}, projection=[p_partkey, p_brand], has_header=true
-16)------CoalesceBatchesExec: target_batch_size=8192
-17)--------RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=1
-18)----------MemoryExec: partitions=1, partition_sizes=[1]
+08)--------------CoalesceBatchesExec: target_batch_size=8192
+09)----------------FilterExec: l_partkey@0 IS NOT NULL
+10)------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+11)--------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/tpch-csv/lineitem.csv]]}, projection=[l_partkey, l_extendedprice, l_discount], has_header=true
+12)----------CoalesceBatchesExec: target_batch_size=8192
+13)------------RepartitionExec: partitioning=Hash([p_partkey@0], 4), input_partitions=4
+14)--------------CoalesceBatchesExec: target_batch_size=8192
+15)----------------FilterExec: p_partkey@0 IS NOT NULL AND (p_brand@1 = Brand#12 OR p_brand@1 = Brand#23), projection=[p_partkey@0]
+16)------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+17)--------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/tpch-csv/part.csv]]}, projection=[p_partkey, p_brand], has_header=true
+18)------CoalesceBatchesExec: target_batch_size=8192
+19)--------RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4
+20)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+21)------------CoalesceBatchesExec: target_batch_size=8192
+22)--------------FilterExec: ps_partkey@0 IS NOT NULL
+23)----------------MemoryExec: partitions=1, partition_sizes=[1]
 
 # Inlist simplification
 
diff --git a/datafusion/sqllogictest/test_files/sort_merge_join.slt b/datafusion/sqllogictest/test_files/sort_merge_join.slt
index ebd53e9690fc..e9b4c73e492b 100644
--- a/datafusion/sqllogictest/test_files/sort_merge_join.slt
+++ b/datafusion/sqllogictest/test_files/sort_merge_join.slt
@@ -34,14 +34,26 @@ EXPLAIN SELECT t1.a, t1.b, t2.a, t2.b FROM t1 JOIN t2 ON t1.a = t2.a AND t2.b *
 ----
 logical_plan
 01)Inner Join: t1.a = t2.a Filter: CAST(t2.b AS Int64) * Int64(50) <= CAST(t1.b AS Int64)
-02)--TableScan: t1 projection=[a, b]
-03)--TableScan: t2 projection=[a, b]
+02)--Filter: t1.a IS NOT NULL
+03)----TableScan: t1 projection=[a, b]
+04)--Filter: t2.a IS NOT NULL
+05)----TableScan: t2 projection=[a, b]
 physical_plan
 01)SortMergeJoin: join_type=Inner, on=[(a@0, a@0)], filter=CAST(b@1 AS Int64) * 50 <= CAST(b@0 AS Int64)
-02)--SortExec: expr=[a@0 ASC], preserve_partitioning=[false]
-03)----MemoryExec: partitions=1, partition_sizes=[1]
-04)--SortExec: expr=[a@0 ASC], preserve_partitioning=[false]
-05)----MemoryExec: partitions=1, partition_sizes=[1]
+02)--SortExec: expr=[a@0 ASC], preserve_partitioning=[true]
+03)----CoalesceBatchesExec: target_batch_size=8192
+04)------RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4
+05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+06)----------CoalesceBatchesExec: target_batch_size=8192
+07)------------FilterExec: a@0 IS NOT NULL
+08)--------------MemoryExec: partitions=1, partition_sizes=[1]
+09)--SortExec: expr=[a@0 ASC], preserve_partitioning=[true]
+10)----CoalesceBatchesExec: target_batch_size=8192
+11)------RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4
+12)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+13)----------CoalesceBatchesExec: target_batch_size=8192
+14)------------FilterExec: a@0 IS NOT NULL
+15)--------------MemoryExec: partitions=1, partition_sizes=[1]
 
 # inner join with join filter
 query TITI rowsort
diff --git a/datafusion/sqllogictest/test_files/subquery.slt b/datafusion/sqllogictest/test_files/subquery.slt
index 4469bebeeb84..3e907ad88251 100644
--- a/datafusion/sqllogictest/test_files/subquery.slt
+++ b/datafusion/sqllogictest/test_files/subquery.slt
@@ -235,7 +235,8 @@ logical_plan
 04)----SubqueryAlias: __scalar_sq_1
 05)------Projection: sum(t2.t2_int * Float64(1)) + Float64(1) AS sum(t2.t2_int * Float64(1)) + Int64(1), t2.t2_id
 06)--------Aggregate: groupBy=[[t2.t2_id]], aggr=[[sum(CAST(t2.t2_int AS Float64)) AS sum(t2.t2_int * Float64(1))]]
-07)----------TableScan: t2 projection=[t2_id, t2_int]
+07)----------Filter: t2.t2_id IS NOT NULL
+08)------------TableScan: t2 projection=[t2_id, t2_int]
 physical_plan
 01)ProjectionExec: expr=[t1_id@1 as t1_id, sum(t2.t2_int * Float64(1)) + Int64(1)@0 as t2_sum]
 02)--CoalesceBatchesExec: target_batch_size=2
@@ -245,11 +246,12 @@ physical_plan
 06)----------CoalesceBatchesExec: target_batch_size=2
 07)------------RepartitionExec: partitioning=Hash([t2_id@0], 4), input_partitions=4
 08)--------------AggregateExec: mode=Partial, gby=[t2_id@0 as t2_id], aggr=[sum(t2.t2_int * Float64(1))]
-09)----------------MemoryExec: partitions=4, partition_sizes=[1, 0, 0, 0]
-10)------CoalesceBatchesExec: target_batch_size=2
-11)--------RepartitionExec: partitioning=Hash([t1_id@0], 4), input_partitions=4
-12)----------MemoryExec: partitions=4, partition_sizes=[1, 0, 0, 0]
-
+09)----------------CoalesceBatchesExec: target_batch_size=2
+10)------------------FilterExec: t2_id@0 IS NOT NULL
+11)--------------------MemoryExec: partitions=4, partition_sizes=[1, 0, 0, 0]
+12)------CoalesceBatchesExec: target_batch_size=2
+13)--------RepartitionExec: partitioning=Hash([t1_id@0], 4), input_partitions=4
+14)----------MemoryExec: partitions=4, partition_sizes=[1, 0, 0, 0]
 query IR rowsort
 SELECT t1_id, (SELECT sum(t2_int * 1.0) + 1 FROM t2 WHERE t2.t2_id = t1.t1_id) as t2_sum from t1
 ----
diff --git a/datafusion/sqllogictest/test_files/tpch/q10.slt.part b/datafusion/sqllogictest/test_files/tpch/q10.slt.part
index d9779da8e629..aed5e055e4db 100644
--- a/datafusion/sqllogictest/test_files/tpch/q10.slt.part
+++ b/datafusion/sqllogictest/test_files/tpch/q10.slt.part
@@ -60,56 +60,31 @@ logical_plan
 07)------------Inner Join: orders.o_orderkey = lineitem.l_orderkey
 08)--------------Projection: customer.c_custkey, customer.c_name, customer.c_address, customer.c_nationkey, customer.c_phone, customer.c_acctbal, customer.c_comment, orders.o_orderkey
 09)----------------Inner Join: customer.c_custkey = orders.o_custkey
-10)------------------Filter: customer.c_nationkey IS NOT NULL AND customer.c_custkey IS NOT NULL
-11)--------------------TableScan: customer projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment], partial_filters=[customer.c_nationkey IS NOT NULL, customer.c_custkey IS NOT NULL]
-12)------------------Projection: orders.o_orderkey, orders.o_custkey
-13)--------------------Filter: orders.o_orderkey IS NOT NULL AND orders.o_custkey IS NOT NULL AND orders.o_orderdate >= Date32("1993-10-01") AND orders.o_orderdate < Date32("1994-01-01")
-14)----------------------TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate], partial_filters=[orders.o_orderdate >= Date32("1993-10-01"), orders.o_orderdate < Date32("1994-01-01"), orders.o_orderkey IS NOT NULL, orders.o_custkey IS NOT NULL]
-15)--------------Projection: lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount
-16)----------------Filter: lineitem.l_orderkey IS NOT NULL AND lineitem.l_returnflag = Utf8("R")
-17)------------------TableScan: lineitem projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], partial_filters=[lineitem.l_returnflag = Utf8("R"), lineitem.l_orderkey IS NOT NULL]
-18)----------Filter: nation.n_nationkey IS NOT NULL
-19)------------TableScan: nation projection=[n_nationkey, n_name], partial_filters=[nation.n_nationkey IS NOT NULL]
+10)------------------TableScan: customer projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment]
+11)------------------Projection: orders.o_orderkey, orders.o_custkey
+12)--------------------Filter: orders.o_orderdate >= Date32("1993-10-01") AND orders.o_orderdate < Date32("1994-01-01")
+13)----------------------TableScan: orders projection=[o_orderkey, o_custkey, o_orderdate], partial_filters=[orders.o_orderdate >= Date32("1993-10-01"), orders.o_orderdate < Date32("1994-01-01")]
+14)--------------Projection: lineitem.l_orderkey, lineitem.l_extendedprice, lineitem.l_discount
+15)----------------Filter: lineitem.l_returnflag = Utf8("R")
+16)------------------TableScan: lineitem projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], partial_filters=[lineitem.l_returnflag = Utf8("R")]
+17)----------TableScan: nation projection=[n_nationkey, n_name]
 physical_plan
-01)SortPreservingMergeExec: [revenue@2 DESC], fetch=10
-02)--SortExec: TopK(fetch=10), expr=[revenue@2 DESC], preserve_partitioning=[true]
-03)----ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@7 as revenue, c_acctbal@2 as c_acctbal, n_name@4 as n_name, c_address@5 as c_address, c_phone@3 as c_phone, c_comment@6 as c_comment]
-04)------AggregateExec: mode=FinalPartitioned, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@2 as c_acctbal, c_phone@3 as c_phone, n_name@4 as n_name, c_address@5 as c_address, c_comment@6 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]
-05)--------CoalesceBatchesExec: target_batch_size=8192
-06)----------RepartitionExec: partitioning=Hash([c_custkey@0, c_name@1, c_acctbal@2, c_phone@3, n_name@4, c_address@5, c_comment@6], 4), input_partitions=4
-07)------------AggregateExec: mode=Partial, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@4 as c_acctbal, c_phone@3 as c_phone, n_name@8 as n_name, c_address@2 as c_address, c_comment@5 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]
-08)--------------CoalesceBatchesExec: target_batch_size=8192
-09)----------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_nationkey@3, n_nationkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@7, l_discount@8, n_name@10]
-10)------------------CoalesceBatchesExec: target_batch_size=8192
-11)--------------------RepartitionExec: partitioning=Hash([c_nationkey@3], 4), input_partitions=4
-12)----------------------CoalesceBatchesExec: target_batch_size=8192
-13)------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@7, l_orderkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@9, l_discount@10]
-14)--------------------------CoalesceBatchesExec: target_batch_size=8192
-15)----------------------------RepartitionExec: partitioning=Hash([o_orderkey@7], 4), input_partitions=4
-16)------------------------------CoalesceBatchesExec: target_batch_size=8192
-17)--------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7]
-18)----------------------------------CoalesceBatchesExec: target_batch_size=8192
-19)------------------------------------RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4
-20)--------------------------------------CoalesceBatchesExec: target_batch_size=8192
-21)----------------------------------------FilterExec: c_nationkey@3 IS NOT NULL AND c_custkey@0 IS NOT NULL
-22)------------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
-23)--------------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/customer.tbl]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment], has_header=false
-24)----------------------------------CoalesceBatchesExec: target_batch_size=8192
-25)------------------------------------RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4
-26)--------------------------------------CoalesceBatchesExec: target_batch_size=8192
-27)----------------------------------------FilterExec: o_orderkey@0 IS NOT NULL AND o_custkey@1 IS NOT NULL AND o_orderdate@2 >= 1993-10-01 AND o_orderdate@2 < 1994-01-01, projection=[o_orderkey@0, o_custkey@1]
-28)------------------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:0..4223281], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:4223281..8446562], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:8446562..12669843], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:12669843..16893122]]}, projection=[o_orderkey, o_custkey, o_orderdate], has_header=false
-29)--------------------------CoalesceBatchesExec: target_batch_size=8192
-30)----------------------------RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4
-31)------------------------------CoalesceBatchesExec: target_batch_size=8192
-32)--------------------------------FilterExec: l_orderkey@0 IS NOT NULL AND l_returnflag@3 = R, projection=[l_orderkey@0, l_extendedprice@1, l_discount@2]
-33)----------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], has_header=false
-34)------------------CoalesceBatchesExec: target_batch_size=8192
-35)--------------------RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4
-36)----------------------CoalesceBatchesExec: target_batch_size=8192
-37)------------------------FilterExec: n_nationkey@0 IS NOT NULL
-38)--------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
-39)----------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], has_header=false
+20)--------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+21)----------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/customer.tbl]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment], has_header=false
+22)----------------------------------CoalesceBatchesExec: target_batch_size=8192
+23)------------------------------------RepartitionExec: partitioning=Hash([o_custkey@1], 4), input_partitions=4
+24)--------------------------------------CoalesceBatchesExec: target_batch_size=8192
+25)----------------------------------------FilterExec: o_orderdate@2 >= 1993-10-01 AND o_orderdate@2 < 1994-01-01, projection=[o_orderkey@0, o_custkey@1]
+26)------------------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:0..4223281], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:4223281..8446562], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:8446562..12669843], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/orders.tbl:12669843..16893122]]}, projection=[o_orderkey, o_custkey, o_orderdate], has_header=false
+27)--------------------------CoalesceBatchesExec: target_batch_size=8192
+28)----------------------------RepartitionExec: partitioning=Hash([l_orderkey@0], 4), input_partitions=4
+29)------------------------------CoalesceBatchesExec: target_batch_size=8192
+30)--------------------------------FilterExec: l_returnflag@3 = R, projection=[l_orderkey@0, l_extendedprice@1, l_discount@2]
+31)----------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], has_header=false
+32)------------------CoalesceBatchesExec: target_batch_size=8192
+33)--------------------RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4
+34)----------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+35)------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], has_header=false
 
 query ITRRTTTT
 select

From 8fa7295bcf650aa52afd8e928b24a25ea2769659 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <danielheres@gmail.com>
Date: Sat, 7 Sep 2024 17:16:36 +0200
Subject: [PATCH 11/22] Wip

---
 .../sqllogictest/test_files/group_by.slt      | 16 +++++++++++-----
 datafusion/sqllogictest/test_files/join.slt   | 19 +++++++++++--------
 datafusion/sqllogictest/test_files/joins.slt  |  7 ++++---
 .../sqllogictest/test_files/subquery.slt      |  1 +
 4 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/datafusion/sqllogictest/test_files/group_by.slt b/datafusion/sqllogictest/test_files/group_by.slt
index 4f04b75de1da..1df9736df6cd 100644
--- a/datafusion/sqllogictest/test_files/group_by.slt
+++ b/datafusion/sqllogictest/test_files/group_by.slt
@@ -2879,9 +2879,11 @@ logical_plan
 04)------Projection: s.zip_code, s.country, s.sn, s.ts, s.currency, e.sn, e.amount
 05)--------Inner Join: s.currency = e.currency Filter: s.ts >= e.ts
 06)----------SubqueryAlias: s
-07)------------TableScan: sales_global projection=[zip_code, country, sn, ts, currency]
-08)----------SubqueryAlias: e
-09)------------TableScan: sales_global projection=[sn, ts, currency, amount]
+07)------------Filter: sales_global.currency IS NOT NULL
+08)--------------TableScan: sales_global projection=[zip_code, country, sn, ts, currency]
+09)----------SubqueryAlias: e
+10)------------Filter: sales_global.currency IS NOT NULL
+11)--------------TableScan: sales_global projection=[sn, ts, currency, amount]
 physical_plan
 01)SortExec: expr=[sn@2 ASC NULLS LAST], preserve_partitioning=[false]
 02)--ProjectionExec: expr=[zip_code@1 as zip_code, country@2 as country, sn@0 as sn, ts@3 as ts, currency@4 as currency, last_value(e.amount) ORDER BY [e.sn ASC NULLS LAST]@5 as last_rate]
@@ -2889,8 +2891,12 @@ physical_plan
 04)------ProjectionExec: expr=[zip_code@2 as zip_code, country@3 as country, sn@4 as sn, ts@5 as ts, currency@6 as currency, sn@0 as sn, amount@1 as amount]
 05)--------CoalesceBatchesExec: target_batch_size=8192
 06)----------HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(currency@2, currency@4)], filter=ts@0 >= ts@1, projection=[sn@0, amount@3, zip_code@4, country@5, sn@6, ts@7, currency@8]
-07)------------MemoryExec: partitions=1, partition_sizes=[1]
-08)------------MemoryExec: partitions=1, partition_sizes=[1]
+07)------------CoalesceBatchesExec: target_batch_size=8192
+08)--------------FilterExec: currency@2 IS NOT NULL
+09)----------------MemoryExec: partitions=1, partition_sizes=[1]
+10)------------CoalesceBatchesExec: target_batch_size=8192
+11)--------------FilterExec: currency@4 IS NOT NULL
+12)----------------MemoryExec: partitions=1, partition_sizes=[1]
 
 query ITIPTR rowsort
 SELECT s.zip_code, s.country, s.sn, s.ts, s.currency, LAST_VALUE(e.amount ORDER BY e.sn) AS last_rate
diff --git a/datafusion/sqllogictest/test_files/join.slt b/datafusion/sqllogictest/test_files/join.slt
index 1c43c63ddbdf..83a161bc3b8e 100644
--- a/datafusion/sqllogictest/test_files/join.slt
+++ b/datafusion/sqllogictest/test_files/join.slt
@@ -965,17 +965,20 @@ logical_plan
 05)--------Filter: employees.name = Utf8("Alice") OR employees.name != Utf8("Alice") AND employees.name = Utf8("Carol")
 06)----------TableScan: employees projection=[emp_id, name]
 07)------SubqueryAlias: d
-08)--------TableScan: department projection=[emp_id, dept_name]
+08)--------Filter: department.emp_id IS NOT NULL
+09)----------TableScan: department projection=[emp_id, dept_name]
 physical_plan
 01)CoalesceBatchesExec: target_batch_size=8192
 02)--FilterExec: dept_name@2 != Engineering AND name@1 = Alice OR name@1 != Alice AND name@1 = Carol
-03)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
-04)------CoalesceBatchesExec: target_batch_size=8192
-05)--------HashJoinExec: mode=CollectLeft, join_type=Left, on=[(emp_id@0, emp_id@0)], projection=[emp_id@0, name@1, dept_name@3]
-06)----------CoalesceBatchesExec: target_batch_size=8192
-07)------------FilterExec: name@1 = Alice OR name@1 != Alice AND name@1 = Carol
-08)--------------MemoryExec: partitions=1, partition_sizes=[1]
-09)----------MemoryExec: partitions=1, partition_sizes=[1]
+03)----CoalesceBatchesExec: target_batch_size=8192
+04)------HashJoinExec: mode=CollectLeft, join_type=Left, on=[(emp_id@0, emp_id@0)], projection=[emp_id@0, name@1, dept_name@3]
+05)--------CoalesceBatchesExec: target_batch_size=8192
+06)----------FilterExec: name@1 = Alice OR name@1 != Alice AND name@1 = Carol
+07)------------MemoryExec: partitions=1, partition_sizes=[1]
+08)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+09)----------CoalesceBatchesExec: target_batch_size=8192
+10)------------FilterExec: emp_id@0 IS NOT NULL
+11)--------------MemoryExec: partitions=1, partition_sizes=[1]
 
 query ITT
 SELECT e.emp_id, e.name, d.dept_name
diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt
index 2b6444da3663..d9e901b1eeca 100644
--- a/datafusion/sqllogictest/test_files/joins.slt
+++ b/datafusion/sqllogictest/test_files/joins.slt
@@ -1092,12 +1092,13 @@ logical_plan
 02)--SubqueryAlias: t3
 03)----Projection: join_t1.t1_id, join_t1.t1_name, join_t1.t1_int
 04)------Inner Join: join_t1.t1_id = join_t2.t2_id
-05)--------Filter: join_t1.t1_id < UInt32(100)
+05)--------Filter: join_t1.t1_id IS NOT NULL AND join_t1.t1_id < UInt32(100)
 06)----------TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
 07)--------Projection: join_t2.t2_id
-08)----------Filter: join_t2.t2_int < UInt32(3) AND join_t2.t2_id < UInt32(100)
+08)----------Filter: join_t2.t2_id IS NOT NULL AND join_t2.t2_int < UInt32(3) AND join_t2.t2_id < UInt32(100)
 09)------------TableScan: join_t2 projection=[t2_id, t2_int]
-10)--TableScan: join_t2 projection=[t2_id, t2_name, t2_int]
+10)--Filter: join_t2.t2_int IS NOT NULL
+11)----TableScan: join_t2 projection=[t2_id, t2_name, t2_int]
 
 # Reduce right join 1 (to inner join)
 
diff --git a/datafusion/sqllogictest/test_files/subquery.slt b/datafusion/sqllogictest/test_files/subquery.slt
index 3e907ad88251..aa39e796cc15 100644
--- a/datafusion/sqllogictest/test_files/subquery.slt
+++ b/datafusion/sqllogictest/test_files/subquery.slt
@@ -252,6 +252,7 @@ physical_plan
 12)------CoalesceBatchesExec: target_batch_size=2
 13)--------RepartitionExec: partitioning=Hash([t1_id@0], 4), input_partitions=4
 14)----------MemoryExec: partitions=4, partition_sizes=[1, 0, 0, 0]
+
 query IR rowsort
 SELECT t1_id, (SELECT sum(t2_int * 1.0) + 1 FROM t2 WHERE t2.t2_id = t1.t1_id) as t2_sum from t1
 ----

From 0eca1290615a0acadb0d55bade1fe59e9ca3c974 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <danielheres@gmail.com>
Date: Sat, 7 Sep 2024 17:29:27 +0200
Subject: [PATCH 12/22] Wip

---
 .../sqllogictest/test_files/group_by.slt      | 24 +++++++++------
 datafusion/sqllogictest/test_files/join.slt   | 30 ++++++++++++-------
 datafusion/sqllogictest/test_files/joins.slt  | 24 +++++++++------
 3 files changed, 49 insertions(+), 29 deletions(-)

diff --git a/datafusion/sqllogictest/test_files/group_by.slt b/datafusion/sqllogictest/test_files/group_by.slt
index 1df9736df6cd..9de34a763e31 100644
--- a/datafusion/sqllogictest/test_files/group_by.slt
+++ b/datafusion/sqllogictest/test_files/group_by.slt
@@ -3881,20 +3881,26 @@ logical_plan
 05)--------Projection: l.a, l.d, row_n
 06)----------Inner Join: l.d = r.d Filter: CAST(l.a AS Int64) >= CAST(r.a AS Int64) - Int64(10)
 07)------------SubqueryAlias: l
-08)--------------TableScan: multiple_ordered_table projection=[a, d]
-09)------------Projection: r.a, r.d, row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS row_n
-10)--------------WindowAggr: windowExpr=[[row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]
-11)----------------SubqueryAlias: r
-12)------------------TableScan: multiple_ordered_table projection=[a, d]
+08)--------------Filter: multiple_ordered_table.d IS NOT NULL
+09)----------------TableScan: multiple_ordered_table projection=[a, d], partial_filters=[multiple_ordered_table.d IS NOT NULL]
+10)------------Projection: r.a, r.d, row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS row_n
+11)--------------Filter: r.d IS NOT NULL
+12)----------------WindowAggr: windowExpr=[[row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]
+13)------------------SubqueryAlias: r
+14)--------------------TableScan: multiple_ordered_table projection=[a, d]
 physical_plan
 01)ProjectionExec: expr=[last_value(l.d) ORDER BY [l.a ASC NULLS LAST]@1 as amount_usd]
 02)--AggregateExec: mode=Single, gby=[row_n@2 as row_n], aggr=[last_value(l.d) ORDER BY [l.a ASC NULLS LAST]], ordering_mode=Sorted
 03)----CoalesceBatchesExec: target_batch_size=2
 04)------HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(d@1, d@1)], filter=CAST(a@0 AS Int64) >= CAST(a@1 AS Int64) - 10, projection=[a@0, d@1, row_n@4]
-05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true
-06)--------ProjectionExec: expr=[a@0 as a, d@1 as d, row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as row_n]
-07)----------BoundedWindowAggExec: wdw=[row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]
-08)------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true
+05)--------CoalesceBatchesExec: target_batch_size=2
+06)----------FilterExec: d@1 IS NOT NULL
+07)------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true
+08)--------ProjectionExec: expr=[a@0 as a, d@1 as d, row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as row_n]
+09)----------CoalesceBatchesExec: target_batch_size=2
+10)------------FilterExec: d@1 IS NOT NULL
+11)--------------BoundedWindowAggExec: wdw=[row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]
+12)----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true
 
 # reset partition number to 8.
 statement ok
diff --git a/datafusion/sqllogictest/test_files/join.slt b/datafusion/sqllogictest/test_files/join.slt
index 83a161bc3b8e..d15962eb6181 100644
--- a/datafusion/sqllogictest/test_files/join.slt
+++ b/datafusion/sqllogictest/test_files/join.slt
@@ -1162,22 +1162,30 @@ logical_plan
 02)--Inner Join: CAST(t1.v0 AS Float64) = t0.v1 Filter: t0.v1 + CAST(t5.v0 AS Float64) > Float64(0)
 03)----Projection: t1.v0, t1.v1, t5.v0, t5.v2, t5.v3, t5.v4
 04)------Inner Join: Using t1.v0 = t5.v0, t1.v1 = t5.v1
-05)--------TableScan: t1 projection=[v0, v1]
-06)--------TableScan: t5 projection=[v0, v1, v2, v3, v4]
-07)----TableScan: t0 projection=[v0, v1]
+05)--------Filter: CAST(t1.v0 AS Float64) IS NOT NULL AND t1.v0 IS NOT NULL AND t1.v1 IS NOT NULL
+06)----------TableScan: t1 projection=[v0, v1]
+07)--------Filter: CAST(t5.v0 AS Float64) IS NOT NULL AND t5.v0 IS NOT NULL AND t5.v1 IS NOT NULL
+08)----------TableScan: t5 projection=[v0, v1, v2, v3, v4]
+09)----Filter: t0.v1 IS NOT NULL
+10)------TableScan: t0 projection=[v0, v1]
 physical_plan
 01)CoalesceBatchesExec: target_batch_size=8192
 02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(CAST(t1.v0 AS Float64)@6, v1@1)], filter=v1@1 + CAST(v0@0 AS Float64) > 0, projection=[v0@0, v1@1, v2@3, v3@4, v4@5, v0@7, v1@8]
 03)----CoalescePartitionsExec
 04)------ProjectionExec: expr=[v0@0 as v0, v1@1 as v1, v0@2 as v0, v2@3 as v2, v3@4 as v3, v4@5 as v4, CAST(v0@0 AS Float64) as CAST(t1.v0 AS Float64)]
-05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
-06)----------CoalesceBatchesExec: target_batch_size=8192
-07)------------HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(v0@0, v0@0), (v1@1, v1@1)], projection=[v0@0, v1@1, v0@2, v2@4, v3@5, v4@6]
-08)--------------MemoryExec: partitions=1, partition_sizes=[0]
-09)--------------MemoryExec: partitions=1, partition_sizes=[0]
-10)----MemoryExec: partitions=1, partition_sizes=[0]
-
-
+05)--------CoalesceBatchesExec: target_batch_size=8192
+06)----------HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(v0@0, v0@0), (v1@1, v1@1)], projection=[v0@0, v1@1, v0@2, v2@4, v3@5, v4@6]
+07)------------CoalesceBatchesExec: target_batch_size=8192
+08)--------------FilterExec: CAST(v0@0 AS Float64) IS NOT NULL AND v0@0 IS NOT NULL AND v1@1 IS NOT NULL
+09)----------------MemoryExec: partitions=1, partition_sizes=[0]
+10)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+11)--------------CoalesceBatchesExec: target_batch_size=8192
+12)----------------FilterExec: CAST(v0@0 AS Float64) IS NOT NULL AND v0@0 IS NOT NULL AND v1@1 IS NOT NULL
+13)------------------MemoryExec: partitions=1, partition_sizes=[0]
+14)----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+15)------CoalesceBatchesExec: target_batch_size=8192
+16)--------FilterExec: v1@1 IS NOT NULL
+17)----------MemoryExec: partitions=1, partition_sizes=[0]
 
 statement ok
 drop table t5;
diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt
index d9e901b1eeca..5ce67278d2ba 100644
--- a/datafusion/sqllogictest/test_files/joins.slt
+++ b/datafusion/sqllogictest/test_files/joins.slt
@@ -3457,20 +3457,26 @@ logical_plan
 05)--------Projection: l.a, l.d, row_n
 06)----------Inner Join: l.d = r.d Filter: CAST(l.a AS Int64) >= CAST(r.a AS Int64) - Int64(10)
 07)------------SubqueryAlias: l
-08)--------------TableScan: multiple_ordered_table projection=[a, d]
-09)------------Projection: r.a, r.d, row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS row_n
-10)--------------WindowAggr: windowExpr=[[row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]
-11)----------------SubqueryAlias: r
-12)------------------TableScan: multiple_ordered_table projection=[a, d]
+08)--------------Filter: multiple_ordered_table.d IS NOT NULL
+09)----------------TableScan: multiple_ordered_table projection=[a, d], partial_filters=[multiple_ordered_table.d IS NOT NULL]
+10)------------Projection: r.a, r.d, row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW AS row_n
+11)--------------Filter: r.d IS NOT NULL
+12)----------------WindowAggr: windowExpr=[[row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]
+13)------------------SubqueryAlias: r
+14)--------------------TableScan: multiple_ordered_table projection=[a, d]
 physical_plan
 01)ProjectionExec: expr=[last_value(l.d) ORDER BY [l.a ASC NULLS LAST]@1 as amount_usd]
 02)--AggregateExec: mode=Single, gby=[row_n@2 as row_n], aggr=[last_value(l.d) ORDER BY [l.a ASC NULLS LAST]], ordering_mode=Sorted
 03)----CoalesceBatchesExec: target_batch_size=2
 04)------HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(d@1, d@1)], filter=CAST(a@0 AS Int64) >= CAST(a@1 AS Int64) - 10, projection=[a@0, d@1, row_n@4]
-05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true
-06)--------ProjectionExec: expr=[a@0 as a, d@1 as d, row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as row_n]
-07)----------BoundedWindowAggExec: wdw=[row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]
-08)------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true
+05)--------CoalesceBatchesExec: target_batch_size=2
+06)----------FilterExec: d@1 IS NOT NULL
+07)------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true
+08)--------ProjectionExec: expr=[a@0 as a, d@1 as d, row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as row_n]
+09)----------CoalesceBatchesExec: target_batch_size=2
+10)------------FilterExec: d@1 IS NOT NULL
+11)--------------BoundedWindowAggExec: wdw=[row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "row_number() ORDER BY [r.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int32(NULL)), end_bound: CurrentRow, is_causal: false }], mode=[Sorted]
+12)----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, d], output_ordering=[a@0 ASC NULLS LAST], has_header=true
 
 # run query above in multiple partitions
 statement ok

From 419165ecef5393e0c8465fdca3911e20cb9bb278 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <danielheres@gmail.com>
Date: Sat, 7 Sep 2024 17:45:15 +0200
Subject: [PATCH 13/22] Wip

---
 .../sqllogictest/test_files/group_by.slt      | 24 ++++++++++++-------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/datafusion/sqllogictest/test_files/group_by.slt b/datafusion/sqllogictest/test_files/group_by.slt
index 9de34a763e31..7d87b29cd2eb 100644
--- a/datafusion/sqllogictest/test_files/group_by.slt
+++ b/datafusion/sqllogictest/test_files/group_by.slt
@@ -4044,21 +4044,27 @@ logical_plan
 03)----SubqueryAlias: lhs
 04)------Projection: multiple_ordered_table_with_pk.c, multiple_ordered_table_with_pk.b, sum(multiple_ordered_table_with_pk.d) AS sum1
 05)--------Aggregate: groupBy=[[multiple_ordered_table_with_pk.c, multiple_ordered_table_with_pk.b]], aggr=[[sum(CAST(multiple_ordered_table_with_pk.d AS Int64))]]
-06)----------TableScan: multiple_ordered_table_with_pk projection=[b, c, d]
-07)----SubqueryAlias: rhs
-08)------Projection: multiple_ordered_table_with_pk.c, multiple_ordered_table_with_pk.b, sum(multiple_ordered_table_with_pk.d) AS sum1
-09)--------Aggregate: groupBy=[[multiple_ordered_table_with_pk.c, multiple_ordered_table_with_pk.b]], aggr=[[sum(CAST(multiple_ordered_table_with_pk.d AS Int64))]]
-10)----------TableScan: multiple_ordered_table_with_pk projection=[b, c, d]
+06)----------Filter: multiple_ordered_table_with_pk.b IS NOT NULL
+07)------------TableScan: multiple_ordered_table_with_pk projection=[b, c, d], partial_filters=[multiple_ordered_table_with_pk.b IS NOT NULL]
+08)----SubqueryAlias: rhs
+09)------Projection: multiple_ordered_table_with_pk.c, multiple_ordered_table_with_pk.b, sum(multiple_ordered_table_with_pk.d) AS sum1
+10)--------Aggregate: groupBy=[[multiple_ordered_table_with_pk.c, multiple_ordered_table_with_pk.b]], aggr=[[sum(CAST(multiple_ordered_table_with_pk.d AS Int64))]]
+11)----------Filter: multiple_ordered_table_with_pk.b IS NOT NULL
+12)------------TableScan: multiple_ordered_table_with_pk projection=[b, c, d], partial_filters=[multiple_ordered_table_with_pk.b IS NOT NULL]
 physical_plan
 01)ProjectionExec: expr=[c@0 as c, c@2 as c, sum1@1 as sum1, sum1@3 as sum1]
 02)--CoalesceBatchesExec: target_batch_size=2
 03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(b@1, b@1)], projection=[c@0, sum1@2, c@3, sum1@5]
 04)------ProjectionExec: expr=[c@0 as c, b@1 as b, sum(multiple_ordered_table_with_pk.d)@2 as sum1]
 05)--------AggregateExec: mode=Single, gby=[c@1 as c, b@0 as b], aggr=[sum(multiple_ordered_table_with_pk.d)], ordering_mode=PartiallySorted([0])
-06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[b, c, d], output_ordering=[c@1 ASC NULLS LAST], has_header=true
-07)------ProjectionExec: expr=[c@0 as c, b@1 as b, sum(multiple_ordered_table_with_pk.d)@2 as sum1]
-08)--------AggregateExec: mode=Single, gby=[c@1 as c, b@0 as b], aggr=[sum(multiple_ordered_table_with_pk.d)], ordering_mode=PartiallySorted([0])
-09)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[b, c, d], output_ordering=[c@1 ASC NULLS LAST], has_header=true
+06)----------CoalesceBatchesExec: target_batch_size=2
+07)------------FilterExec: b@0 IS NOT NULL
+08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[b, c, d], output_ordering=[c@1 ASC NULLS LAST], has_header=true
+09)------ProjectionExec: expr=[c@0 as c, b@1 as b, sum(multiple_ordered_table_with_pk.d)@2 as sum1]
+10)--------AggregateExec: mode=Single, gby=[c@1 as c, b@0 as b], aggr=[sum(multiple_ordered_table_with_pk.d)], ordering_mode=PartiallySorted([0])
+11)----------CoalesceBatchesExec: target_batch_size=2
+12)------------FilterExec: b@0 IS NOT NULL
+13)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[b, c, d], output_ordering=[c@1 ASC NULLS LAST], has_header=true
 
 query TT
 EXPLAIN SELECT lhs.c, rhs.c, lhs.sum1, rhs.sum1

From 29f112a8c95e5a140e57d987eb2cd8925a17bcc6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <danielheres@gmail.com>
Date: Sat, 7 Sep 2024 18:06:57 +0200
Subject: [PATCH 14/22] Wip

---
 .../join_disable_repartition_joins.slt        | 20 +++++++++++++------
 datafusion/sqllogictest/test_files/joins.slt  |  5 +++--
 .../sqllogictest/test_files/subquery.slt      | 13 +++++++-----
 3 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt
index c56c59b1bd78..7f3878ae6863 100644
--- a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt
+++ b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt
@@ -50,16 +50,24 @@ logical_plan
 02)--Projection: t2.a
 03)----Inner Join: t1.c = t2.c
 04)------SubqueryAlias: t1
-05)--------TableScan: annotated_data projection=[c]
-06)------SubqueryAlias: t2
-07)--------TableScan: annotated_data projection=[a, c]
+05)--------Filter: annotated_data.c IS NOT NULL
+06)----------TableScan: annotated_data projection=[c], partial_filters=[annotated_data.c IS NOT NULL]
+07)------SubqueryAlias: t2
+08)--------Filter: annotated_data.c IS NOT NULL
+09)----------TableScan: annotated_data projection=[a, c], partial_filters=[annotated_data.c IS NOT NULL]
 physical_plan
 01)SortPreservingMergeExec: [a@0 ASC NULLS LAST], fetch=5
 02)--CoalesceBatchesExec: target_batch_size=8192, fetch=5
 03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c@0, c@1)], projection=[a@1]
-04)------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c], has_header=true
-05)------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
-06)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, c], output_ordering=[a@0 ASC NULLS LAST], has_header=true
+04)------CoalescePartitionsExec
+05)--------CoalesceBatchesExec: target_batch_size=8192
+06)----------FilterExec: c@0 IS NOT NULL
+07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c], has_header=true
+09)------CoalesceBatchesExec: target_batch_size=8192
+10)--------FilterExec: c@1 IS NOT NULL
+11)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+12)------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, c], output_ordering=[a@0 ASC NULLS LAST], has_header=true
 
 # preserve_inner_join
 query IIII nosort
diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt
index 5ce67278d2ba..793bd5c3efed 100644
--- a/datafusion/sqllogictest/test_files/joins.slt
+++ b/datafusion/sqllogictest/test_files/joins.slt
@@ -1111,9 +1111,10 @@ WHERE join_t1.t1_int IS NOT NULL
 ----
 logical_plan
 01)Inner Join: join_t1.t1_id = join_t2.t2_id
-02)--Filter: join_t1.t1_int IS NOT NULL
+02)--Filter: join_t1.t1_id IS NOT NULL AND join_t1.t1_int IS NOT NULL
 03)----TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
-04)--TableScan: join_t2 projection=[t2_id, t2_name, t2_int]
+04)--Filter: join_t2.t2_id IS NOT NULL
+05)----TableScan: join_t2 projection=[t2_id, t2_name, t2_int]
 
 # Reduce right join 2 (to inner join)
 
diff --git a/datafusion/sqllogictest/test_files/subquery.slt b/datafusion/sqllogictest/test_files/subquery.slt
index aa39e796cc15..35458050bab2 100644
--- a/datafusion/sqllogictest/test_files/subquery.slt
+++ b/datafusion/sqllogictest/test_files/subquery.slt
@@ -272,7 +272,8 @@ logical_plan
 04)----SubqueryAlias: __scalar_sq_1
 05)------Projection: sum(t2.t2_int), t2.t2_id
 06)--------Aggregate: groupBy=[[t2.t2_id]], aggr=[[sum(CAST(t2.t2_int AS Int64))]]
-07)----------TableScan: t2 projection=[t2_id, t2_int]
+07)----------Filter: t2.t2_id IS NOT NULL
+08)------------TableScan: t2 projection=[t2_id, t2_int]
 physical_plan
 01)ProjectionExec: expr=[t1_id@1 as t1_id, sum(t2.t2_int)@0 as t2_sum]
 02)--CoalesceBatchesExec: target_batch_size=2
@@ -282,10 +283,12 @@ physical_plan
 06)----------CoalesceBatchesExec: target_batch_size=2
 07)------------RepartitionExec: partitioning=Hash([t2_id@0], 4), input_partitions=4
 08)--------------AggregateExec: mode=Partial, gby=[t2_id@0 as t2_id], aggr=[sum(t2.t2_int)]
-09)----------------MemoryExec: partitions=4, partition_sizes=[1, 0, 0, 0]
-10)------CoalesceBatchesExec: target_batch_size=2
-11)--------RepartitionExec: partitioning=Hash([t1_id@0], 4), input_partitions=4
-12)----------MemoryExec: partitions=4, partition_sizes=[1, 0, 0, 0]
+09)----------------CoalesceBatchesExec: target_batch_size=2
+10)------------------FilterExec: t2_id@0 IS NOT NULL
+11)--------------------MemoryExec: partitions=4, partition_sizes=[1, 0, 0, 0]
+12)------CoalesceBatchesExec: target_batch_size=2
+13)--------RepartitionExec: partitioning=Hash([t1_id@0], 4), input_partitions=4
+14)----------MemoryExec: partitions=4, partition_sizes=[1, 0, 0, 0]
 
 query II rowsort
 SELECT t1_id, (SELECT sum(t2_int) FROM t2 WHERE t2.t2_id = t1.t1_id group by t2_id, 'a') as t2_sum from t1

From 30db2c9b054f414b5321e6c53c4aec0bacca1138 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <danielheres@gmail.com>
Date: Sat, 7 Sep 2024 18:54:58 +0200
Subject: [PATCH 15/22] Wip

---
 .../join_disable_repartition_joins.slt        | 30 ++++++++++---------
 datafusion/sqllogictest/test_files/joins.slt  |  6 ++--
 .../sqllogictest/test_files/subquery.slt      | 13 ++++----
 3 files changed, 28 insertions(+), 21 deletions(-)

diff --git a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt
index 7f3878ae6863..577729c3bcc1 100644
--- a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt
+++ b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt
@@ -55,19 +55,20 @@ logical_plan
 07)------SubqueryAlias: t2
 08)--------Filter: annotated_data.c IS NOT NULL
 09)----------TableScan: annotated_data projection=[a, c], partial_filters=[annotated_data.c IS NOT NULL]
-physical_plan
+
 01)SortPreservingMergeExec: [a@0 ASC NULLS LAST], fetch=5
 02)--CoalesceBatchesExec: target_batch_size=8192, fetch=5
 03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c@0, c@1)], projection=[a@1]
 04)------CoalescePartitionsExec
-05)--------CoalesceBatchesExec: target_batch_size=8192
-06)----------FilterExec: c@0 IS NOT NULL
-07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
-08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c], has_header=true
-09)------CoalesceBatchesExec: target_batch_size=8192
-10)--------FilterExec: c@1 IS NOT NULL
-11)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
-12)------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, c], output_ordering=[a@0 ASC NULLS LAST], has_header=true
+05)--------CoalescePartitionsExec
+06)----------CoalesceBatchesExec: target_batch_size=8192
+07)------------FilterExec: d@1 IS NOT NULL AND c@0 IS NOT NULL
+08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+09)----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c, d], has_header=true
+10)--------CoalesceBatchesExec: target_batch_size=8192
+11)----------FilterExec: d@3 IS NOT NULL AND c@2 IS NOT NULL AND d@3 = 3
+12)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+13)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true
 
 # preserve_inner_join
 query IIII nosort
@@ -97,11 +98,12 @@ logical_plan
 02)--Projection: t2.a AS a2, t2.b
 03)----RightSemi Join: t1.d = t2.d, t1.c = t2.c
 04)------SubqueryAlias: t1
-05)--------TableScan: annotated_data projection=[c, d]
-06)------SubqueryAlias: t2
-07)--------Filter: annotated_data.d = Int32(3)
-08)----------TableScan: annotated_data projection=[a, b, c, d], partial_filters=[annotated_data.d = Int32(3)]
-physical_plan
+05)--------Filter: annotated_data.d IS NOT NULL AND annotated_data.c IS NOT NULL
+06)----------TableScan: annotated_data projection=[c, d], partial_filters=[annotated_data.d IS NOT NULL, annotated_data.c IS NOT NULL]
+07)------SubqueryAlias: t2
+08)--------Filter: annotated_data.d IS NOT NULL AND annotated_data.c IS NOT NULL AND annotated_data.d = Int32(3)
+09)----------TableScan: annotated_data projection=[a, b, c, d], partial_filters=[annotated_data.d = Int32(3), annotated_data.d IS NOT NULL, annotated_data.c IS NOT NULL]
+
 01)SortPreservingMergeExec: [a2@0 ASC NULLS LAST,b@1 ASC NULLS LAST], fetch=10
 02)--ProjectionExec: expr=[a@0 as a2, b@1 as b]
 03)----CoalesceBatchesExec: target_batch_size=8192, fetch=10
diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt
index 793bd5c3efed..23d99b5305f6 100644
--- a/datafusion/sqllogictest/test_files/joins.slt
+++ b/datafusion/sqllogictest/test_files/joins.slt
@@ -1127,8 +1127,10 @@ WHERE NOT (join_t1.t1_int = join_t2.t2_int)
 ----
 logical_plan
 01)Inner Join: join_t1.t1_id = join_t2.t2_id Filter: join_t2.t2_int != join_t1.t1_int
-02)--TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
-03)--TableScan: join_t2 projection=[t2_id, t2_name, t2_int]
+02)--Filter: join_t1.t1_id IS NOT NULL
+03)----TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
+04)--Filter: join_t2.t2_id IS NOT NULL
+05)----TableScan: join_t2 projection=[t2_id, t2_name, t2_int]
 
 # Reduce full join to right join
 
diff --git a/datafusion/sqllogictest/test_files/subquery.slt b/datafusion/sqllogictest/test_files/subquery.slt
index 35458050bab2..493db62286b6 100644
--- a/datafusion/sqllogictest/test_files/subquery.slt
+++ b/datafusion/sqllogictest/test_files/subquery.slt
@@ -310,7 +310,8 @@ logical_plan
 05)------Projection: sum(t2.t2_int), t2.t2_id
 06)--------Filter: sum(t2.t2_int) < Int64(3)
 07)----------Aggregate: groupBy=[[t2.t2_id]], aggr=[[sum(CAST(t2.t2_int AS Int64))]]
-08)------------TableScan: t2 projection=[t2_id, t2_int]
+08)------------Filter: t2.t2_id IS NOT NULL
+09)--------------TableScan: t2 projection=[t2_id, t2_int]
 physical_plan
 01)ProjectionExec: expr=[t1_id@1 as t1_id, sum(t2.t2_int)@0 as t2_sum]
 02)--CoalesceBatchesExec: target_batch_size=2
@@ -322,10 +323,12 @@ physical_plan
 08)--------------CoalesceBatchesExec: target_batch_size=2
 09)----------------RepartitionExec: partitioning=Hash([t2_id@0], 4), input_partitions=4
 10)------------------AggregateExec: mode=Partial, gby=[t2_id@0 as t2_id], aggr=[sum(t2.t2_int)]
-11)--------------------MemoryExec: partitions=4, partition_sizes=[1, 0, 0, 0]
-12)------CoalesceBatchesExec: target_batch_size=2
-13)--------RepartitionExec: partitioning=Hash([t1_id@0], 4), input_partitions=4
-14)----------MemoryExec: partitions=4, partition_sizes=[1, 0, 0, 0]
+11)--------------------CoalesceBatchesExec: target_batch_size=2
+12)----------------------FilterExec: t2_id@0 IS NOT NULL
+13)------------------------MemoryExec: partitions=4, partition_sizes=[1, 0, 0, 0]
+14)------CoalesceBatchesExec: target_batch_size=2
+15)--------RepartitionExec: partitioning=Hash([t1_id@0], 4), input_partitions=4
+16)----------MemoryExec: partitions=4, partition_sizes=[1, 0, 0, 0]
 
 query II rowsort
 SELECT t1_id, (SELECT sum(t2_int) FROM t2 WHERE t2.t2_id = t1.t1_id having sum(t2_int) < 3) as t2_sum from t1

From b2c74124c4fc8fbcfe11a3efc0c311ef58837263 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <danielheres@gmail.com>
Date: Sat, 7 Sep 2024 18:57:10 +0200
Subject: [PATCH 16/22] Wip

---
 .../sqllogictest/test_files/tpch/q10.slt.part | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/datafusion/sqllogictest/test_files/tpch/q10.slt.part b/datafusion/sqllogictest/test_files/tpch/q10.slt.part
index aed5e055e4db..873b635975b4 100644
--- a/datafusion/sqllogictest/test_files/tpch/q10.slt.part
+++ b/datafusion/sqllogictest/test_files/tpch/q10.slt.part
@@ -69,6 +69,25 @@ logical_plan
 16)------------------TableScan: lineitem projection=[l_orderkey, l_extendedprice, l_discount, l_returnflag], partial_filters=[lineitem.l_returnflag = Utf8("R")]
 17)----------TableScan: nation projection=[n_nationkey, n_name]
 physical_plan
+01)SortPreservingMergeExec: [revenue@2 DESC], fetch=10
+02)--SortExec: TopK(fetch=10), expr=[revenue@2 DESC], preserve_partitioning=[true]
+03)----ProjectionExec: expr=[c_custkey@0 as c_custkey, c_name@1 as c_name, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@7 as revenue, c_acctbal@2 as c_acctbal, n_name@4 as n_name, c_address@5 as c_address, c_phone@3 as c_phone, c_comment@6 as c_comment]
+04)------AggregateExec: mode=FinalPartitioned, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@2 as c_acctbal, c_phone@3 as c_phone, n_name@4 as n_name, c_address@5 as c_address, c_comment@6 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]
+05)--------CoalesceBatchesExec: target_batch_size=8192
+06)----------RepartitionExec: partitioning=Hash([c_custkey@0, c_name@1, c_acctbal@2, c_phone@3, n_name@4, c_address@5, c_comment@6], 4), input_partitions=4
+07)------------AggregateExec: mode=Partial, gby=[c_custkey@0 as c_custkey, c_name@1 as c_name, c_acctbal@4 as c_acctbal, c_phone@3 as c_phone, n_name@8 as n_name, c_address@2 as c_address, c_comment@5 as c_comment], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]
+08)--------------CoalesceBatchesExec: target_batch_size=8192
+09)----------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_nationkey@3, n_nationkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@7, l_discount@8, n_name@10]
+10)------------------CoalesceBatchesExec: target_batch_size=8192
+11)--------------------RepartitionExec: partitioning=Hash([c_nationkey@3], 4), input_partitions=4
+12)----------------------CoalesceBatchesExec: target_batch_size=8192
+13)------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(o_orderkey@7, l_orderkey@0)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, l_extendedprice@9, l_discount@10]
+14)--------------------------CoalesceBatchesExec: target_batch_size=8192
+15)----------------------------RepartitionExec: partitioning=Hash([o_orderkey@7], 4), input_partitions=4
+16)------------------------------CoalesceBatchesExec: target_batch_size=8192
+17)--------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c_custkey@0, o_custkey@1)], projection=[c_custkey@0, c_name@1, c_address@2, c_nationkey@3, c_phone@4, c_acctbal@5, c_comment@6, o_orderkey@7]
+18)----------------------------------CoalesceBatchesExec: target_batch_size=8192
+19)------------------------------------RepartitionExec: partitioning=Hash([c_custkey@0], 4), input_partitions=4
 20)--------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
 21)----------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/customer.tbl]]}, projection=[c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_comment], has_header=false
 22)----------------------------------CoalesceBatchesExec: target_batch_size=8192

From 001ad1a51cb40e7f83aff7861fbb8c1c4322254e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <danielheres@gmail.com>
Date: Sat, 7 Sep 2024 18:57:33 +0200
Subject: [PATCH 17/22] Wip

---
 datafusion/sqllogictest/test_files/tpch/q10.slt.part | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/datafusion/sqllogictest/test_files/tpch/q10.slt.part b/datafusion/sqllogictest/test_files/tpch/q10.slt.part
index 873b635975b4..73593a470c9a 100644
--- a/datafusion/sqllogictest/test_files/tpch/q10.slt.part
+++ b/datafusion/sqllogictest/test_files/tpch/q10.slt.part
@@ -105,6 +105,8 @@ physical_plan
 34)----------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
 35)------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_name], has_header=false
 
+
+
 query ITRRTTTT
 select
     c_custkey,

From 835f94c2aa6359820a4313a151bcf27812a9afba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <danielheres@gmail.com>
Date: Sat, 7 Sep 2024 20:53:54 +0200
Subject: [PATCH 18/22] Wip

---
 .../join_disable_repartition_joins.slt        |  35 +-
 datafusion/sqllogictest/test_files/joins.slt  | 850 +++++++++++-------
 .../sqllogictest/test_files/subquery.slt      | 150 ++--
 .../test_files/tpch/create_tables.slt.part    |   2 +-
 4 files changed, 642 insertions(+), 395 deletions(-)

diff --git a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt
index 577729c3bcc1..ba68faf5966e 100644
--- a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt
+++ b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt
@@ -55,20 +55,19 @@ logical_plan
 07)------SubqueryAlias: t2
 08)--------Filter: annotated_data.c IS NOT NULL
 09)----------TableScan: annotated_data projection=[a, c], partial_filters=[annotated_data.c IS NOT NULL]
-
+physical_plan
 01)SortPreservingMergeExec: [a@0 ASC NULLS LAST], fetch=5
 02)--CoalesceBatchesExec: target_batch_size=8192, fetch=5
 03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c@0, c@1)], projection=[a@1]
 04)------CoalescePartitionsExec
-05)--------CoalescePartitionsExec
-06)----------CoalesceBatchesExec: target_batch_size=8192
-07)------------FilterExec: d@1 IS NOT NULL AND c@0 IS NOT NULL
-08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
-09)----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c, d], has_header=true
-10)--------CoalesceBatchesExec: target_batch_size=8192
-11)----------FilterExec: d@3 IS NOT NULL AND c@2 IS NOT NULL AND d@3 = 3
-12)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
-13)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true
+05)--------CoalesceBatchesExec: target_batch_size=8192
+06)----------FilterExec: c@0 IS NOT NULL
+07)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c], has_header=true
+09)------CoalesceBatchesExec: target_batch_size=8192
+10)--------FilterExec: c@1 IS NOT NULL
+11)----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+12)------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, c], output_ordering=[a@0 ASC NULLS LAST], has_header=true
 
 # preserve_inner_join
 query IIII nosort
@@ -103,16 +102,20 @@ logical_plan
 07)------SubqueryAlias: t2
 08)--------Filter: annotated_data.d IS NOT NULL AND annotated_data.c IS NOT NULL AND annotated_data.d = Int32(3)
 09)----------TableScan: annotated_data projection=[a, b, c, d], partial_filters=[annotated_data.d = Int32(3), annotated_data.d IS NOT NULL, annotated_data.c IS NOT NULL]
-
+physical_plan
 01)SortPreservingMergeExec: [a2@0 ASC NULLS LAST,b@1 ASC NULLS LAST], fetch=10
 02)--ProjectionExec: expr=[a@0 as a2, b@1 as b]
 03)----CoalesceBatchesExec: target_batch_size=8192, fetch=10
 04)------HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(d@1, d@3), (c@0, c@2)], projection=[a@0, b@1]
-05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c, d], has_header=true
-06)--------CoalesceBatchesExec: target_batch_size=8192
-07)----------FilterExec: d@3 = 3
-08)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
-09)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true
+05)--------CoalescePartitionsExec
+06)----------CoalesceBatchesExec: target_batch_size=8192
+07)------------FilterExec: d@1 IS NOT NULL AND c@0 IS NOT NULL
+08)--------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+09)----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c, d], has_header=true
+10)--------CoalesceBatchesExec: target_batch_size=8192
+11)----------FilterExec: d@3 IS NOT NULL AND c@2 IS NOT NULL AND d@3 = 3
+12)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+13)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c, d], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true
 
 # preserve_right_semi_join
 query II nosort
diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt
index 23d99b5305f6..15896a3dc319 100644
--- a/datafusion/sqllogictest/test_files/joins.slt
+++ b/datafusion/sqllogictest/test_files/joins.slt
@@ -1143,9 +1143,10 @@ WHERE join_t2.t2_name IS NOT NULL
 ----
 logical_plan
 01)Right Join: join_t1.t1_id = join_t2.t2_id
-02)--TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
-03)--Filter: join_t2.t2_name IS NOT NULL
-04)----TableScan: join_t2 projection=[t2_id, t2_name, t2_int]
+02)--Filter: join_t1.t1_id IS NOT NULL
+03)----TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
+04)--Filter: join_t2.t2_name IS NOT NULL
+05)----TableScan: join_t2 projection=[t2_id, t2_name, t2_int]
 
 # Reduce full join to left join
 
@@ -1160,7 +1161,8 @@ logical_plan
 01)Left Join: join_t1.t1_id = join_t2.t2_id
 02)--Filter: join_t1.t1_name != Utf8("b")
 03)----TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
-04)--TableScan: join_t2 projection=[t2_id, t2_name, t2_int]
+04)--Filter: join_t2.t2_id IS NOT NULL
+05)----TableScan: join_t2 projection=[t2_id, t2_name, t2_int]
 
 # Reduce full join to inner join
 
@@ -1173,9 +1175,9 @@ WHERE join_t1.t1_name != 'b' and join_t2.t2_name = 'x'
 ----
 logical_plan
 01)Inner Join: join_t1.t1_id = join_t2.t2_id
-02)--Filter: join_t1.t1_name != Utf8("b")
+02)--Filter: join_t1.t1_id IS NOT NULL AND join_t1.t1_name != Utf8("b")
 03)----TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
-04)--Filter: join_t2.t2_name = Utf8("x")
+04)--Filter: join_t2.t2_id IS NOT NULL AND join_t2.t2_name = Utf8("x")
 05)----TableScan: join_t2 projection=[t2_id, t2_name, t2_int]
 
 ###
@@ -1225,10 +1227,11 @@ LEFT SEMI JOIN lsaj_t2 ON (lsaj_t1.t1_id = lsaj_t2.t2_id and lsaj_t2.t2_int > 1)
 ----
 logical_plan
 01)LeftSemi Join: lsaj_t1.t1_id = lsaj_t2.t2_id
-02)--TableScan: lsaj_t1 projection=[t1_id, t1_name]
-03)--Projection: lsaj_t2.t2_id
-04)----Filter: lsaj_t2.t2_int > UInt32(1)
-05)------TableScan: lsaj_t2 projection=[t2_id, t2_int]
+02)--Filter: lsaj_t1.t1_id IS NOT NULL
+03)----TableScan: lsaj_t1 projection=[t1_id, t1_name]
+04)--Projection: lsaj_t2.t2_id
+05)----Filter: lsaj_t2.t2_id IS NOT NULL AND lsaj_t2.t2_int > UInt32(1)
+06)------TableScan: lsaj_t2 projection=[t2_id, t2_int]
 
 # Left anti join
 
@@ -1340,20 +1343,26 @@ logical_plan
 01)Aggregate: groupBy=[[join_t1.t1_id]], aggr=[[]]
 02)--Projection: join_t1.t1_id
 03)----Inner Join: join_t1.t1_id = join_t2.t2_id
-04)------TableScan: join_t1 projection=[t1_id]
-05)------TableScan: join_t2 projection=[t2_id]
+04)------Filter: join_t1.t1_id IS NOT NULL
+05)--------TableScan: join_t1 projection=[t1_id]
+06)------Filter: join_t2.t2_id IS NOT NULL
+07)--------TableScan: join_t2 projection=[t2_id]
 physical_plan
 01)AggregateExec: mode=SinglePartitioned, gby=[t1_id@0 as t1_id], aggr=[]
 02)--CoalesceBatchesExec: target_batch_size=2
 03)----HashJoinExec: mode=Partitioned, join_type=Inner, on=[(t1_id@0, t2_id@0)], projection=[t1_id@0]
 04)------CoalesceBatchesExec: target_batch_size=2
 05)--------RepartitionExec: partitioning=Hash([t1_id@0], 2), input_partitions=2
-06)----------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-07)------------MemoryExec: partitions=1, partition_sizes=[1]
-08)------CoalesceBatchesExec: target_batch_size=2
-09)--------RepartitionExec: partitioning=Hash([t2_id@0], 2), input_partitions=2
-10)----------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-11)------------MemoryExec: partitions=1, partition_sizes=[1]
+06)----------CoalesceBatchesExec: target_batch_size=2
+07)------------FilterExec: t1_id@0 IS NOT NULL
+08)--------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+09)----------------MemoryExec: partitions=1, partition_sizes=[1]
+10)------CoalesceBatchesExec: target_batch_size=2
+11)--------RepartitionExec: partitioning=Hash([t2_id@0], 2), input_partitions=2
+12)----------CoalesceBatchesExec: target_batch_size=2
+13)------------FilterExec: t2_id@0 IS NOT NULL
+14)--------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+15)----------------MemoryExec: partitions=1, partition_sizes=[1]
 
 # Join on struct
 query TT
@@ -1363,19 +1372,25 @@ inner join join_t4 on join_t3.s3 = join_t4.s4
 ----
 logical_plan
 01)Inner Join: join_t3.s3 = join_t4.s4
-02)--TableScan: join_t3 projection=[s3]
-03)--TableScan: join_t4 projection=[s4]
+02)--Filter: join_t3.s3 IS NOT NULL
+03)----TableScan: join_t3 projection=[s3]
+04)--Filter: join_t4.s4 IS NOT NULL
+05)----TableScan: join_t4 projection=[s4]
 physical_plan
 01)CoalesceBatchesExec: target_batch_size=2
 02)--HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s3@0, s4@0)]
 03)----CoalesceBatchesExec: target_batch_size=2
 04)------RepartitionExec: partitioning=Hash([s3@0], 2), input_partitions=2
-05)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-06)----------MemoryExec: partitions=1, partition_sizes=[1]
-07)----CoalesceBatchesExec: target_batch_size=2
-08)------RepartitionExec: partitioning=Hash([s4@0], 2), input_partitions=2
-09)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-10)----------MemoryExec: partitions=1, partition_sizes=[1]
+05)--------CoalesceBatchesExec: target_batch_size=2
+06)----------FilterExec: s3@0 IS NOT NULL
+07)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+08)--------------MemoryExec: partitions=1, partition_sizes=[1]
+09)----CoalesceBatchesExec: target_batch_size=2
+10)------RepartitionExec: partitioning=Hash([s4@0], 2), input_partitions=2
+11)--------CoalesceBatchesExec: target_batch_size=2
+12)----------FilterExec: s4@0 IS NOT NULL
+13)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+14)--------------MemoryExec: partitions=1, partition_sizes=[1]
 
 query ??
 select join_t3.s3, join_t4.s4
@@ -1404,8 +1419,10 @@ logical_plan
 02)--Aggregate: groupBy=[[join_t1.t1_id]], aggr=[[count(Int64(1)) AS count(*)]]
 03)----Projection: join_t1.t1_id
 04)------Inner Join: join_t1.t1_id = join_t2.t2_id
-05)--------TableScan: join_t1 projection=[t1_id]
-06)--------TableScan: join_t2 projection=[t2_id]
+05)--------Filter: join_t1.t1_id IS NOT NULL
+06)----------TableScan: join_t1 projection=[t1_id]
+07)--------Filter: join_t2.t2_id IS NOT NULL
+08)----------TableScan: join_t2 projection=[t2_id]
 physical_plan
 01)ProjectionExec: expr=[count(*)@1 as count(*)]
 02)--AggregateExec: mode=SinglePartitioned, gby=[t1_id@0 as t1_id], aggr=[count(*)]
@@ -1413,12 +1430,16 @@ physical_plan
 04)------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(t1_id@0, t2_id@0)], projection=[t1_id@0]
 05)--------CoalesceBatchesExec: target_batch_size=2
 06)----------RepartitionExec: partitioning=Hash([t1_id@0], 2), input_partitions=2
-07)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-08)--------------MemoryExec: partitions=1, partition_sizes=[1]
-09)--------CoalesceBatchesExec: target_batch_size=2
-10)----------RepartitionExec: partitioning=Hash([t2_id@0], 2), input_partitions=2
-11)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-12)--------------MemoryExec: partitions=1, partition_sizes=[1]
+07)------------CoalesceBatchesExec: target_batch_size=2
+08)--------------FilterExec: t1_id@0 IS NOT NULL
+09)----------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+10)------------------MemoryExec: partitions=1, partition_sizes=[1]
+11)--------CoalesceBatchesExec: target_batch_size=2
+12)----------RepartitionExec: partitioning=Hash([t2_id@0], 2), input_partitions=2
+13)------------CoalesceBatchesExec: target_batch_size=2
+14)--------------FilterExec: t2_id@0 IS NOT NULL
+15)----------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+16)------------------MemoryExec: partitions=1, partition_sizes=[1]
 
 query TT
 EXPLAIN
@@ -1432,8 +1453,10 @@ logical_plan
 03)----Aggregate: groupBy=[[join_t1.t1_id AS alias1]], aggr=[[]]
 04)------Projection: join_t1.t1_id
 05)--------Inner Join: join_t1.t1_id = join_t2.t2_id
-06)----------TableScan: join_t1 projection=[t1_id]
-07)----------TableScan: join_t2 projection=[t2_id]
+06)----------Filter: join_t1.t1_id IS NOT NULL
+07)------------TableScan: join_t1 projection=[t1_id]
+08)----------Filter: join_t2.t2_id IS NOT NULL
+09)------------TableScan: join_t2 projection=[t2_id]
 physical_plan
 01)ProjectionExec: expr=[count(alias1)@0 as count(DISTINCT join_t1.t1_id)]
 02)--AggregateExec: mode=Final, gby=[], aggr=[count(alias1)]
@@ -1444,12 +1467,16 @@ physical_plan
 07)------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(t1_id@0, t2_id@0)], projection=[t1_id@0]
 08)--------------CoalesceBatchesExec: target_batch_size=2
 09)----------------RepartitionExec: partitioning=Hash([t1_id@0], 2), input_partitions=2
-10)------------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-11)--------------------MemoryExec: partitions=1, partition_sizes=[1]
-12)--------------CoalesceBatchesExec: target_batch_size=2
-13)----------------RepartitionExec: partitioning=Hash([t2_id@0], 2), input_partitions=2
-14)------------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-15)--------------------MemoryExec: partitions=1, partition_sizes=[1]
+10)------------------CoalesceBatchesExec: target_batch_size=2
+11)--------------------FilterExec: t1_id@0 IS NOT NULL
+12)----------------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+13)------------------------MemoryExec: partitions=1, partition_sizes=[1]
+14)--------------CoalesceBatchesExec: target_batch_size=2
+15)----------------RepartitionExec: partitioning=Hash([t2_id@0], 2), input_partitions=2
+16)------------------CoalesceBatchesExec: target_batch_size=2
+17)--------------------FilterExec: t2_id@0 IS NOT NULL
+18)----------------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+19)------------------------MemoryExec: partitions=1, partition_sizes=[1]
 
 statement ok
 set datafusion.explain.logical_plan_only = true;
@@ -1465,8 +1492,10 @@ where join_t1.t1_id + 12 = join_t2.t2_id + 1
 ----
 logical_plan
 01)Inner Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = CAST(join_t2.t2_id AS Int64) + Int64(1)
-02)--TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
-03)--TableScan: join_t2 projection=[t2_id, t2_name, t2_int]
+02)--Filter: CAST(join_t1.t1_id AS Int64) + Int64(12) IS NOT NULL
+03)----TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
+04)--Filter: CAST(join_t2.t2_id AS Int64) + Int64(1) IS NOT NULL
+05)----TableScan: join_t2 projection=[t2_id, t2_name, t2_int]
 
 # Reduce cross join with cast expr join key (to inner join)
 
@@ -1479,8 +1508,10 @@ cross join join_t2 where join_t1.t1_id + 11 = cast(join_t2.t2_id as BIGINT)
 logical_plan
 01)Projection: join_t1.t1_id, join_t2.t2_id, join_t1.t1_name
 02)--Inner Join: CAST(join_t1.t1_id AS Int64) + Int64(11) = CAST(join_t2.t2_id AS Int64)
-03)----TableScan: join_t1 projection=[t1_id, t1_name]
-04)----TableScan: join_t2 projection=[t2_id]
+03)----Filter: CAST(join_t1.t1_id AS Int64) + Int64(11) IS NOT NULL
+04)------TableScan: join_t1 projection=[t1_id, t1_name]
+05)----Filter: CAST(join_t2.t2_id AS Int64) IS NOT NULL
+06)------TableScan: join_t2 projection=[t2_id]
 
 
 #####
@@ -1504,19 +1535,25 @@ where join_t1.t1_id + 11 = join_t2.t2_id
 logical_plan
 01)Projection: join_t1.t1_id, join_t1.t1_name, join_t1.t1_int, join_t2.t2_id, join_t2.t2_name, join_t2.t2_int, CAST(join_t1.t1_id AS Int64) + Int64(11)
 02)--Inner Join: CAST(join_t1.t1_id AS Int64) + Int64(11) = CAST(join_t2.t2_id AS Int64)
-03)----TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
-04)----TableScan: join_t2 projection=[t2_id, t2_name, t2_int]
+03)----Filter: CAST(join_t1.t1_id AS Int64) + Int64(11) IS NOT NULL
+04)------TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
+05)----Filter: CAST(join_t2.t2_id AS Int64) IS NOT NULL
+06)------TableScan: join_t2 projection=[t2_id, t2_name, t2_int]
 physical_plan
 01)ProjectionExec: expr=[t1_id@0 as t1_id, t1_name@1 as t1_name, t1_int@2 as t1_int, t2_id@3 as t2_id, t2_name@4 as t2_name, t2_int@5 as t2_int, CAST(t1_id@0 AS Int64) + 11 as join_t1.t1_id + Int64(11)]
 02)--CoalesceBatchesExec: target_batch_size=2
 03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(join_t1.t1_id + Int64(11)@3, CAST(join_t2.t2_id AS Int64)@3)], projection=[t1_id@0, t1_name@1, t1_int@2, t2_id@4, t2_name@5, t2_int@6]
 04)------CoalescePartitionsExec
 05)--------ProjectionExec: expr=[t1_id@0 as t1_id, t1_name@1 as t1_name, t1_int@2 as t1_int, CAST(t1_id@0 AS Int64) + 11 as join_t1.t1_id + Int64(11)]
-06)----------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-07)------------MemoryExec: partitions=1, partition_sizes=[1]
-08)------ProjectionExec: expr=[t2_id@0 as t2_id, t2_name@1 as t2_name, t2_int@2 as t2_int, CAST(t2_id@0 AS Int64) as CAST(join_t2.t2_id AS Int64)]
-09)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-10)----------MemoryExec: partitions=1, partition_sizes=[1]
+06)----------CoalesceBatchesExec: target_batch_size=2
+07)------------FilterExec: CAST(t1_id@0 AS Int64) + 11 IS NOT NULL
+08)--------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+09)----------------MemoryExec: partitions=1, partition_sizes=[1]
+10)------ProjectionExec: expr=[t2_id@0 as t2_id, t2_name@1 as t2_name, t2_int@2 as t2_int, CAST(t2_id@0 AS Int64) as CAST(join_t2.t2_id AS Int64)]
+11)--------CoalesceBatchesExec: target_batch_size=2
+12)----------FilterExec: CAST(t2_id@0 AS Int64) IS NOT NULL
+13)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+14)--------------MemoryExec: partitions=1, partition_sizes=[1]
 
 statement ok
 set datafusion.optimizer.repartition_joins = true;
@@ -1530,8 +1567,10 @@ where join_t1.t1_id + 11 = join_t2.t2_id
 logical_plan
 01)Projection: join_t1.t1_id, join_t1.t1_name, join_t1.t1_int, join_t2.t2_id, join_t2.t2_name, join_t2.t2_int, CAST(join_t1.t1_id AS Int64) + Int64(11)
 02)--Inner Join: CAST(join_t1.t1_id AS Int64) + Int64(11) = CAST(join_t2.t2_id AS Int64)
-03)----TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
-04)----TableScan: join_t2 projection=[t2_id, t2_name, t2_int]
+03)----Filter: CAST(join_t1.t1_id AS Int64) + Int64(11) IS NOT NULL
+04)------TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
+05)----Filter: CAST(join_t2.t2_id AS Int64) IS NOT NULL
+06)------TableScan: join_t2 projection=[t2_id, t2_name, t2_int]
 physical_plan
 01)ProjectionExec: expr=[t1_id@0 as t1_id, t1_name@1 as t1_name, t1_int@2 as t1_int, t2_id@3 as t2_id, t2_name@4 as t2_name, t2_int@5 as t2_int, CAST(t1_id@0 AS Int64) + 11 as join_t1.t1_id + Int64(11)]
 02)--CoalesceBatchesExec: target_batch_size=2
@@ -1539,13 +1578,17 @@ physical_plan
 04)------CoalesceBatchesExec: target_batch_size=2
 05)--------RepartitionExec: partitioning=Hash([join_t1.t1_id + Int64(11)@3], 2), input_partitions=2
 06)----------ProjectionExec: expr=[t1_id@0 as t1_id, t1_name@1 as t1_name, t1_int@2 as t1_int, CAST(t1_id@0 AS Int64) + 11 as join_t1.t1_id + Int64(11)]
-07)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-08)--------------MemoryExec: partitions=1, partition_sizes=[1]
-09)------CoalesceBatchesExec: target_batch_size=2
-10)--------RepartitionExec: partitioning=Hash([CAST(join_t2.t2_id AS Int64)@3], 2), input_partitions=2
-11)----------ProjectionExec: expr=[t2_id@0 as t2_id, t2_name@1 as t2_name, t2_int@2 as t2_int, CAST(t2_id@0 AS Int64) as CAST(join_t2.t2_id AS Int64)]
-12)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-13)--------------MemoryExec: partitions=1, partition_sizes=[1]
+07)------------CoalesceBatchesExec: target_batch_size=2
+08)--------------FilterExec: CAST(t1_id@0 AS Int64) + 11 IS NOT NULL
+09)----------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+10)------------------MemoryExec: partitions=1, partition_sizes=[1]
+11)------CoalesceBatchesExec: target_batch_size=2
+12)--------RepartitionExec: partitioning=Hash([CAST(join_t2.t2_id AS Int64)@3], 2), input_partitions=2
+13)----------ProjectionExec: expr=[t2_id@0 as t2_id, t2_name@1 as t2_name, t2_int@2 as t2_int, CAST(t2_id@0 AS Int64) as CAST(join_t2.t2_id AS Int64)]
+14)------------CoalesceBatchesExec: target_batch_size=2
+15)--------------FilterExec: CAST(t2_id@0 AS Int64) IS NOT NULL
+16)----------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+17)------------------MemoryExec: partitions=1, partition_sizes=[1]
 
 # Both side expr key inner join
 
@@ -1561,19 +1604,25 @@ INNER JOIN join_t2 ON join_t1.t1_id + cast(12 as INT UNSIGNED) = join_t2.t2_id +
 logical_plan
 01)Projection: join_t1.t1_id, join_t2.t2_id, join_t1.t1_name
 02)--Inner Join: join_t1.t1_id + UInt32(12) = join_t2.t2_id + UInt32(1)
-03)----TableScan: join_t1 projection=[t1_id, t1_name]
-04)----TableScan: join_t2 projection=[t2_id]
+03)----Filter: join_t1.t1_id + UInt32(12) IS NOT NULL
+04)------TableScan: join_t1 projection=[t1_id, t1_name]
+05)----Filter: join_t2.t2_id + UInt32(1) IS NOT NULL
+06)------TableScan: join_t2 projection=[t2_id]
 physical_plan
 01)ProjectionExec: expr=[t1_id@1 as t1_id, t2_id@0 as t2_id, t1_name@2 as t1_name]
 02)--CoalesceBatchesExec: target_batch_size=2
 03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(join_t2.t2_id + UInt32(1)@1, join_t1.t1_id + UInt32(12)@2)], projection=[t2_id@0, t1_id@2, t1_name@3]
 04)------CoalescePartitionsExec
 05)--------ProjectionExec: expr=[t2_id@0 as t2_id, t2_id@0 + 1 as join_t2.t2_id + UInt32(1)]
-06)----------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-07)------------MemoryExec: partitions=1, partition_sizes=[1]
-08)------ProjectionExec: expr=[t1_id@0 as t1_id, t1_name@1 as t1_name, t1_id@0 + 12 as join_t1.t1_id + UInt32(12)]
-09)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-10)----------MemoryExec: partitions=1, partition_sizes=[1]
+06)----------CoalesceBatchesExec: target_batch_size=2
+07)------------FilterExec: t2_id@0 + 1 IS NOT NULL
+08)--------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+09)----------------MemoryExec: partitions=1, partition_sizes=[1]
+10)------ProjectionExec: expr=[t1_id@0 as t1_id, t1_name@1 as t1_name, t1_id@0 + 12 as join_t1.t1_id + UInt32(12)]
+11)--------CoalesceBatchesExec: target_batch_size=2
+12)----------FilterExec: t1_id@0 + 12 IS NOT NULL
+13)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+14)--------------MemoryExec: partitions=1, partition_sizes=[1]
 
 statement ok
 set datafusion.optimizer.repartition_joins = true;
@@ -1587,8 +1636,10 @@ INNER JOIN join_t2 ON join_t1.t1_id + cast(12 as INT UNSIGNED) = join_t2.t2_id +
 logical_plan
 01)Projection: join_t1.t1_id, join_t2.t2_id, join_t1.t1_name
 02)--Inner Join: join_t1.t1_id + UInt32(12) = join_t2.t2_id + UInt32(1)
-03)----TableScan: join_t1 projection=[t1_id, t1_name]
-04)----TableScan: join_t2 projection=[t2_id]
+03)----Filter: join_t1.t1_id + UInt32(12) IS NOT NULL
+04)------TableScan: join_t1 projection=[t1_id, t1_name]
+05)----Filter: join_t2.t2_id + UInt32(1) IS NOT NULL
+06)------TableScan: join_t2 projection=[t2_id]
 physical_plan
 01)ProjectionExec: expr=[t1_id@1 as t1_id, t2_id@0 as t2_id, t1_name@2 as t1_name]
 02)--CoalesceBatchesExec: target_batch_size=2
@@ -1596,13 +1647,17 @@ physical_plan
 04)------CoalesceBatchesExec: target_batch_size=2
 05)--------RepartitionExec: partitioning=Hash([join_t2.t2_id + UInt32(1)@1], 2), input_partitions=2
 06)----------ProjectionExec: expr=[t2_id@0 as t2_id, t2_id@0 + 1 as join_t2.t2_id + UInt32(1)]
-07)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-08)--------------MemoryExec: partitions=1, partition_sizes=[1]
-09)------CoalesceBatchesExec: target_batch_size=2
-10)--------RepartitionExec: partitioning=Hash([join_t1.t1_id + UInt32(12)@2], 2), input_partitions=2
-11)----------ProjectionExec: expr=[t1_id@0 as t1_id, t1_name@1 as t1_name, t1_id@0 + 12 as join_t1.t1_id + UInt32(12)]
-12)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-13)--------------MemoryExec: partitions=1, partition_sizes=[1]
+07)------------CoalesceBatchesExec: target_batch_size=2
+08)--------------FilterExec: t2_id@0 + 1 IS NOT NULL
+09)----------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+10)------------------MemoryExec: partitions=1, partition_sizes=[1]
+11)------CoalesceBatchesExec: target_batch_size=2
+12)--------RepartitionExec: partitioning=Hash([join_t1.t1_id + UInt32(12)@2], 2), input_partitions=2
+13)----------ProjectionExec: expr=[t1_id@0 as t1_id, t1_name@1 as t1_name, t1_id@0 + 12 as join_t1.t1_id + UInt32(12)]
+14)------------CoalesceBatchesExec: target_batch_size=2
+15)--------------FilterExec: t1_id@0 + 12 IS NOT NULL
+16)----------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+17)------------------MemoryExec: partitions=1, partition_sizes=[1]
 
 # Left side expr key inner join
 
@@ -1619,16 +1674,24 @@ ON join_t1.t1_id + cast(11 as INT UNSIGNED)  = join_t2.t2_id
 logical_plan
 01)Projection: join_t1.t1_id, join_t2.t2_id, join_t1.t1_name
 02)--Inner Join: join_t1.t1_id + UInt32(11) = join_t2.t2_id
-03)----TableScan: join_t1 projection=[t1_id, t1_name]
-04)----TableScan: join_t2 projection=[t2_id]
+03)----Filter: join_t1.t1_id + UInt32(11) IS NOT NULL
+04)------TableScan: join_t1 projection=[t1_id, t1_name]
+05)----Filter: join_t2.t2_id IS NOT NULL
+06)------TableScan: join_t2 projection=[t2_id]
 physical_plan
 01)ProjectionExec: expr=[t1_id@1 as t1_id, t2_id@0 as t2_id, t1_name@2 as t1_name]
 02)--CoalesceBatchesExec: target_batch_size=2
 03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(t2_id@0, join_t1.t1_id + UInt32(11)@2)], projection=[t2_id@0, t1_id@1, t1_name@2]
-04)------MemoryExec: partitions=1, partition_sizes=[1]
-05)------ProjectionExec: expr=[t1_id@0 as t1_id, t1_name@1 as t1_name, t1_id@0 + 11 as join_t1.t1_id + UInt32(11)]
-06)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-07)----------MemoryExec: partitions=1, partition_sizes=[1]
+04)------CoalescePartitionsExec
+05)--------CoalesceBatchesExec: target_batch_size=2
+06)----------FilterExec: t2_id@0 IS NOT NULL
+07)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+08)--------------MemoryExec: partitions=1, partition_sizes=[1]
+09)------ProjectionExec: expr=[t1_id@0 as t1_id, t1_name@1 as t1_name, t1_id@0 + 11 as join_t1.t1_id + UInt32(11)]
+10)--------CoalesceBatchesExec: target_batch_size=2
+11)----------FilterExec: t1_id@0 + 11 IS NOT NULL
+12)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+13)--------------MemoryExec: partitions=1, partition_sizes=[1]
 
 statement ok
 set datafusion.optimizer.repartition_joins = true;
@@ -1643,21 +1706,27 @@ ON join_t1.t1_id + cast(11 as INT UNSIGNED)  = join_t2.t2_id
 logical_plan
 01)Projection: join_t1.t1_id, join_t2.t2_id, join_t1.t1_name
 02)--Inner Join: join_t1.t1_id + UInt32(11) = join_t2.t2_id
-03)----TableScan: join_t1 projection=[t1_id, t1_name]
-04)----TableScan: join_t2 projection=[t2_id]
+03)----Filter: join_t1.t1_id + UInt32(11) IS NOT NULL
+04)------TableScan: join_t1 projection=[t1_id, t1_name]
+05)----Filter: join_t2.t2_id IS NOT NULL
+06)------TableScan: join_t2 projection=[t2_id]
 physical_plan
 01)ProjectionExec: expr=[t1_id@1 as t1_id, t2_id@0 as t2_id, t1_name@2 as t1_name]
 02)--CoalesceBatchesExec: target_batch_size=2
 03)----HashJoinExec: mode=Partitioned, join_type=Inner, on=[(t2_id@0, join_t1.t1_id + UInt32(11)@2)], projection=[t2_id@0, t1_id@1, t1_name@2]
 04)------CoalesceBatchesExec: target_batch_size=2
 05)--------RepartitionExec: partitioning=Hash([t2_id@0], 2), input_partitions=2
-06)----------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-07)------------MemoryExec: partitions=1, partition_sizes=[1]
-08)------CoalesceBatchesExec: target_batch_size=2
-09)--------RepartitionExec: partitioning=Hash([join_t1.t1_id + UInt32(11)@2], 2), input_partitions=2
-10)----------ProjectionExec: expr=[t1_id@0 as t1_id, t1_name@1 as t1_name, t1_id@0 + 11 as join_t1.t1_id + UInt32(11)]
-11)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-12)--------------MemoryExec: partitions=1, partition_sizes=[1]
+06)----------CoalesceBatchesExec: target_batch_size=2
+07)------------FilterExec: t2_id@0 IS NOT NULL
+08)--------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+09)----------------MemoryExec: partitions=1, partition_sizes=[1]
+10)------CoalesceBatchesExec: target_batch_size=2
+11)--------RepartitionExec: partitioning=Hash([join_t1.t1_id + UInt32(11)@2], 2), input_partitions=2
+12)----------ProjectionExec: expr=[t1_id@0 as t1_id, t1_name@1 as t1_name, t1_id@0 + 11 as join_t1.t1_id + UInt32(11)]
+13)------------CoalesceBatchesExec: target_batch_size=2
+14)--------------FilterExec: t1_id@0 + 11 IS NOT NULL
+15)----------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+16)------------------MemoryExec: partitions=1, partition_sizes=[1]
 
 # Right side expr key inner join
 
@@ -1674,18 +1743,24 @@ ON join_t1.t1_id = join_t2.t2_id - cast(11 as INT UNSIGNED)
 logical_plan
 01)Projection: join_t1.t1_id, join_t2.t2_id, join_t1.t1_name
 02)--Inner Join: join_t1.t1_id = join_t2.t2_id - UInt32(11)
-03)----TableScan: join_t1 projection=[t1_id, t1_name]
-04)----TableScan: join_t2 projection=[t2_id]
+03)----Filter: join_t1.t1_id IS NOT NULL
+04)------TableScan: join_t1 projection=[t1_id, t1_name]
+05)----Filter: join_t2.t2_id - UInt32(11) IS NOT NULL
+06)------TableScan: join_t2 projection=[t2_id]
 physical_plan
 01)ProjectionExec: expr=[t1_id@1 as t1_id, t2_id@0 as t2_id, t1_name@2 as t1_name]
 02)--CoalesceBatchesExec: target_batch_size=2
 03)----HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(join_t2.t2_id - UInt32(11)@1, t1_id@0)], projection=[t2_id@0, t1_id@2, t1_name@3]
 04)------CoalescePartitionsExec
 05)--------ProjectionExec: expr=[t2_id@0 as t2_id, t2_id@0 - 11 as join_t2.t2_id - UInt32(11)]
-06)----------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-07)------------MemoryExec: partitions=1, partition_sizes=[1]
-08)------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-09)--------MemoryExec: partitions=1, partition_sizes=[1]
+06)----------CoalesceBatchesExec: target_batch_size=2
+07)------------FilterExec: t2_id@0 - 11 IS NOT NULL
+08)--------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+09)----------------MemoryExec: partitions=1, partition_sizes=[1]
+10)------CoalesceBatchesExec: target_batch_size=2
+11)--------FilterExec: t1_id@0 IS NOT NULL
+12)----------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+13)------------MemoryExec: partitions=1, partition_sizes=[1]
 
 statement ok
 set datafusion.optimizer.repartition_joins = true;
@@ -1700,8 +1775,10 @@ ON join_t1.t1_id = join_t2.t2_id - cast(11 as INT UNSIGNED)
 logical_plan
 01)Projection: join_t1.t1_id, join_t2.t2_id, join_t1.t1_name
 02)--Inner Join: join_t1.t1_id = join_t2.t2_id - UInt32(11)
-03)----TableScan: join_t1 projection=[t1_id, t1_name]
-04)----TableScan: join_t2 projection=[t2_id]
+03)----Filter: join_t1.t1_id IS NOT NULL
+04)------TableScan: join_t1 projection=[t1_id, t1_name]
+05)----Filter: join_t2.t2_id - UInt32(11) IS NOT NULL
+06)------TableScan: join_t2 projection=[t2_id]
 physical_plan
 01)ProjectionExec: expr=[t1_id@1 as t1_id, t2_id@0 as t2_id, t1_name@2 as t1_name]
 02)--CoalesceBatchesExec: target_batch_size=2
@@ -1709,12 +1786,16 @@ physical_plan
 04)------CoalesceBatchesExec: target_batch_size=2
 05)--------RepartitionExec: partitioning=Hash([join_t2.t2_id - UInt32(11)@1], 2), input_partitions=2
 06)----------ProjectionExec: expr=[t2_id@0 as t2_id, t2_id@0 - 11 as join_t2.t2_id - UInt32(11)]
-07)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-08)--------------MemoryExec: partitions=1, partition_sizes=[1]
-09)------CoalesceBatchesExec: target_batch_size=2
-10)--------RepartitionExec: partitioning=Hash([t1_id@0], 2), input_partitions=2
-11)----------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-12)------------MemoryExec: partitions=1, partition_sizes=[1]
+07)------------CoalesceBatchesExec: target_batch_size=2
+08)--------------FilterExec: t2_id@0 - 11 IS NOT NULL
+09)----------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+10)------------------MemoryExec: partitions=1, partition_sizes=[1]
+11)------CoalesceBatchesExec: target_batch_size=2
+12)--------RepartitionExec: partitioning=Hash([t1_id@0], 2), input_partitions=2
+13)----------CoalesceBatchesExec: target_batch_size=2
+14)------------FilterExec: t1_id@0 IS NOT NULL
+15)--------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+16)----------------MemoryExec: partitions=1, partition_sizes=[1]
 
 # Select wildcard with expr key inner join
 
@@ -1730,15 +1811,23 @@ ON join_t1.t1_id = join_t2.t2_id - cast(11 as INT UNSIGNED)
 ----
 logical_plan
 01)Inner Join: join_t1.t1_id = join_t2.t2_id - UInt32(11)
-02)--TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
-03)--TableScan: join_t2 projection=[t2_id, t2_name, t2_int]
+02)--Filter: join_t1.t1_id IS NOT NULL
+03)----TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
+04)--Filter: join_t2.t2_id - UInt32(11) IS NOT NULL
+05)----TableScan: join_t2 projection=[t2_id, t2_name, t2_int]
 physical_plan
 01)CoalesceBatchesExec: target_batch_size=2
 02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(t1_id@0, join_t2.t2_id - UInt32(11)@3)], projection=[t1_id@0, t1_name@1, t1_int@2, t2_id@3, t2_name@4, t2_int@5]
-03)----MemoryExec: partitions=1, partition_sizes=[1]
-04)----ProjectionExec: expr=[t2_id@0 as t2_id, t2_name@1 as t2_name, t2_int@2 as t2_int, t2_id@0 - 11 as join_t2.t2_id - UInt32(11)]
-05)------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-06)--------MemoryExec: partitions=1, partition_sizes=[1]
+03)----CoalescePartitionsExec
+04)------CoalesceBatchesExec: target_batch_size=2
+05)--------FilterExec: t1_id@0 IS NOT NULL
+06)----------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+07)------------MemoryExec: partitions=1, partition_sizes=[1]
+08)----ProjectionExec: expr=[t2_id@0 as t2_id, t2_name@1 as t2_name, t2_int@2 as t2_int, t2_id@0 - 11 as join_t2.t2_id - UInt32(11)]
+09)------CoalesceBatchesExec: target_batch_size=2
+10)--------FilterExec: t2_id@0 - 11 IS NOT NULL
+11)----------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+12)------------MemoryExec: partitions=1, partition_sizes=[1]
 
 statement ok
 set datafusion.optimizer.repartition_joins = true;
@@ -1752,20 +1841,26 @@ ON join_t1.t1_id = join_t2.t2_id - cast(11 as INT UNSIGNED)
 ----
 logical_plan
 01)Inner Join: join_t1.t1_id = join_t2.t2_id - UInt32(11)
-02)--TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
-03)--TableScan: join_t2 projection=[t2_id, t2_name, t2_int]
+02)--Filter: join_t1.t1_id IS NOT NULL
+03)----TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
+04)--Filter: join_t2.t2_id - UInt32(11) IS NOT NULL
+05)----TableScan: join_t2 projection=[t2_id, t2_name, t2_int]
 physical_plan
 01)CoalesceBatchesExec: target_batch_size=2
 02)--HashJoinExec: mode=Partitioned, join_type=Inner, on=[(t1_id@0, join_t2.t2_id - UInt32(11)@3)], projection=[t1_id@0, t1_name@1, t1_int@2, t2_id@3, t2_name@4, t2_int@5]
 03)----CoalesceBatchesExec: target_batch_size=2
 04)------RepartitionExec: partitioning=Hash([t1_id@0], 2), input_partitions=2
-05)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-06)----------MemoryExec: partitions=1, partition_sizes=[1]
-07)----CoalesceBatchesExec: target_batch_size=2
-08)------RepartitionExec: partitioning=Hash([join_t2.t2_id - UInt32(11)@3], 2), input_partitions=2
-09)--------ProjectionExec: expr=[t2_id@0 as t2_id, t2_name@1 as t2_name, t2_int@2 as t2_int, t2_id@0 - 11 as join_t2.t2_id - UInt32(11)]
-10)----------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-11)------------MemoryExec: partitions=1, partition_sizes=[1]
+05)--------CoalesceBatchesExec: target_batch_size=2
+06)----------FilterExec: t1_id@0 IS NOT NULL
+07)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+08)--------------MemoryExec: partitions=1, partition_sizes=[1]
+09)----CoalesceBatchesExec: target_batch_size=2
+10)------RepartitionExec: partitioning=Hash([join_t2.t2_id - UInt32(11)@3], 2), input_partitions=2
+11)--------ProjectionExec: expr=[t2_id@0 as t2_id, t2_name@1 as t2_name, t2_int@2 as t2_int, t2_id@0 - 11 as join_t2.t2_id - UInt32(11)]
+12)----------CoalesceBatchesExec: target_batch_size=2
+13)------------FilterExec: t2_id@0 - 11 IS NOT NULL
+14)--------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+15)----------------MemoryExec: partitions=1, partition_sizes=[1]
 
 #####
 # Config teardown
@@ -1785,8 +1880,10 @@ inner join join_t2 on join_t1.t1_id + 11 = join_t2.t2_id
 ----
 logical_plan
 01)Inner Join: CAST(join_t1.t1_id AS Int64) + Int64(11) = CAST(join_t2.t2_id AS Int64)
-02)--TableScan: join_t1 projection=[t1_id, t1_name]
-03)--TableScan: join_t2 projection=[t2_id]
+02)--Filter: CAST(join_t1.t1_id AS Int64) + Int64(11) IS NOT NULL
+03)----TableScan: join_t1 projection=[t1_id, t1_name]
+04)--Filter: CAST(join_t2.t2_id AS Int64) IS NOT NULL
+05)----TableScan: join_t2 projection=[t2_id]
 
 # Join only with filter
 
@@ -1812,8 +1909,10 @@ on join_t1.t1_id * 5 = join_t2.t2_id and join_t1.t1_id * 4 < join_t2.t2_id
 ----
 logical_plan
 01)Inner Join: CAST(join_t1.t1_id AS Int64) * Int64(5) = CAST(join_t2.t2_id AS Int64) Filter: CAST(join_t1.t1_id AS Int64) * Int64(4) < CAST(join_t2.t2_id AS Int64)
-02)--TableScan: join_t1 projection=[t1_id, t1_name]
-03)--TableScan: join_t2 projection=[t2_id]
+02)--Filter: CAST(join_t1.t1_id AS Int64) * Int64(5) IS NOT NULL
+03)----TableScan: join_t1 projection=[t1_id, t1_name]
+04)--Filter: CAST(join_t2.t2_id AS Int64) IS NOT NULL
+05)----TableScan: join_t2 projection=[t2_id]
 
 # Test cross join to groupby with different key ordering
 
@@ -1867,10 +1966,12 @@ where join_t1.t1_id + 12 in (select join_t2.t2_id + 1 from join_t2)
 ----
 logical_plan
 01)LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.join_t2.t2_id + Int64(1)
-02)--TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
-03)--SubqueryAlias: __correlated_sq_1
-04)----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1)
-05)------TableScan: join_t2 projection=[t2_id]
+02)--Filter: CAST(join_t1.t1_id AS Int64) + Int64(12) IS NOT NULL
+03)----TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
+04)--SubqueryAlias: __correlated_sq_1
+05)----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1)
+06)------Filter: CAST(join_t2.t2_id AS Int64) + Int64(1) IS NOT NULL
+07)--------TableScan: join_t2 projection=[t2_id]
 
 query ITI rowsort
 select join_t1.t1_id, join_t1.t1_name, join_t1.t1_int
@@ -1896,11 +1997,12 @@ where join_t1.t1_id + 12 in
 ----
 logical_plan
 01)LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.join_t2.t2_id + Int64(1) Filter: join_t1.t1_int <= __correlated_sq_1.t2_int
-02)--TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
-03)--SubqueryAlias: __correlated_sq_1
-04)----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1), join_t2.t2_int
-05)------Filter: join_t2.t2_int > UInt32(0)
-06)--------TableScan: join_t2 projection=[t2_id, t2_int]
+02)--Filter: CAST(join_t1.t1_id AS Int64) + Int64(12) IS NOT NULL
+03)----TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
+04)--SubqueryAlias: __correlated_sq_1
+05)----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1), join_t2.t2_int
+06)------Filter: CAST(join_t2.t2_id AS Int64) + Int64(1) IS NOT NULL AND join_t2.t2_int > UInt32(0)
+07)--------TableScan: join_t2 projection=[t2_id, t2_int]
 
 query ITI rowsort
 select join_t1.t1_id, join_t1.t1_name, join_t1.t1_int
@@ -1932,11 +2034,12 @@ where join_t1.t1_id + 12 in
 ----
 logical_plan
 01)LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.join_t2.t2_id + Int64(1) Filter: join_t1.t1_int <= __correlated_sq_1.t2_int AND join_t1.t1_name != __correlated_sq_1.t2_name
-02)--TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
-03)--SubqueryAlias: __correlated_sq_1
-04)----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1), join_t2.t2_int, join_t2.t2_name
-05)------Filter: join_t2.t2_int > UInt32(0)
-06)--------TableScan: join_t2 projection=[t2_id, t2_name, t2_int]
+02)--Filter: CAST(join_t1.t1_id AS Int64) + Int64(12) IS NOT NULL
+03)----TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
+04)--SubqueryAlias: __correlated_sq_1
+05)----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1), join_t2.t2_int, join_t2.t2_name
+06)------Filter: CAST(join_t2.t2_id AS Int64) + Int64(1) IS NOT NULL AND join_t2.t2_int > UInt32(0)
+07)--------TableScan: join_t2 projection=[t2_id, t2_name, t2_int]
 
 query ITI rowsort
 select join_t1.t1_id, join_t1.t1_name, join_t1.t1_int
@@ -1964,11 +2067,12 @@ where join_t1.t1_id + 12 in
 ----
 logical_plan
 01)LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.join_t2.t2_id + Int64(1)
-02)--Filter: join_t1.t1_int > UInt32(0)
+02)--Filter: CAST(join_t1.t1_id AS Int64) + Int64(12) IS NOT NULL AND join_t1.t1_int > UInt32(0)
 03)----TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
 04)--SubqueryAlias: __correlated_sq_1
 05)----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1)
-06)------TableScan: join_t2 projection=[t2_id]
+06)------Filter: CAST(join_t2.t2_id AS Int64) + Int64(1) IS NOT NULL
+07)--------TableScan: join_t2 projection=[t2_id]
 
 # Not in subquery to join with correlated outer filter
 
@@ -1984,7 +2088,8 @@ logical_plan
 02)--TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
 03)--SubqueryAlias: __correlated_sq_1
 04)----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1)
-05)------TableScan: join_t2 projection=[t2_id]
+05)------Filter: CAST(join_t2.t2_id AS Int64) + Int64(1) IS NOT NULL
+06)--------TableScan: join_t2 projection=[t2_id]
 
 # In subquery to join with outer filter
 
@@ -2003,11 +2108,12 @@ where join_t1.t1_id + 12 in
 ----
 logical_plan
 01)LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.join_t2.t2_id + Int64(1) Filter: join_t1.t1_int <= __correlated_sq_1.t2_int AND join_t1.t1_name != __correlated_sq_1.t2_name
-02)--Filter: join_t1.t1_id > UInt32(0)
+02)--Filter: CAST(join_t1.t1_id AS Int64) + Int64(12) IS NOT NULL AND join_t1.t1_id > UInt32(0)
 03)----TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
 04)--SubqueryAlias: __correlated_sq_1
 05)----Projection: CAST(join_t2.t2_id AS Int64) + Int64(1), join_t2.t2_int, join_t2.t2_name
-06)------TableScan: join_t2 projection=[t2_id, t2_name, t2_int]
+06)------Filter: CAST(join_t2.t2_id AS Int64) + Int64(1) IS NOT NULL
+07)--------TableScan: join_t2 projection=[t2_id, t2_name, t2_int]
 
 query ITI rowsort
 select join_t1.t1_id, join_t1.t1_name, join_t1.t1_int
@@ -2037,14 +2143,16 @@ where join_t1.t1_id + 12 in (select join_t2.t2_id + 1 from join_t2)
 logical_plan
 01)LeftSemi Join: CAST(join_t1.t1_int AS Int64) = __correlated_sq_2.join_t2.t2_int + Int64(1)
 02)--LeftSemi Join: CAST(join_t1.t1_id AS Int64) + Int64(12) = __correlated_sq_1.join_t2.t2_id + Int64(1)
-03)----Filter: join_t1.t1_id > UInt32(0)
+03)----Filter: CAST(join_t1.t1_int AS Int64) IS NOT NULL AND CAST(join_t1.t1_id AS Int64) + Int64(12) IS NOT NULL AND join_t1.t1_id > UInt32(0)
 04)------TableScan: join_t1 projection=[t1_id, t1_name, t1_int]
 05)----SubqueryAlias: __correlated_sq_1
 06)------Projection: CAST(join_t2.t2_id AS Int64) + Int64(1)
-07)--------TableScan: join_t2 projection=[t2_id]
-08)--SubqueryAlias: __correlated_sq_2
-09)----Projection: CAST(join_t2.t2_int AS Int64) + Int64(1)
-10)------TableScan: join_t2 projection=[t2_int]
+07)--------Filter: CAST(join_t2.t2_id AS Int64) + Int64(1) IS NOT NULL
+08)----------TableScan: join_t2 projection=[t2_id]
+09)--SubqueryAlias: __correlated_sq_2
+10)----Projection: CAST(join_t2.t2_int AS Int64) + Int64(1)
+11)------Filter: CAST(join_t2.t2_int AS Int64) + Int64(1) IS NOT NULL
+12)--------TableScan: join_t2 projection=[t2_int]
 
 query ITI
 select join_t1.t1_id, join_t1.t1_name, join_t1.t1_int
@@ -2598,20 +2706,26 @@ EXPLAIN SELECT * FROM test_timestamps_tz_table as t1 JOIN test_timestamps_tz_tab
 logical_plan
 01)Inner Join: t1.millis = t2.millis
 02)--SubqueryAlias: t1
-03)----TableScan: test_timestamps_tz_table projection=[nanos, micros, millis, secs, names]
-04)--SubqueryAlias: t2
-05)----TableScan: test_timestamps_tz_table projection=[nanos, micros, millis, secs, names]
+03)----Filter: test_timestamps_tz_table.millis IS NOT NULL
+04)------TableScan: test_timestamps_tz_table projection=[nanos, micros, millis, secs, names]
+05)--SubqueryAlias: t2
+06)----Filter: test_timestamps_tz_table.millis IS NOT NULL
+07)------TableScan: test_timestamps_tz_table projection=[nanos, micros, millis, secs, names]
 physical_plan
 01)CoalesceBatchesExec: target_batch_size=2
 02)--HashJoinExec: mode=Partitioned, join_type=Inner, on=[(millis@2, millis@2)]
 03)----CoalesceBatchesExec: target_batch_size=2
 04)------RepartitionExec: partitioning=Hash([millis@2], 2), input_partitions=2
-05)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-06)----------MemoryExec: partitions=1, partition_sizes=[1]
-07)----CoalesceBatchesExec: target_batch_size=2
-08)------RepartitionExec: partitioning=Hash([millis@2], 2), input_partitions=2
-09)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-10)----------MemoryExec: partitions=1, partition_sizes=[1]
+05)--------CoalesceBatchesExec: target_batch_size=2
+06)----------FilterExec: millis@2 IS NOT NULL
+07)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+08)--------------MemoryExec: partitions=1, partition_sizes=[1]
+09)----CoalesceBatchesExec: target_batch_size=2
+10)------RepartitionExec: partitioning=Hash([millis@2], 2), input_partitions=2
+11)--------CoalesceBatchesExec: target_batch_size=2
+12)----------FilterExec: millis@2 IS NOT NULL
+13)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+14)--------------MemoryExec: partitions=1, partition_sizes=[1]
 
 # left_join_using_2
 query II
@@ -2672,9 +2786,11 @@ explain select * from hashjoin_datatype_table_t1 t1 join hashjoin_datatype_table
 logical_plan
 01)Inner Join: t1.c1 = t2.c1
 02)--SubqueryAlias: t1
-03)----TableScan: hashjoin_datatype_table_t1 projection=[c1, c2, c3, c4]
-04)--SubqueryAlias: t2
-05)----TableScan: hashjoin_datatype_table_t2 projection=[c1, c2, c3, c4]
+03)----Filter: hashjoin_datatype_table_t1.c1 IS NOT NULL
+04)------TableScan: hashjoin_datatype_table_t1 projection=[c1, c2, c3, c4]
+05)--SubqueryAlias: t2
+06)----Filter: hashjoin_datatype_table_t2.c1 IS NOT NULL
+07)------TableScan: hashjoin_datatype_table_t2 projection=[c1, c2, c3, c4]
 
 # hash_join_with_date32
 query DDR?DDR? rowsort
@@ -2693,7 +2809,8 @@ logical_plan
 02)--SubqueryAlias: t1
 03)----TableScan: hashjoin_datatype_table_t1 projection=[c1, c2, c3, c4]
 04)--SubqueryAlias: t2
-05)----TableScan: hashjoin_datatype_table_t2 projection=[c1, c2, c3, c4]
+05)----Filter: hashjoin_datatype_table_t2.c2 IS NOT NULL
+06)------TableScan: hashjoin_datatype_table_t2 projection=[c1, c2, c3, c4]
 
 # hash_join_with_date64
 query DDR?DDR? rowsort
@@ -2712,9 +2829,10 @@ explain select * from hashjoin_datatype_table_t1 t1 right join hashjoin_datatype
 logical_plan
 01)Right Join: t1.c3 = t2.c3
 02)--SubqueryAlias: t1
-03)----TableScan: hashjoin_datatype_table_t1 projection=[c1, c2, c3, c4]
-04)--SubqueryAlias: t2
-05)----TableScan: hashjoin_datatype_table_t1 projection=[c1, c2, c3, c4]
+03)----Filter: hashjoin_datatype_table_t1.c3 IS NOT NULL
+04)------TableScan: hashjoin_datatype_table_t1 projection=[c1, c2, c3, c4]
+05)--SubqueryAlias: t2
+06)----TableScan: hashjoin_datatype_table_t1 projection=[c1, c2, c3, c4]
 
 # hash_join_with_decimal
 query DDR?DDR? rowsort
@@ -2732,9 +2850,11 @@ explain select * from hashjoin_datatype_table_t1 t1 join hashjoin_datatype_table
 logical_plan
 01)Inner Join: t1.c4 = t2.c4
 02)--SubqueryAlias: t1
-03)----TableScan: hashjoin_datatype_table_t1 projection=[c1, c2, c3, c4]
-04)--SubqueryAlias: t2
-05)----TableScan: hashjoin_datatype_table_t1 projection=[c1, c2, c3, c4]
+03)----Filter: hashjoin_datatype_table_t1.c4 IS NOT NULL
+04)------TableScan: hashjoin_datatype_table_t1 projection=[c1, c2, c3, c4]
+05)--SubqueryAlias: t2
+06)----Filter: hashjoin_datatype_table_t1.c4 IS NOT NULL
+07)------TableScan: hashjoin_datatype_table_t1 projection=[c1, c2, c3, c4]
 
 # hash_join_with_dictionary
 query DDR?DDR? rowsort
@@ -2771,21 +2891,27 @@ explain select * from hashjoin_datatype_table_t1 t1 join hashjoin_datatype_table
 logical_plan
 01)Inner Join: t1.c1 = t2.c1
 02)--SubqueryAlias: t1
-03)----TableScan: hashjoin_datatype_table_t1 projection=[c1, c2, c3, c4]
-04)--SubqueryAlias: t2
-05)----TableScan: hashjoin_datatype_table_t2 projection=[c1, c2, c3, c4]
+03)----Filter: hashjoin_datatype_table_t1.c1 IS NOT NULL
+04)------TableScan: hashjoin_datatype_table_t1 projection=[c1, c2, c3, c4]
+05)--SubqueryAlias: t2
+06)----Filter: hashjoin_datatype_table_t2.c1 IS NOT NULL
+07)------TableScan: hashjoin_datatype_table_t2 projection=[c1, c2, c3, c4]
 physical_plan
 01)SortMergeJoin: join_type=Inner, on=[(c1@0, c1@0)]
 02)--SortExec: expr=[c1@0 ASC], preserve_partitioning=[true]
 03)----CoalesceBatchesExec: target_batch_size=2
 04)------RepartitionExec: partitioning=Hash([c1@0], 2), input_partitions=2
-05)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-06)----------MemoryExec: partitions=1, partition_sizes=[1]
-07)--SortExec: expr=[c1@0 ASC], preserve_partitioning=[true]
-08)----CoalesceBatchesExec: target_batch_size=2
-09)------RepartitionExec: partitioning=Hash([c1@0], 2), input_partitions=2
-10)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-11)----------MemoryExec: partitions=1, partition_sizes=[1]
+05)--------CoalesceBatchesExec: target_batch_size=2
+06)----------FilterExec: c1@0 IS NOT NULL
+07)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+08)--------------MemoryExec: partitions=1, partition_sizes=[1]
+09)--SortExec: expr=[c1@0 ASC], preserve_partitioning=[true]
+10)----CoalesceBatchesExec: target_batch_size=2
+11)------RepartitionExec: partitioning=Hash([c1@0], 2), input_partitions=2
+12)--------CoalesceBatchesExec: target_batch_size=2
+13)----------FilterExec: c1@0 IS NOT NULL
+14)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+15)--------------MemoryExec: partitions=1, partition_sizes=[1]
 
 # sort_merge_join_on_date32 inner sort merge join on data type (Date32)
 query DDR?DDR? rowsort
@@ -2801,9 +2927,10 @@ explain select * from hashjoin_datatype_table_t1 t1 right join hashjoin_datatype
 logical_plan
 01)Right Join: CAST(t1.c3 AS Decimal128(10, 2)) = t2.c3
 02)--SubqueryAlias: t1
-03)----TableScan: hashjoin_datatype_table_t1 projection=[c1, c2, c3, c4]
-04)--SubqueryAlias: t2
-05)----TableScan: hashjoin_datatype_table_t2 projection=[c1, c2, c3, c4]
+03)----Filter: CAST(hashjoin_datatype_table_t1.c3 AS Decimal128(10, 2)) IS NOT NULL
+04)------TableScan: hashjoin_datatype_table_t1 projection=[c1, c2, c3, c4]
+05)--SubqueryAlias: t2
+06)----TableScan: hashjoin_datatype_table_t2 projection=[c1, c2, c3, c4]
 physical_plan
 01)ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3, c4@3 as c4, c1@5 as c1, c2@6 as c2, c3@7 as c3, c4@8 as c4]
 02)--SortMergeJoin: join_type=Right, on=[(CAST(t1.c3 AS Decimal128(10, 2))@4, c3@2)]
@@ -2811,13 +2938,15 @@ physical_plan
 04)------CoalesceBatchesExec: target_batch_size=2
 05)--------RepartitionExec: partitioning=Hash([CAST(t1.c3 AS Decimal128(10, 2))@4], 2), input_partitions=2
 06)----------ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3, c4@3 as c4, CAST(c3@2 AS Decimal128(10, 2)) as CAST(t1.c3 AS Decimal128(10, 2))]
-07)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-08)--------------MemoryExec: partitions=1, partition_sizes=[1]
-09)----SortExec: expr=[c3@2 ASC], preserve_partitioning=[true]
-10)------CoalesceBatchesExec: target_batch_size=2
-11)--------RepartitionExec: partitioning=Hash([c3@2], 2), input_partitions=2
-12)----------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-13)------------MemoryExec: partitions=1, partition_sizes=[1]
+07)------------CoalesceBatchesExec: target_batch_size=2
+08)--------------FilterExec: CAST(c3@2 AS Decimal128(10, 2)) IS NOT NULL
+09)----------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+10)------------------MemoryExec: partitions=1, partition_sizes=[1]
+11)----SortExec: expr=[c3@2 ASC], preserve_partitioning=[true]
+12)------CoalesceBatchesExec: target_batch_size=2
+13)--------RepartitionExec: partitioning=Hash([c3@2], 2), input_partitions=2
+14)----------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+15)------------MemoryExec: partitions=1, partition_sizes=[1]
 
 # sort_merge_join_on_decimal right join on data type (Decimal)
 query DDR?DDR? rowsort
@@ -2874,12 +3003,16 @@ physical_plan
 04)------HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(t2_id@0, t1_id@0)]
 05)--------CoalesceBatchesExec: target_batch_size=2
 06)----------RepartitionExec: partitioning=Hash([t2_id@0], 2), input_partitions=2
-07)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-08)--------------MemoryExec: partitions=1, partition_sizes=[1]
-09)--------CoalesceBatchesExec: target_batch_size=2
-10)----------RepartitionExec: partitioning=Hash([t1_id@0], 2), input_partitions=2
-11)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-12)--------------MemoryExec: partitions=1, partition_sizes=[1]
+07)------------CoalesceBatchesExec: target_batch_size=2
+08)--------------FilterExec: t2_id@0 IS NOT NULL
+09)----------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+10)------------------MemoryExec: partitions=1, partition_sizes=[1]
+11)--------CoalesceBatchesExec: target_batch_size=2
+12)----------RepartitionExec: partitioning=Hash([t1_id@0], 2), input_partitions=2
+13)------------CoalesceBatchesExec: target_batch_size=2
+14)--------------FilterExec: t1_id@0 IS NOT NULL
+15)----------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+16)------------------MemoryExec: partitions=1, partition_sizes=[1]
 
 query IT rowsort
 SELECT t1_id, t1_name FROM left_semi_anti_join_table_t1 t1 WHERE t1_id IN (SELECT t2_id FROM left_semi_anti_join_table_t2 t2) ORDER BY t1_id
@@ -2915,12 +3048,16 @@ physical_plan
 04)------HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(t2_id@0, t1_id@0)]
 05)--------CoalesceBatchesExec: target_batch_size=2
 06)----------RepartitionExec: partitioning=Hash([t2_id@0], 2), input_partitions=2
-07)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-08)--------------MemoryExec: partitions=1, partition_sizes=[1]
-09)--------CoalesceBatchesExec: target_batch_size=2
-10)----------RepartitionExec: partitioning=Hash([t1_id@0], 2), input_partitions=2
-11)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-12)--------------MemoryExec: partitions=1, partition_sizes=[1]
+07)------------CoalesceBatchesExec: target_batch_size=2
+08)--------------FilterExec: t2_id@0 IS NOT NULL
+09)----------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+10)------------------MemoryExec: partitions=1, partition_sizes=[1]
+11)--------CoalesceBatchesExec: target_batch_size=2
+12)----------RepartitionExec: partitioning=Hash([t1_id@0], 2), input_partitions=2
+13)------------CoalesceBatchesExec: target_batch_size=2
+14)--------------FilterExec: t1_id@0 IS NOT NULL
+15)----------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+16)------------------MemoryExec: partitions=1, partition_sizes=[1]
 
 query IT
 SELECT t1_id, t1_name FROM left_semi_anti_join_table_t1 t1 LEFT SEMI JOIN left_semi_anti_join_table_t2 t2 ON (t1_id = t2_id) ORDER BY t1_id
@@ -2975,9 +3112,15 @@ physical_plan
 02)--SortExec: expr=[t1_id@0 ASC NULLS LAST], preserve_partitioning=[true]
 03)----CoalesceBatchesExec: target_batch_size=2
 04)------HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(t2_id@0, t1_id@0)]
-05)--------MemoryExec: partitions=1, partition_sizes=[1]
-06)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-07)----------MemoryExec: partitions=1, partition_sizes=[1]
+05)--------CoalescePartitionsExec
+06)----------CoalesceBatchesExec: target_batch_size=2
+07)------------FilterExec: t2_id@0 IS NOT NULL
+08)--------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+09)----------------MemoryExec: partitions=1, partition_sizes=[1]
+10)--------CoalesceBatchesExec: target_batch_size=2
+11)----------FilterExec: t1_id@0 IS NOT NULL
+12)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+13)--------------MemoryExec: partitions=1, partition_sizes=[1]
 
 query IT rowsort
 SELECT t1_id, t1_name FROM left_semi_anti_join_table_t1 t1 WHERE t1_id IN (SELECT t2_id FROM left_semi_anti_join_table_t2 t2) ORDER BY t1_id
@@ -3011,9 +3154,15 @@ physical_plan
 02)--SortExec: expr=[t1_id@0 ASC NULLS LAST], preserve_partitioning=[true]
 03)----CoalesceBatchesExec: target_batch_size=2
 04)------HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(t2_id@0, t1_id@0)]
-05)--------MemoryExec: partitions=1, partition_sizes=[1]
-06)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-07)----------MemoryExec: partitions=1, partition_sizes=[1]
+05)--------CoalescePartitionsExec
+06)----------CoalesceBatchesExec: target_batch_size=2
+07)------------FilterExec: t2_id@0 IS NOT NULL
+08)--------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+09)----------------MemoryExec: partitions=1, partition_sizes=[1]
+10)--------CoalesceBatchesExec: target_batch_size=2
+11)----------FilterExec: t1_id@0 IS NOT NULL
+12)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+13)--------------MemoryExec: partitions=1, partition_sizes=[1]
 
 query IT
 SELECT t1_id, t1_name FROM left_semi_anti_join_table_t1 t1 LEFT SEMI JOIN left_semi_anti_join_table_t2 t2 ON (t1_id = t2_id) ORDER BY t1_id
@@ -3071,12 +3220,16 @@ physical_plan
 04)------HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(t2_id@0, t1_id@0)], filter=t2_name@1 != t1_name@0
 05)--------CoalesceBatchesExec: target_batch_size=2
 06)----------RepartitionExec: partitioning=Hash([t2_id@0], 2), input_partitions=2
-07)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-08)--------------MemoryExec: partitions=1, partition_sizes=[1]
-09)--------CoalesceBatchesExec: target_batch_size=2
-10)----------RepartitionExec: partitioning=Hash([t1_id@0], 2), input_partitions=2
-11)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-12)--------------MemoryExec: partitions=1, partition_sizes=[1]
+07)------------CoalesceBatchesExec: target_batch_size=2
+08)--------------FilterExec: t2_id@0 IS NOT NULL
+09)----------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+10)------------------MemoryExec: partitions=1, partition_sizes=[1]
+11)--------CoalesceBatchesExec: target_batch_size=2
+12)----------RepartitionExec: partitioning=Hash([t1_id@0], 2), input_partitions=2
+13)------------CoalesceBatchesExec: target_batch_size=2
+14)--------------FilterExec: t1_id@0 IS NOT NULL
+15)----------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+16)------------------MemoryExec: partitions=1, partition_sizes=[1]
 
 query ITI rowsort
 SELECT t1_id, t1_name, t1_int FROM right_semi_anti_join_table_t1 t1 WHERE EXISTS (SELECT * FROM right_semi_anti_join_table_t2 t2 where t2.t2_id = t1.t1_id and t2.t2_name <> t1.t1_name) ORDER BY t1_id
@@ -3093,12 +3246,16 @@ physical_plan
 04)------HashJoinExec: mode=Partitioned, join_type=RightSemi, on=[(t2_id@0, t1_id@0)], filter=t2_name@0 != t1_name@1
 05)--------CoalesceBatchesExec: target_batch_size=2
 06)----------RepartitionExec: partitioning=Hash([t2_id@0], 2), input_partitions=2
-07)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-08)--------------MemoryExec: partitions=1, partition_sizes=[1]
-09)--------CoalesceBatchesExec: target_batch_size=2
-10)----------RepartitionExec: partitioning=Hash([t1_id@0], 2), input_partitions=2
-11)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-12)--------------MemoryExec: partitions=1, partition_sizes=[1]
+07)------------CoalesceBatchesExec: target_batch_size=2
+08)--------------FilterExec: t2_id@0 IS NOT NULL
+09)----------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+10)------------------MemoryExec: partitions=1, partition_sizes=[1]
+11)--------CoalesceBatchesExec: target_batch_size=2
+12)----------RepartitionExec: partitioning=Hash([t1_id@0], 2), input_partitions=2
+13)------------CoalesceBatchesExec: target_batch_size=2
+14)--------------FilterExec: t1_id@0 IS NOT NULL
+15)----------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+16)------------------MemoryExec: partitions=1, partition_sizes=[1]
 
 query ITI rowsort
 SELECT t1_id, t1_name, t1_int FROM right_semi_anti_join_table_t2 t2 RIGHT SEMI JOIN right_semi_anti_join_table_t1 t1 on (t2.t2_id = t1.t1_id and t2.t2_name <> t1.t1_name) ORDER BY t1_id
@@ -3151,9 +3308,15 @@ physical_plan
 02)--SortExec: expr=[t1_id@0 ASC NULLS LAST], preserve_partitioning=[true]
 03)----CoalesceBatchesExec: target_batch_size=2
 04)------HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(t2_id@0, t1_id@0)], filter=t2_name@1 != t1_name@0
-05)--------MemoryExec: partitions=1, partition_sizes=[1]
-06)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-07)----------MemoryExec: partitions=1, partition_sizes=[1]
+05)--------CoalescePartitionsExec
+06)----------CoalesceBatchesExec: target_batch_size=2
+07)------------FilterExec: t2_id@0 IS NOT NULL
+08)--------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+09)----------------MemoryExec: partitions=1, partition_sizes=[1]
+10)--------CoalesceBatchesExec: target_batch_size=2
+11)----------FilterExec: t1_id@0 IS NOT NULL
+12)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+13)--------------MemoryExec: partitions=1, partition_sizes=[1]
 
 query ITI rowsort
 SELECT t1_id, t1_name, t1_int FROM right_semi_anti_join_table_t1 t1 WHERE EXISTS (SELECT * FROM right_semi_anti_join_table_t2 t2 where t2.t2_id = t1.t1_id and t2.t2_name <> t1.t1_name) ORDER BY t1_id
@@ -3168,9 +3331,15 @@ physical_plan
 02)--SortExec: expr=[t1_id@0 ASC NULLS LAST], preserve_partitioning=[true]
 03)----CoalesceBatchesExec: target_batch_size=2
 04)------HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(t2_id@0, t1_id@0)], filter=t2_name@0 != t1_name@1
-05)--------MemoryExec: partitions=1, partition_sizes=[1]
-06)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-07)----------MemoryExec: partitions=1, partition_sizes=[1]
+05)--------CoalescePartitionsExec
+06)----------CoalesceBatchesExec: target_batch_size=2
+07)------------FilterExec: t2_id@0 IS NOT NULL
+08)--------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+09)----------------MemoryExec: partitions=1, partition_sizes=[1]
+10)--------CoalesceBatchesExec: target_batch_size=2
+11)----------FilterExec: t1_id@0 IS NOT NULL
+12)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+13)--------------MemoryExec: partitions=1, partition_sizes=[1]
 
 query ITI rowsort
 SELECT t1_id, t1_name, t1_int FROM right_semi_anti_join_table_t2 t2 RIGHT SEMI JOIN right_semi_anti_join_table_t1 t1 on (t2.t2_id = t1.t1_id and t2.t2_name <> t1.t1_name) ORDER BY t1_id
@@ -3240,23 +3409,29 @@ logical_plan
 02)--Inner Join: l_table.a = r_table.a
 03)----SubqueryAlias: l_table
 04)------Projection: annotated_data.a0, annotated_data.a, annotated_data.b, annotated_data.c, annotated_data.d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS rn1
-05)--------WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]
-06)----------TableScan: annotated_data projection=[a0, a, b, c, d]
-07)----SubqueryAlias: r_table
-08)------TableScan: annotated_data projection=[a0, a, b, c, d]
+05)--------Filter: annotated_data.a IS NOT NULL
+06)----------WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]
+07)------------TableScan: annotated_data projection=[a0, a, b, c, d]
+08)----SubqueryAlias: r_table
+09)------Filter: annotated_data.a IS NOT NULL
+10)--------TableScan: annotated_data projection=[a0, a, b, c, d], partial_filters=[annotated_data.a IS NOT NULL]
 physical_plan
 01)SortPreservingMergeExec: [rn1@5 ASC NULLS LAST]
 02)--SortMergeJoin: join_type=Inner, on=[(a@1, a@1)]
 03)----CoalesceBatchesExec: target_batch_size=2
 04)------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST,rn1@5 ASC NULLS LAST
-05)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-06)----------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1]
-07)------------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted]
-08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true
-09)----CoalesceBatchesExec: target_batch_size=2
-10)------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST
-11)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-12)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true
+05)--------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1]
+06)----------CoalesceBatchesExec: target_batch_size=2
+07)------------FilterExec: a@1 IS NOT NULL
+08)--------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+09)----------------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted]
+10)------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true
+11)----CoalesceBatchesExec: target_batch_size=2
+12)------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST
+13)--------CoalesceBatchesExec: target_batch_size=2
+14)----------FilterExec: a@1 IS NOT NULL
+15)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+16)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true
 
 # sort merge join should propagate ordering equivalence of the right side
 # for right join. Hence final requirement rn1 ASC is already satisfied at
@@ -3273,24 +3448,27 @@ logical_plan
 01)Sort: r_table.rn1 ASC NULLS LAST
 02)--Right Join: l_table.a = r_table.a
 03)----SubqueryAlias: l_table
-04)------TableScan: annotated_data projection=[a0, a, b, c, d]
-05)----SubqueryAlias: r_table
-06)------Projection: annotated_data.a0, annotated_data.a, annotated_data.b, annotated_data.c, annotated_data.d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS rn1
-07)--------WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]
-08)----------TableScan: annotated_data projection=[a0, a, b, c, d]
+04)------Filter: annotated_data.a IS NOT NULL
+05)--------TableScan: annotated_data projection=[a0, a, b, c, d], partial_filters=[annotated_data.a IS NOT NULL]
+06)----SubqueryAlias: r_table
+07)------Projection: annotated_data.a0, annotated_data.a, annotated_data.b, annotated_data.c, annotated_data.d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS rn1
+08)--------WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]
+09)----------TableScan: annotated_data projection=[a0, a, b, c, d]
 physical_plan
 01)SortPreservingMergeExec: [rn1@10 ASC NULLS LAST]
 02)--SortMergeJoin: join_type=Right, on=[(a@1, a@1)]
 03)----CoalesceBatchesExec: target_batch_size=2
 04)------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST
-05)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-06)----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true
-07)----CoalesceBatchesExec: target_batch_size=2
-08)------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST,rn1@5 ASC NULLS LAST
-09)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-10)----------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1]
-11)------------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted]
-12)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true
+05)--------CoalesceBatchesExec: target_batch_size=2
+06)----------FilterExec: a@1 IS NOT NULL
+07)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+08)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true
+09)----CoalesceBatchesExec: target_batch_size=2
+10)------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2, preserve_order=true, sort_exprs=a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST,rn1@5 ASC NULLS LAST
+11)--------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+12)----------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1]
+13)------------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted]
+14)--------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true
 
 statement ok
 set datafusion.optimizer.prefer_existing_sort = false;
@@ -3313,12 +3491,14 @@ logical_plan
 02)--Inner Join: l_table.a = r_table.a
 03)----SubqueryAlias: l_table
 04)------Projection: annotated_data.a0, annotated_data.a, annotated_data.b, annotated_data.c, annotated_data.d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS rn1
-05)--------WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]
-06)----------TableScan: annotated_data projection=[a0, a, b, c, d]
-07)----SubqueryAlias: r_table
-08)------Projection: annotated_data.a0, annotated_data.a, annotated_data.b, annotated_data.c, annotated_data.d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS rn1
-09)--------WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]
-10)----------TableScan: annotated_data projection=[a0, a, b, c, d]
+05)--------Filter: annotated_data.a IS NOT NULL
+06)----------WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]
+07)------------TableScan: annotated_data projection=[a0, a, b, c, d]
+08)----SubqueryAlias: r_table
+09)------Projection: annotated_data.a0, annotated_data.a, annotated_data.b, annotated_data.c, annotated_data.d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS rn1
+10)--------Filter: annotated_data.a IS NOT NULL
+11)----------WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]
+12)------------TableScan: annotated_data projection=[a0, a, b, c, d]
 physical_plan
 01)SortPreservingMergeExec: [a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST,rn1@11 ASC NULLS LAST]
 02)--SortExec: expr=[a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST,rn1@11 ASC NULLS LAST], preserve_partitioning=[true]
@@ -3326,17 +3506,21 @@ physical_plan
 04)------SortExec: expr=[a@1 ASC], preserve_partitioning=[true]
 05)--------CoalesceBatchesExec: target_batch_size=2
 06)----------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2
-07)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-08)--------------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1]
-09)----------------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted]
-10)------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true
-11)------SortExec: expr=[a@1 ASC], preserve_partitioning=[true]
-12)--------CoalesceBatchesExec: target_batch_size=2
-13)----------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2
-14)------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-15)--------------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1]
-16)----------------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted]
-17)------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true
+07)------------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1]
+08)--------------CoalesceBatchesExec: target_batch_size=2
+09)----------------FilterExec: a@1 IS NOT NULL
+10)------------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+11)--------------------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted]
+12)----------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true
+13)------SortExec: expr=[a@1 ASC], preserve_partitioning=[true]
+14)--------CoalesceBatchesExec: target_batch_size=2
+15)----------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2
+16)------------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1]
+17)--------------CoalesceBatchesExec: target_batch_size=2
+18)----------------FilterExec: a@1 IS NOT NULL
+19)------------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+20)--------------------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted]
+21)----------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true
 
 statement ok
 set datafusion.optimizer.prefer_hash_join = true;
@@ -3360,18 +3544,24 @@ logical_plan
 01)Sort: r_table.rn1 ASC NULLS LAST
 02)--Inner Join: l_table.a = r_table.a
 03)----SubqueryAlias: l_table
-04)------TableScan: annotated_data projection=[a0, a, b, c, d]
-05)----SubqueryAlias: r_table
-06)------Projection: annotated_data.a0, annotated_data.a, annotated_data.b, annotated_data.c, annotated_data.d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS rn1
-07)--------WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]
-08)----------TableScan: annotated_data projection=[a0, a, b, c, d]
+04)------Filter: annotated_data.a IS NOT NULL
+05)--------TableScan: annotated_data projection=[a0, a, b, c, d], partial_filters=[annotated_data.a IS NOT NULL]
+06)----SubqueryAlias: r_table
+07)------Projection: annotated_data.a0, annotated_data.a, annotated_data.b, annotated_data.c, annotated_data.d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS rn1
+08)--------Filter: annotated_data.a IS NOT NULL
+09)----------WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]
+10)------------TableScan: annotated_data projection=[a0, a, b, c, d]
 physical_plan
 01)CoalesceBatchesExec: target_batch_size=2
 02)--HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@1, a@1)]
-03)----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true
-04)----ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1]
-05)------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted]
-06)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true
+03)----CoalesceBatchesExec: target_batch_size=2
+04)------FilterExec: a@1 IS NOT NULL
+05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true
+06)----ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1]
+07)------CoalesceBatchesExec: target_batch_size=2
+08)--------FilterExec: a@1 IS NOT NULL
+09)----------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted]
+10)------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true
 
 # hash join should propagate ordering equivalence of the right side for RIGHT ANTI join.
 # Hence final requirement rn1 ASC is already satisfied at the end of HashJoinExec.
@@ -3387,18 +3577,21 @@ logical_plan
 01)Sort: r_table.rn1 ASC NULLS LAST
 02)--RightAnti Join: l_table.a = r_table.a
 03)----SubqueryAlias: l_table
-04)------TableScan: annotated_data projection=[a]
-05)----SubqueryAlias: r_table
-06)------Projection: annotated_data.a0, annotated_data.a, annotated_data.b, annotated_data.c, annotated_data.d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS rn1
-07)--------WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]
-08)----------TableScan: annotated_data projection=[a0, a, b, c, d]
+04)------Filter: annotated_data.a IS NOT NULL
+05)--------TableScan: annotated_data projection=[a], partial_filters=[annotated_data.a IS NOT NULL]
+06)----SubqueryAlias: r_table
+07)------Projection: annotated_data.a0, annotated_data.a, annotated_data.b, annotated_data.c, annotated_data.d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING AS rn1
+08)--------WindowAggr: windowExpr=[[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]]
+09)----------TableScan: annotated_data projection=[a0, a, b, c, d]
 physical_plan
 01)CoalesceBatchesExec: target_batch_size=2
 02)--HashJoinExec: mode=CollectLeft, join_type=RightAnti, on=[(a@0, a@1)]
-03)----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a], output_ordering=[a@0 ASC], has_header=true
-04)----ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1]
-05)------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted]
-06)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true
+03)----CoalesceBatchesExec: target_batch_size=2
+04)------FilterExec: a@0 IS NOT NULL
+05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a], output_ordering=[a@0 ASC], has_header=true
+06)----ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1]
+07)------BoundedWindowAggExec: wdw=[row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "row_number() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }], mode=[Sorted]
+08)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true
 
 query TT
 EXPLAIN SELECT l.a, LAST_VALUE(r.b ORDER BY r.a ASC NULLS FIRST) as last_col1
@@ -3414,16 +3607,22 @@ logical_plan
 03)----Aggregate: groupBy=[[l.a, l.b, l.c]], aggr=[[last_value(r.b) ORDER BY [r.a ASC NULLS FIRST]]]
 04)------Inner Join: l.a = r.a
 05)--------SubqueryAlias: l
-06)----------TableScan: annotated_data projection=[a, b, c]
-07)--------SubqueryAlias: r
-08)----------TableScan: annotated_data projection=[a, b]
+06)----------Filter: annotated_data.a IS NOT NULL
+07)------------TableScan: annotated_data projection=[a, b, c], partial_filters=[annotated_data.a IS NOT NULL]
+08)--------SubqueryAlias: r
+09)----------Filter: annotated_data.a IS NOT NULL
+10)------------TableScan: annotated_data projection=[a, b], partial_filters=[annotated_data.a IS NOT NULL]
 physical_plan
 01)ProjectionExec: expr=[a@0 as a, last_value(r.b) ORDER BY [r.a ASC NULLS FIRST]@3 as last_col1]
 02)--AggregateExec: mode=Single, gby=[a@0 as a, b@1 as b, c@2 as c], aggr=[last_value(r.b) ORDER BY [r.a ASC NULLS FIRST]], ordering_mode=PartiallySorted([0])
 03)----CoalesceBatchesExec: target_batch_size=2
 04)------HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(a@0, a@0)]
-05)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c], output_ordering=[a@0 ASC, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true
-06)--------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], output_ordering=[a@0 ASC, b@1 ASC NULLS LAST], has_header=true
+05)--------CoalesceBatchesExec: target_batch_size=2
+06)----------FilterExec: a@0 IS NOT NULL
+07)------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c], output_ordering=[a@0 ASC, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true
+08)--------CoalesceBatchesExec: target_batch_size=2
+09)----------FilterExec: a@0 IS NOT NULL
+10)------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], output_ordering=[a@0 ASC, b@1 ASC NULLS LAST], has_header=true
 
 # create a table where there more than one valid ordering
 # that describes table.
@@ -3503,9 +3702,11 @@ logical_plan
 03)----Aggregate: groupBy=[[l.a, l.b, l.c]], aggr=[[last_value(r.b) ORDER BY [r.a ASC NULLS FIRST]]]
 04)------Inner Join: l.a = r.a
 05)--------SubqueryAlias: l
-06)----------TableScan: annotated_data projection=[a, b, c]
-07)--------SubqueryAlias: r
-08)----------TableScan: annotated_data projection=[a, b]
+06)----------Filter: annotated_data.a IS NOT NULL
+07)------------TableScan: annotated_data projection=[a, b, c], partial_filters=[annotated_data.a IS NOT NULL]
+08)--------SubqueryAlias: r
+09)----------Filter: annotated_data.a IS NOT NULL
+10)------------TableScan: annotated_data projection=[a, b], partial_filters=[annotated_data.a IS NOT NULL]
 physical_plan
 01)SortPreservingMergeExec: [a@0 ASC]
 02)--SortExec: expr=[a@0 ASC], preserve_partitioning=[true]
@@ -3518,12 +3719,16 @@ physical_plan
 09)----------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, a@0)]
 10)------------------CoalesceBatchesExec: target_batch_size=2
 11)--------------------RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2
-12)----------------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-13)------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c], output_ordering=[a@0 ASC, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true
-14)------------------CoalesceBatchesExec: target_batch_size=2
-15)--------------------RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2
-16)----------------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
-17)------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], output_ordering=[a@0 ASC, b@1 ASC NULLS LAST], has_header=true
+12)----------------------CoalesceBatchesExec: target_batch_size=2
+13)------------------------FilterExec: a@0 IS NOT NULL
+14)--------------------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+15)----------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c], output_ordering=[a@0 ASC, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST], has_header=true
+16)------------------CoalesceBatchesExec: target_batch_size=2
+17)--------------------RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2
+18)----------------------CoalesceBatchesExec: target_batch_size=2
+19)------------------------FilterExec: a@0 IS NOT NULL
+20)--------------------------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+21)----------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], output_ordering=[a@0 ASC, b@1 ASC NULLS LAST], has_header=true
 
 query TT
 EXPLAIN SELECT *
@@ -3928,16 +4133,19 @@ EXPLAIN SELECT * FROM (
 logical_plan
 01)Right Join: lhs.b = rhs.b
 02)--SubqueryAlias: lhs
-03)----TableScan: left_table_no_nulls projection=[a, b]
-04)--SubqueryAlias: rhs
-05)----Sort: right_table_no_nulls.b ASC NULLS LAST, fetch=10
-06)------TableScan: right_table_no_nulls projection=[a, b]
+03)----Filter: left_table_no_nulls.b IS NOT NULL
+04)------TableScan: left_table_no_nulls projection=[a, b]
+05)--SubqueryAlias: rhs
+06)----Sort: right_table_no_nulls.b ASC NULLS LAST, fetch=10
+07)------TableScan: right_table_no_nulls projection=[a, b]
 physical_plan
 01)CoalesceBatchesExec: target_batch_size=3
 02)--HashJoinExec: mode=CollectLeft, join_type=Right, on=[(b@1, b@1)]
-03)----MemoryExec: partitions=1, partition_sizes=[1]
-04)----SortExec: TopK(fetch=10), expr=[b@1 ASC NULLS LAST], preserve_partitioning=[false]
-05)------MemoryExec: partitions=1, partition_sizes=[1]
+03)----CoalesceBatchesExec: target_batch_size=3
+04)------FilterExec: b@1 IS NOT NULL
+05)--------MemoryExec: partitions=1, partition_sizes=[1]
+06)----SortExec: TopK(fetch=10), expr=[b@1 ASC NULLS LAST], preserve_partitioning=[false]
+07)------MemoryExec: partitions=1, partition_sizes=[1]
 
 
 
@@ -3986,14 +4194,17 @@ EXPLAIN SELECT * FROM (
 logical_plan
 01)Right Join: lhs.b = rhs.b
 02)--SubqueryAlias: lhs
-03)----TableScan: left_table_no_nulls projection=[a, b]
-04)--SubqueryAlias: rhs
-05)----TableScan: right_table_no_nulls projection=[a, b]
+03)----Filter: left_table_no_nulls.b IS NOT NULL
+04)------TableScan: left_table_no_nulls projection=[a, b]
+05)--SubqueryAlias: rhs
+06)----TableScan: right_table_no_nulls projection=[a, b]
 physical_plan
 01)CoalesceBatchesExec: target_batch_size=3
 02)--HashJoinExec: mode=CollectLeft, join_type=Right, on=[(b@1, b@1)]
-03)----MemoryExec: partitions=1, partition_sizes=[1]
-04)----MemoryExec: partitions=1, partition_sizes=[1]
+03)----CoalesceBatchesExec: target_batch_size=3
+04)------FilterExec: b@1 IS NOT NULL
+05)--------MemoryExec: partitions=1, partition_sizes=[1]
+06)----MemoryExec: partitions=1, partition_sizes=[1]
 
 
 # Null build indices:
@@ -4044,16 +4255,19 @@ EXPLAIN SELECT * FROM (
 logical_plan
 01)Right Join: lhs.b = rhs.b
 02)--SubqueryAlias: lhs
-03)----TableScan: left_table_no_nulls projection=[a, b]
-04)--SubqueryAlias: rhs
-05)----Sort: right_table_no_nulls.b ASC NULLS LAST, fetch=10
-06)------TableScan: right_table_no_nulls projection=[a, b]
+03)----Filter: left_table_no_nulls.b IS NOT NULL
+04)------TableScan: left_table_no_nulls projection=[a, b]
+05)--SubqueryAlias: rhs
+06)----Sort: right_table_no_nulls.b ASC NULLS LAST, fetch=10
+07)------TableScan: right_table_no_nulls projection=[a, b]
 physical_plan
 01)CoalesceBatchesExec: target_batch_size=3
 02)--HashJoinExec: mode=CollectLeft, join_type=Right, on=[(b@1, b@1)]
-03)----MemoryExec: partitions=1, partition_sizes=[1]
-04)----SortExec: TopK(fetch=10), expr=[b@1 ASC NULLS LAST], preserve_partitioning=[false]
-05)------MemoryExec: partitions=1, partition_sizes=[1]
+03)----CoalesceBatchesExec: target_batch_size=3
+04)------FilterExec: b@1 IS NOT NULL
+05)--------MemoryExec: partitions=1, partition_sizes=[1]
+06)----SortExec: TopK(fetch=10), expr=[b@1 ASC NULLS LAST], preserve_partitioning=[false]
+07)------MemoryExec: partitions=1, partition_sizes=[1]
 
 
 # Test CROSS JOIN LATERAL syntax (planning)
diff --git a/datafusion/sqllogictest/test_files/subquery.slt b/datafusion/sqllogictest/test_files/subquery.slt
index 493db62286b6..594365eaa409 100644
--- a/datafusion/sqllogictest/test_files/subquery.slt
+++ b/datafusion/sqllogictest/test_files/subquery.slt
@@ -357,17 +357,20 @@ logical_plan
 01)Sort: customer.c_custkey ASC NULLS LAST
 02)--Projection: customer.c_custkey
 03)----Inner Join: customer.c_custkey = __scalar_sq_1.o_custkey Filter: CAST(customer.c_acctbal AS Decimal128(25, 2)) < __scalar_sq_1.sum(orders.o_totalprice)
-04)------TableScan: customer projection=[c_custkey, c_acctbal]
-05)------SubqueryAlias: __scalar_sq_1
-06)--------Projection: sum(orders.o_totalprice), orders.o_custkey
-07)----------Aggregate: groupBy=[[orders.o_custkey]], aggr=[[sum(orders.o_totalprice)]]
-08)------------Projection: orders.o_custkey, orders.o_totalprice
-09)--------------Inner Join: orders.o_orderkey = __scalar_sq_2.l_orderkey Filter: CAST(orders.o_totalprice AS Decimal128(25, 2)) < __scalar_sq_2.price
-10)----------------TableScan: orders projection=[o_orderkey, o_custkey, o_totalprice]
-11)----------------SubqueryAlias: __scalar_sq_2
-12)------------------Projection: sum(lineitem.l_extendedprice) AS price, lineitem.l_orderkey
-13)--------------------Aggregate: groupBy=[[lineitem.l_orderkey]], aggr=[[sum(lineitem.l_extendedprice)]]
-14)----------------------TableScan: lineitem projection=[l_orderkey, l_extendedprice]
+04)------Filter: customer.c_custkey IS NOT NULL
+05)--------TableScan: customer projection=[c_custkey, c_acctbal], partial_filters=[customer.c_custkey IS NOT NULL]
+06)------SubqueryAlias: __scalar_sq_1
+07)--------Projection: sum(orders.o_totalprice), orders.o_custkey
+08)----------Aggregate: groupBy=[[orders.o_custkey]], aggr=[[sum(orders.o_totalprice)]]
+09)------------Projection: orders.o_custkey, orders.o_totalprice
+10)--------------Inner Join: orders.o_orderkey = __scalar_sq_2.l_orderkey Filter: CAST(orders.o_totalprice AS Decimal128(25, 2)) < __scalar_sq_2.price
+11)----------------Filter: orders.o_custkey IS NOT NULL AND orders.o_orderkey IS NOT NULL
+12)------------------TableScan: orders projection=[o_orderkey, o_custkey, o_totalprice], partial_filters=[orders.o_custkey IS NOT NULL, orders.o_orderkey IS NOT NULL]
+13)----------------SubqueryAlias: __scalar_sq_2
+14)------------------Projection: sum(lineitem.l_extendedprice) AS price, lineitem.l_orderkey
+15)--------------------Aggregate: groupBy=[[lineitem.l_orderkey]], aggr=[[sum(lineitem.l_extendedprice)]]
+16)----------------------Filter: lineitem.l_orderkey IS NOT NULL
+17)------------------------TableScan: lineitem projection=[l_orderkey, l_extendedprice], partial_filters=[lineitem.l_orderkey IS NOT NULL]
 
 # correlated_where_in
 query TT
@@ -379,10 +382,12 @@ where o_orderstatus in (
 logical_plan
 01)Projection: orders.o_orderkey
 02)--LeftSemi Join: orders.o_orderstatus = __correlated_sq_1.l_linestatus, orders.o_orderkey = __correlated_sq_1.l_orderkey
-03)----TableScan: orders projection=[o_orderkey, o_orderstatus]
-04)----SubqueryAlias: __correlated_sq_1
-05)------Projection: lineitem.l_linestatus, lineitem.l_orderkey
-06)--------TableScan: lineitem projection=[l_orderkey, l_linestatus]
+03)----Filter: orders.o_orderstatus IS NOT NULL AND orders.o_orderkey IS NOT NULL
+04)------TableScan: orders projection=[o_orderkey, o_orderstatus], partial_filters=[orders.o_orderstatus IS NOT NULL, orders.o_orderkey IS NOT NULL]
+05)----SubqueryAlias: __correlated_sq_1
+06)------Projection: lineitem.l_linestatus, lineitem.l_orderkey
+07)--------Filter: lineitem.l_linestatus IS NOT NULL AND lineitem.l_orderkey IS NOT NULL
+08)----------TableScan: lineitem projection=[l_orderkey, l_linestatus], partial_filters=[lineitem.l_linestatus IS NOT NULL, lineitem.l_orderkey IS NOT NULL]
 
 query I rowsort
 select o_orderkey from orders
@@ -416,11 +421,12 @@ explain SELECT t1_id, t1_name, t1_int FROM t1 WHERE t1_id IN(SELECT t1_int FROM
 ----
 logical_plan
 01)LeftSemi Join: t1.t1_id = __correlated_sq_1.t1_int
-02)--TableScan: t1 projection=[t1_id, t1_name, t1_int]
-03)--SubqueryAlias: __correlated_sq_1
-04)----Projection: t1.t1_int
-05)------Filter: t1.t1_int < t1.t1_id
-06)--------TableScan: t1 projection=[t1_id, t1_int]
+02)--Filter: t1.t1_id IS NOT NULL
+03)----TableScan: t1 projection=[t1_id, t1_name, t1_int]
+04)--SubqueryAlias: __correlated_sq_1
+05)----Projection: t1.t1_int
+06)------Filter: t1.t1_int IS NOT NULL AND t1.t1_int < t1.t1_id
+07)--------TableScan: t1 projection=[t1_id, t1_int]
 
 #in_subquery_nested_exist_subquery
 query TT
@@ -428,13 +434,15 @@ explain SELECT t1_id, t1_name, t1_int FROM t1 WHERE t1_id IN(SELECT t2_id FROM t
 ----
 logical_plan
 01)LeftSemi Join: t1.t1_id = __correlated_sq_1.t2_id
-02)--TableScan: t1 projection=[t1_id, t1_name, t1_int]
-03)--SubqueryAlias: __correlated_sq_1
-04)----Projection: t2.t2_id
-05)------LeftSemi Join:  Filter: __correlated_sq_2.t1_int > t2.t2_int
-06)--------TableScan: t2 projection=[t2_id, t2_int]
-07)--------SubqueryAlias: __correlated_sq_2
-08)----------TableScan: t1 projection=[t1_int]
+02)--Filter: t1.t1_id IS NOT NULL
+03)----TableScan: t1 projection=[t1_id, t1_name, t1_int]
+04)--SubqueryAlias: __correlated_sq_1
+05)----Projection: t2.t2_id
+06)------LeftSemi Join:  Filter: __correlated_sq_2.t1_int > t2.t2_int
+07)--------Filter: t2.t2_id IS NOT NULL
+08)----------TableScan: t2 projection=[t2_id, t2_int]
+09)--------SubqueryAlias: __correlated_sq_2
+10)----------TableScan: t1 projection=[t1_int]
 
 #invalid_scalar_subquery
 statement error DataFusion error: check_analyzed_plan\ncaused by\nError during planning: Scalar subquery should only return one column, but found 2: t2.t2_id, t2.t2_name
@@ -568,13 +576,16 @@ explain SELECT t0_id, t0_name FROM t0 WHERE EXISTS (SELECT 1 FROM t1 INNER JOIN
 ----
 logical_plan
 01)LeftSemi Join: t0.t0_name = __correlated_sq_1.t2_name
-02)--TableScan: t0 projection=[t0_id, t0_name]
-03)--SubqueryAlias: __correlated_sq_1
-04)----Projection: t2.t2_name
-05)------Inner Join: t1.t1_id = t2.t2_id
-06)--------TableScan: t1 projection=[t1_id]
-07)--------SubqueryAlias: t2
-08)----------TableScan: t2 projection=[t2_id, t2_name]
+02)--Filter: t0.t0_name IS NOT NULL
+03)----TableScan: t0 projection=[t0_id, t0_name]
+04)--SubqueryAlias: __correlated_sq_1
+05)----Projection: t2.t2_name
+06)------Inner Join: t1.t1_id = t2.t2_id
+07)--------Filter: t1.t1_id IS NOT NULL
+08)----------TableScan: t1 projection=[t1_id]
+09)--------SubqueryAlias: t2
+10)----------Filter: t2.t2_name IS NOT NULL AND t2.t2_id IS NOT NULL
+11)------------TableScan: t2 projection=[t2_id, t2_name]
 
 #subquery_contains_join_contains_sub_query_alias_correlated_columns
 query TT
@@ -582,14 +593,17 @@ explain SELECT t0_id, t0_name FROM t0 WHERE EXISTS (select 1 from (SELECT * FROM
 ----
 logical_plan
 01)LeftSemi Join: t0.t0_id = __correlated_sq_1.t1_id, t0.t0_name = __correlated_sq_1.t2_name
-02)--TableScan: t0 projection=[t0_id, t0_name]
-03)--SubqueryAlias: __correlated_sq_1
-04)----Projection: x.t1_id, y.t2_name
-05)------Inner Join: x.t1_id = y.t2_id
-06)--------SubqueryAlias: x
-07)----------TableScan: t1 projection=[t1_id]
-08)--------SubqueryAlias: y
-09)----------TableScan: t2 projection=[t2_id, t2_name]
+02)--Filter: t0.t0_id IS NOT NULL AND t0.t0_name IS NOT NULL
+03)----TableScan: t0 projection=[t0_id, t0_name]
+04)--SubqueryAlias: __correlated_sq_1
+05)----Projection: x.t1_id, y.t2_name
+06)------Inner Join: x.t1_id = y.t2_id
+07)--------SubqueryAlias: x
+08)----------Filter: t1.t1_id IS NOT NULL
+09)------------TableScan: t1 projection=[t1_id]
+10)--------SubqueryAlias: y
+11)----------Filter: t2.t2_name IS NOT NULL AND t2.t2_id IS NOT NULL
+12)------------TableScan: t2 projection=[t2_id, t2_name]
 
 #support_order_by_correlated_columns
 query TT
@@ -622,9 +636,11 @@ explain SELECT t1_id, t1_name FROM t1 WHERE EXISTS (SELECT * FROM t2 WHERE t2_id
 ----
 logical_plan
 01)LeftSemi Join: t1.t1_id = __correlated_sq_1.t2_id
-02)--TableScan: t1 projection=[t1_id, t1_name]
-03)--SubqueryAlias: __correlated_sq_1
-04)----TableScan: t2 projection=[t2_id]
+02)--Filter: t1.t1_id IS NOT NULL
+03)----TableScan: t1 projection=[t1_id, t1_name]
+04)--SubqueryAlias: __correlated_sq_1
+05)----Filter: t2.t2_id IS NOT NULL
+06)------TableScan: t2 projection=[t2_id]
 
 query IT rowsort
 SELECT t1_id, t1_name FROM t1 WHERE EXISTS (SELECT * FROM t2 WHERE t2_id = t1_id limit 1)
@@ -681,10 +697,12 @@ explain SELECT t1_id, t1_name FROM t1 WHERE t1_id in (SELECT t2_id FROM t2 limit
 ----
 logical_plan
 01)LeftSemi Join: t1.t1_id = __correlated_sq_1.t2_id
-02)--TableScan: t1 projection=[t1_id, t1_name]
-03)--SubqueryAlias: __correlated_sq_1
-04)----Limit: skip=0, fetch=10
-05)------TableScan: t2 projection=[t2_id], fetch=10
+02)--Filter: t1.t1_id IS NOT NULL
+03)----TableScan: t1 projection=[t1_id, t1_name]
+04)--SubqueryAlias: __correlated_sq_1
+05)----Filter: t2.t2_id IS NOT NULL
+06)------Limit: skip=0, fetch=10
+07)--------TableScan: t2 projection=[t2_id], fetch=10
 
 
 #uncorrelated_scalar_subquery_with_limit0
@@ -787,7 +805,9 @@ logical_plan
 04)----SubqueryAlias: __scalar_sq_1
 05)------Projection: count(*), t2.t2_int, Boolean(true) AS __always_true
 06)--------Aggregate: groupBy=[[t2.t2_int]], aggr=[[count(Int64(1)) AS count(*)]]
-07)----------TableScan: t2 projection=[t2_int]
+07)----------Filter: t2.t2_int IS NOT NULL
+08)------------TableScan: t2 projection=[t2_int]
+
 
 query II rowsort
 SELECT t1_id, (SELECT count(*) FROM t2 WHERE t2.t2_int = t1.t1_int) from t1
@@ -809,7 +829,8 @@ logical_plan
 04)----SubqueryAlias: __scalar_sq_1
 05)------Projection: count(*), t2.t2_int, Boolean(true) AS __always_true
 06)--------Aggregate: groupBy=[[t2.t2_int]], aggr=[[count(Int64(1)) AS count(*)]]
-07)----------TableScan: t2 projection=[t2_int]
+07)----------Filter: t2.t2_int IS NOT NULL
+08)------------TableScan: t2 projection=[t2_int]
 
 query II rowsort
 SELECT t1_id, (SELECT count(*) FROM t2 WHERE t2.t2_int = t1.t1_int) as cnt from t1
@@ -830,7 +851,8 @@ logical_plan
 04)----SubqueryAlias: __scalar_sq_1
 05)------Projection: count(*) AS _cnt, t2.t2_int, Boolean(true) AS __always_true
 06)--------Aggregate: groupBy=[[t2.t2_int]], aggr=[[count(Int64(1)) AS count(*)]]
-07)----------TableScan: t2 projection=[t2_int]
+07)----------Filter: t2.t2_int IS NOT NULL
+08)------------TableScan: t2 projection=[t2_int]
 
 query II rowsort
 SELECT t1_id, (SELECT count(*) as _cnt FROM t2 WHERE t2.t2_int = t1.t1_int) as cnt from t1
@@ -851,7 +873,8 @@ logical_plan
 04)----SubqueryAlias: __scalar_sq_1
 05)------Projection: count(*) + Int64(2) AS _cnt, t2.t2_int, Boolean(true) AS __always_true
 06)--------Aggregate: groupBy=[[t2.t2_int]], aggr=[[count(Int64(1)) AS count(*)]]
-07)----------TableScan: t2 projection=[t2_int]
+07)----------Filter: t2.t2_int IS NOT NULL
+08)------------TableScan: t2 projection=[t2_int]
 
 query II rowsort
 SELECT t1_id, (SELECT count(*) + 2 as _cnt FROM t2 WHERE t2.t2_int = t1.t1_int) from t1
@@ -874,7 +897,8 @@ logical_plan
 06)--------SubqueryAlias: __scalar_sq_1
 07)----------Projection: count(*), t2.t2_id, Boolean(true) AS __always_true
 08)------------Aggregate: groupBy=[[t2.t2_id]], aggr=[[count(Int64(1)) AS count(*)]]
-09)--------------TableScan: t2 projection=[t2_id]
+09)--------------Filter: t2.t2_id IS NOT NULL
+10)----------------TableScan: t2 projection=[t2_id]
 
 query I rowsort
 select t1.t1_int from t1 where (select count(*) from t2 where t1.t1_id = t2.t2_id) < t1.t1_int
@@ -896,7 +920,8 @@ logical_plan
 05)------Projection: count(*) + Int64(2) AS cnt_plus_2, t2.t2_int
 06)--------Filter: count(*) > Int64(1)
 07)----------Aggregate: groupBy=[[t2.t2_int]], aggr=[[count(Int64(1)) AS count(*)]]
-08)------------TableScan: t2 projection=[t2_int]
+08)------------Filter: t2.t2_int IS NOT NULL
+09)--------------TableScan: t2 projection=[t2_int]
 
 query II rowsort
 SELECT t1_id, (SELECT count(*) + 2 as cnt_plus_2 FROM t2 WHERE t2.t2_int = t1.t1_int having count(*) >1) from t1
@@ -918,7 +943,8 @@ logical_plan
 04)----SubqueryAlias: __scalar_sq_1
 05)------Projection: count(*) + Int64(2) AS cnt_plus_2, t2.t2_int, count(*), Boolean(true) AS __always_true
 06)--------Aggregate: groupBy=[[t2.t2_int]], aggr=[[count(Int64(1)) AS count(*)]]
-07)----------TableScan: t2 projection=[t2_int]
+07)----------Filter: t2.t2_int IS NOT NULL
+08)------------TableScan: t2 projection=[t2_int]
 
 query II rowsort
 SELECT t1_id, (SELECT count(*) + 2 as cnt_plus_2 FROM t2 WHERE t2.t2_int = t1.t1_int having count(*) = 0) from t1
@@ -942,7 +968,8 @@ logical_plan
 07)--------SubqueryAlias: __scalar_sq_1
 08)----------Projection: count(*), t2.t2_int, Boolean(true) AS __always_true
 09)------------Aggregate: groupBy=[[t2.t2_int]], aggr=[[count(Int64(1)) AS count(*)]]
-10)--------------TableScan: t2 projection=[t2_int]
+10)--------------Filter: t2.t2_int IS NOT NULL
+11)----------------TableScan: t2 projection=[t2_int]
 
 query I rowsort
 select t1.t1_int from t1 group by t1.t1_int having (select count(*) from t2 where t1.t1_int = t2.t2_int) = 0
@@ -963,7 +990,8 @@ logical_plan
 06)--------SubqueryAlias: __scalar_sq_1
 07)----------Projection: count(*) AS cnt, t2.t2_int, Boolean(true) AS __always_true
 08)------------Aggregate: groupBy=[[t2.t2_int]], aggr=[[count(Int64(1)) AS count(*)]]
-09)--------------TableScan: t2 projection=[t2_int]
+09)--------------Filter: t2.t2_int IS NOT NULL
+10)----------------TableScan: t2 projection=[t2_int]
 
 
 query I rowsort
@@ -993,7 +1021,8 @@ logical_plan
 06)--------SubqueryAlias: __scalar_sq_1
 07)----------Projection: count(*) + Int64(1) + Int64(1) AS cnt_plus_two, t2.t2_int, count(*), Boolean(true) AS __always_true
 08)------------Aggregate: groupBy=[[t2.t2_int]], aggr=[[count(Int64(1)) AS count(*)]]
-09)--------------TableScan: t2 projection=[t2_int]
+09)--------------Filter: t2.t2_int IS NOT NULL
+10)----------------TableScan: t2 projection=[t2_int]
 
 query I rowsort
 select t1.t1_int from t1 where (
@@ -1022,7 +1051,8 @@ logical_plan
 06)--------SubqueryAlias: __scalar_sq_1
 07)----------Projection: CASE WHEN count(*) = Int64(1) THEN Int64(NULL) ELSE count(*) END AS cnt, t2.t2_int, Boolean(true) AS __always_true
 08)------------Aggregate: groupBy=[[t2.t2_int]], aggr=[[count(Int64(1)) AS count(*)]]
-09)--------------TableScan: t2 projection=[t2_int]
+09)--------------Filter: t2.t2_int IS NOT NULL
+10)----------------TableScan: t2 projection=[t2_int]
 
 
 query I rowsort
diff --git a/datafusion/sqllogictest/test_files/tpch/create_tables.slt.part b/datafusion/sqllogictest/test_files/tpch/create_tables.slt.part
index 92507aaf947f..a4a210382737 100644
--- a/datafusion/sqllogictest/test_files/tpch/create_tables.slt.part
+++ b/datafusion/sqllogictest/test_files/tpch/create_tables.slt.part
@@ -30,7 +30,7 @@ CREATE EXTERNAL TABLE IF NOT EXISTS supplier (
         s_phone VARCHAR NOT NULL,
         s_acctbal DECIMAL(15, 2) NOT NULL,
         s_comment VARCHAR NOT NULL,
-        NOT NULL VARCHAR NOT NULL,
+        s_rev VARCHAR NOT NULL,
 ) STORED AS CSV LOCATION 'test_files/tpch/data/supplier.tbl' OPTIONS ('format.delimiter' '|', 'format.has_header' 'false');
 
 statement ok

From 4427a9f2b0070ebc57487e603ffea11e502984d8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <danielheres@gmail.com>
Date: Sat, 7 Sep 2024 21:13:19 +0200
Subject: [PATCH 19/22] WIP: update substrait roundtrip expected plans for null join key filters

---
 .../tests/cases/roundtrip_logical_plan.rs          | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs
index cc353ab36d97..ed8ba55e802d 100644
--- a/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs
+++ b/datafusion/substrait/tests/cases/roundtrip_logical_plan.rs
@@ -491,8 +491,10 @@ async fn roundtrip_exists_filter() -> Result<()> {
         "SELECT b FROM data d1 WHERE EXISTS (SELECT * FROM data2 d2 WHERE d2.a = d1.a AND d2.e != d1.e)",
         "Projection: data.b\
         \n  LeftSemi Join: data.a = data2.a Filter: data2.e != CAST(data.e AS Int64)\
-        \n    TableScan: data projection=[a, b, e]\
-        \n    TableScan: data2 projection=[a, e]",
+        \n    Filter: data.a IS NOT NULL\
+        \n      TableScan: data projection=[a, b, e], partial_filters=[data.a IS NOT NULL]\
+        \n    Filter: data2.a IS NOT NULL\
+        \n      TableScan: data2 projection=[a, e], partial_filters=[data2.a IS NOT NULL]",
         false // "d1" vs "data" field qualifier
     ).await
 }
@@ -502,9 +504,11 @@ async fn inner_join() -> Result<()> {
     assert_expected_plan(
         "SELECT data.a FROM data JOIN data2 ON data.a = data2.a",
         "Projection: data.a\
-         \n  Inner Join: data.a = data2.a\
-         \n    TableScan: data projection=[a]\
-         \n    TableScan: data2 projection=[a]",
+        \n  Inner Join: data.a = data2.a\
+        \n    Filter: data.a IS NOT NULL\
+        \n      TableScan: data projection=[a], partial_filters=[data.a IS NOT NULL]\
+        \n    Filter: data2.a IS NOT NULL\
+        \n      TableScan: data2 projection=[a], partial_filters=[data2.a IS NOT NULL]",
         true,
     )
     .await

From 3645f41cdc713d08ad9c8c41bfdd7142df6022bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <danielheres@gmail.com>
Date: Sat, 7 Sep 2024 21:15:08 +0200
Subject: [PATCH 20/22] WIP: update TPC-H q15 expected plans for null join key filters

---
 .../sqllogictest/test_files/tpch/q15.slt.part | 71 ++++++++++---------
 1 file changed, 39 insertions(+), 32 deletions(-)

diff --git a/datafusion/sqllogictest/test_files/tpch/q15.slt.part b/datafusion/sqllogictest/test_files/tpch/q15.slt.part
index 2374fd8430a4..83217ac86b71 100644
--- a/datafusion/sqllogictest/test_files/tpch/q15.slt.part
+++ b/datafusion/sqllogictest/test_files/tpch/q15.slt.part
@@ -58,18 +58,20 @@ logical_plan
 06)----------TableScan: supplier projection=[s_suppkey, s_name, s_address, s_phone]
 07)----------SubqueryAlias: revenue0
 08)------------Projection: lineitem.l_suppkey AS supplier_no, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS total_revenue
-09)--------------Aggregate: groupBy=[[lineitem.l_suppkey]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]]
-10)----------------Projection: lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount
-11)------------------Filter: lineitem.l_shipdate >= Date32("1996-01-01") AND lineitem.l_shipdate < Date32("1996-04-01")
-12)--------------------TableScan: lineitem projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("1996-01-01"), lineitem.l_shipdate < Date32("1996-04-01")]
-13)------SubqueryAlias: __scalar_sq_1
-14)--------Aggregate: groupBy=[[]], aggr=[[max(revenue0.total_revenue)]]
-15)----------SubqueryAlias: revenue0
-16)------------Projection: sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS total_revenue
-17)--------------Aggregate: groupBy=[[lineitem.l_suppkey]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]]
-18)----------------Projection: lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount
-19)------------------Filter: lineitem.l_shipdate >= Date32("1996-01-01") AND lineitem.l_shipdate < Date32("1996-04-01")
-20)--------------------TableScan: lineitem projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("1996-01-01"), lineitem.l_shipdate < Date32("1996-04-01")]
+09)--------------Filter: sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) IS NOT NULL
+10)----------------Aggregate: groupBy=[[lineitem.l_suppkey]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]]
+11)------------------Projection: lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount
+12)--------------------Filter: lineitem.l_shipdate >= Date32("1996-01-01") AND lineitem.l_shipdate < Date32("1996-04-01")
+13)----------------------TableScan: lineitem projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("1996-01-01"), lineitem.l_shipdate < Date32("1996-04-01")]
+14)------SubqueryAlias: __scalar_sq_1
+15)--------Filter: max(revenue0.total_revenue) IS NOT NULL
+16)----------Aggregate: groupBy=[[]], aggr=[[max(revenue0.total_revenue)]]
+17)------------SubqueryAlias: revenue0
+18)--------------Projection: sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount) AS total_revenue
+19)----------------Aggregate: groupBy=[[lineitem.l_suppkey]], aggr=[[sum(lineitem.l_extendedprice * (Decimal128(Some(1),20,0) - lineitem.l_discount)) AS sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]]
+20)------------------Projection: lineitem.l_suppkey, lineitem.l_extendedprice, lineitem.l_discount
+21)--------------------Filter: lineitem.l_shipdate >= Date32("1996-01-01") AND lineitem.l_shipdate < Date32("1996-04-01")
+22)----------------------TableScan: lineitem projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate], partial_filters=[lineitem.l_shipdate >= Date32("1996-01-01"), lineitem.l_shipdate < Date32("1996-04-01")]
 physical_plan
 01)SortPreservingMergeExec: [s_suppkey@0 ASC NULLS LAST]
 02)--SortExec: expr=[s_suppkey@0 ASC NULLS LAST], preserve_partitioning=[true]
@@ -84,26 +86,31 @@ physical_plan
 11)--------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
 12)----------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_name, s_address, s_phone], has_header=false
 13)----------------ProjectionExec: expr=[l_suppkey@0 as supplier_no, sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as total_revenue]
-14)------------------AggregateExec: mode=FinalPartitioned, gby=[l_suppkey@0 as l_suppkey], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]
-15)--------------------CoalesceBatchesExec: target_batch_size=8192
-16)----------------------RepartitionExec: partitioning=Hash([l_suppkey@0], 4), input_partitions=4
-17)------------------------AggregateExec: mode=Partial, gby=[l_suppkey@0 as l_suppkey], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]
-18)--------------------------CoalesceBatchesExec: target_batch_size=8192
-19)----------------------------FilterExec: l_shipdate@3 >= 1996-01-01 AND l_shipdate@3 < 1996-04-01, projection=[l_suppkey@0, l_extendedprice@1, l_discount@2]
-20)------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate], has_header=false
-21)--------CoalesceBatchesExec: target_batch_size=8192
-22)----------RepartitionExec: partitioning=Hash([max(revenue0.total_revenue)@0], 4), input_partitions=1
-23)------------AggregateExec: mode=Final, gby=[], aggr=[max(revenue0.total_revenue)]
-24)--------------CoalescePartitionsExec
-25)----------------AggregateExec: mode=Partial, gby=[], aggr=[max(revenue0.total_revenue)]
-26)------------------ProjectionExec: expr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as total_revenue]
-27)--------------------AggregateExec: mode=FinalPartitioned, gby=[l_suppkey@0 as l_suppkey], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]
-28)----------------------CoalesceBatchesExec: target_batch_size=8192
-29)------------------------RepartitionExec: partitioning=Hash([l_suppkey@0], 4), input_partitions=4
-30)--------------------------AggregateExec: mode=Partial, gby=[l_suppkey@0 as l_suppkey], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]
-31)----------------------------CoalesceBatchesExec: target_batch_size=8192
-32)------------------------------FilterExec: l_shipdate@3 >= 1996-01-01 AND l_shipdate@3 < 1996-04-01, projection=[l_suppkey@0, l_extendedprice@1, l_discount@2]
-33)--------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate], has_header=false
+14)------------------CoalesceBatchesExec: target_batch_size=8192
+15)--------------------FilterExec: sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 IS NOT NULL
+16)----------------------AggregateExec: mode=FinalPartitioned, gby=[l_suppkey@0 as l_suppkey], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]
+17)------------------------CoalesceBatchesExec: target_batch_size=8192
+18)--------------------------RepartitionExec: partitioning=Hash([l_suppkey@0], 4), input_partitions=4
+19)----------------------------AggregateExec: mode=Partial, gby=[l_suppkey@0 as l_suppkey], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]
+20)------------------------------CoalesceBatchesExec: target_batch_size=8192
+21)--------------------------------FilterExec: l_shipdate@3 >= 1996-01-01 AND l_shipdate@3 < 1996-04-01, projection=[l_suppkey@0, l_extendedprice@1, l_discount@2]
+22)----------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate], has_header=false
+23)--------CoalesceBatchesExec: target_batch_size=8192
+24)----------RepartitionExec: partitioning=Hash([max(revenue0.total_revenue)@0], 4), input_partitions=4
+25)------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+26)--------------CoalesceBatchesExec: target_batch_size=8192
+27)----------------FilterExec: max(revenue0.total_revenue)@0 IS NOT NULL
+28)------------------AggregateExec: mode=Final, gby=[], aggr=[max(revenue0.total_revenue)]
+29)--------------------CoalescePartitionsExec
+30)----------------------AggregateExec: mode=Partial, gby=[], aggr=[max(revenue0.total_revenue)]
+31)------------------------ProjectionExec: expr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)@1 as total_revenue]
+32)--------------------------AggregateExec: mode=FinalPartitioned, gby=[l_suppkey@0 as l_suppkey], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]
+33)----------------------------CoalesceBatchesExec: target_batch_size=8192
+34)------------------------------RepartitionExec: partitioning=Hash([l_suppkey@0], 4), input_partitions=4
+35)--------------------------------AggregateExec: mode=Partial, gby=[l_suppkey@0 as l_suppkey], aggr=[sum(lineitem.l_extendedprice * Int64(1) - lineitem.l_discount)]
+36)----------------------------------CoalesceBatchesExec: target_batch_size=8192
+37)------------------------------------FilterExec: l_shipdate@3 >= 1996-01-01 AND l_shipdate@3 < 1996-04-01, projection=[l_suppkey@0, l_extendedprice@1, l_discount@2]
+38)--------------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:0..18561749], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:18561749..37123498], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:37123498..55685247], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/lineitem.tbl:55685247..74246996]]}, projection=[l_suppkey, l_extendedprice, l_discount, l_shipdate], has_header=false
 
 query ITTTR
 with revenue0 (supplier_no, total_revenue) as (

From 54b344e188cfc5acd804d067087dfae6100359bf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <danielheres@gmail.com>
Date: Sat, 7 Sep 2024 21:38:21 +0200
Subject: [PATCH 21/22] WIP: update TPC-H q2 expected plans for null join key filters

---
 .../sqllogictest/test_files/tpch/q2.slt.part  | 79 ++++++++++---------
 1 file changed, 41 insertions(+), 38 deletions(-)

diff --git a/datafusion/sqllogictest/test_files/tpch/q2.slt.part b/datafusion/sqllogictest/test_files/tpch/q2.slt.part
index 17f3b78a089d..6496f453fe66 100644
--- a/datafusion/sqllogictest/test_files/tpch/q2.slt.part
+++ b/datafusion/sqllogictest/test_files/tpch/q2.slt.part
@@ -85,19 +85,20 @@ logical_plan
 20)--------------TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = Utf8("EUROPE")]
 21)------SubqueryAlias: __scalar_sq_1
 22)--------Projection: min(partsupp.ps_supplycost), partsupp.ps_partkey
-23)----------Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[min(partsupp.ps_supplycost)]]
-24)------------Projection: partsupp.ps_partkey, partsupp.ps_supplycost
-25)--------------Inner Join: nation.n_regionkey = region.r_regionkey
-26)----------------Projection: partsupp.ps_partkey, partsupp.ps_supplycost, nation.n_regionkey
-27)------------------Inner Join: supplier.s_nationkey = nation.n_nationkey
-28)--------------------Projection: partsupp.ps_partkey, partsupp.ps_supplycost, supplier.s_nationkey
-29)----------------------Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
-30)------------------------TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost]
-31)------------------------TableScan: supplier projection=[s_suppkey, s_nationkey]
-32)--------------------TableScan: nation projection=[n_nationkey, n_regionkey]
-33)----------------Projection: region.r_regionkey
-34)------------------Filter: region.r_name = Utf8("EUROPE")
-35)--------------------TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = Utf8("EUROPE")]
+23)----------Filter: min(partsupp.ps_supplycost) IS NOT NULL
+24)------------Aggregate: groupBy=[[partsupp.ps_partkey]], aggr=[[min(partsupp.ps_supplycost)]]
+25)--------------Projection: partsupp.ps_partkey, partsupp.ps_supplycost
+26)----------------Inner Join: nation.n_regionkey = region.r_regionkey
+27)------------------Projection: partsupp.ps_partkey, partsupp.ps_supplycost, nation.n_regionkey
+28)--------------------Inner Join: supplier.s_nationkey = nation.n_nationkey
+29)----------------------Projection: partsupp.ps_partkey, partsupp.ps_supplycost, supplier.s_nationkey
+30)------------------------Inner Join: partsupp.ps_suppkey = supplier.s_suppkey
+31)--------------------------TableScan: partsupp projection=[ps_partkey, ps_suppkey, ps_supplycost]
+32)--------------------------TableScan: supplier projection=[s_suppkey, s_nationkey]
+33)----------------------TableScan: nation projection=[n_nationkey, n_regionkey]
+34)------------------Projection: region.r_regionkey
+35)--------------------Filter: region.r_name = Utf8("EUROPE")
+36)----------------------TableScan: region projection=[r_regionkey, r_name], partial_filters=[region.r_name = Utf8("EUROPE")]
 physical_plan
 01)SortPreservingMergeExec: [s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], fetch=10
 02)--SortExec: TopK(fetch=10), expr=[s_acctbal@0 DESC,n_name@2 ASC NULLS LAST,s_name@1 ASC NULLS LAST,p_partkey@3 ASC NULLS LAST], preserve_partitioning=[true]
@@ -147,37 +148,39 @@ physical_plan
 46)----------CoalesceBatchesExec: target_batch_size=8192
 47)------------RepartitionExec: partitioning=Hash([ps_partkey@1, min(partsupp.ps_supplycost)@0], 4), input_partitions=4
 48)--------------ProjectionExec: expr=[min(partsupp.ps_supplycost)@1 as min(partsupp.ps_supplycost), ps_partkey@0 as ps_partkey]
-49)----------------AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)]
-50)------------------CoalesceBatchesExec: target_batch_size=8192
-51)--------------------RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4
-52)----------------------AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)]
-53)------------------------CoalesceBatchesExec: target_batch_size=8192
-54)--------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_regionkey@2, r_regionkey@0)], projection=[ps_partkey@0, ps_supplycost@1]
+49)----------------CoalesceBatchesExec: target_batch_size=8192
+50)------------------FilterExec: min(partsupp.ps_supplycost)@1 IS NOT NULL
+51)--------------------AggregateExec: mode=FinalPartitioned, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)]
+52)----------------------CoalesceBatchesExec: target_batch_size=8192
+53)------------------------RepartitionExec: partitioning=Hash([ps_partkey@0], 4), input_partitions=4
+54)--------------------------AggregateExec: mode=Partial, gby=[ps_partkey@0 as ps_partkey], aggr=[min(partsupp.ps_supplycost)]
 55)----------------------------CoalesceBatchesExec: target_batch_size=8192
-56)------------------------------RepartitionExec: partitioning=Hash([n_regionkey@2], 4), input_partitions=4
+56)------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(n_regionkey@2, r_regionkey@0)], projection=[ps_partkey@0, ps_supplycost@1]
 57)--------------------------------CoalesceBatchesExec: target_batch_size=8192
-58)----------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_nationkey@2, n_nationkey@0)], projection=[ps_partkey@0, ps_supplycost@1, n_regionkey@4]
+58)----------------------------------RepartitionExec: partitioning=Hash([n_regionkey@2], 4), input_partitions=4
 59)------------------------------------CoalesceBatchesExec: target_batch_size=8192
-60)--------------------------------------RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4
+60)--------------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(s_nationkey@2, n_nationkey@0)], projection=[ps_partkey@0, ps_supplycost@1, n_regionkey@4]
 61)----------------------------------------CoalesceBatchesExec: target_batch_size=8192
-62)------------------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_suppkey@1, s_suppkey@0)], projection=[ps_partkey@0, ps_supplycost@2, s_nationkey@4]
+62)------------------------------------------RepartitionExec: partitioning=Hash([s_nationkey@2], 4), input_partitions=4
 63)--------------------------------------------CoalesceBatchesExec: target_batch_size=8192
-64)----------------------------------------------RepartitionExec: partitioning=Hash([ps_suppkey@1], 4), input_partitions=4
-65)------------------------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:0..2932049], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:2932049..5864098], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:5864098..8796147], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:8796147..11728193]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost], has_header=false
-66)--------------------------------------------CoalesceBatchesExec: target_batch_size=8192
-67)----------------------------------------------RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4
-68)------------------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
-69)--------------------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_nationkey], has_header=false
-70)------------------------------------CoalesceBatchesExec: target_batch_size=8192
-71)--------------------------------------RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4
-72)----------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
-73)------------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_regionkey], has_header=false
-74)----------------------------CoalesceBatchesExec: target_batch_size=8192
-75)------------------------------RepartitionExec: partitioning=Hash([r_regionkey@0], 4), input_partitions=4
+64)----------------------------------------------HashJoinExec: mode=Partitioned, join_type=Inner, on=[(ps_suppkey@1, s_suppkey@0)], projection=[ps_partkey@0, ps_supplycost@2, s_nationkey@4]
+65)------------------------------------------------CoalesceBatchesExec: target_batch_size=8192
+66)--------------------------------------------------RepartitionExec: partitioning=Hash([ps_suppkey@1], 4), input_partitions=4
+67)----------------------------------------------------CsvExec: file_groups={4 groups: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:0..2932049], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:2932049..5864098], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:5864098..8796147], [WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/partsupp.tbl:8796147..11728193]]}, projection=[ps_partkey, ps_suppkey, ps_supplycost], has_header=false
+68)------------------------------------------------CoalesceBatchesExec: target_batch_size=8192
+69)--------------------------------------------------RepartitionExec: partitioning=Hash([s_suppkey@0], 4), input_partitions=4
+70)----------------------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+71)------------------------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/supplier.tbl]]}, projection=[s_suppkey, s_nationkey], has_header=false
+72)----------------------------------------CoalesceBatchesExec: target_batch_size=8192
+73)------------------------------------------RepartitionExec: partitioning=Hash([n_nationkey@0], 4), input_partitions=4
+74)--------------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+75)----------------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/nation.tbl]]}, projection=[n_nationkey, n_regionkey], has_header=false
 76)--------------------------------CoalesceBatchesExec: target_batch_size=8192
-77)----------------------------------FilterExec: r_name@1 = EUROPE, projection=[r_regionkey@0]
-78)------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
-79)--------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/region.tbl]]}, projection=[r_regionkey, r_name], has_header=false
+77)----------------------------------RepartitionExec: partitioning=Hash([r_regionkey@0], 4), input_partitions=4
+78)------------------------------------CoalesceBatchesExec: target_batch_size=8192
+79)--------------------------------------FilterExec: r_name@1 = EUROPE, projection=[r_regionkey@0]
+80)----------------------------------------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+81)------------------------------------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/sqllogictest/test_files/tpch/data/region.tbl]]}, projection=[r_regionkey, r_name], has_header=false
 
 
 

From f56293c65ac5e284bf3a0c42ac8741579ece5aa3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dani=C3=ABl=20Heres?= <danielheres@gmail.com>
Date: Mon, 9 Sep 2024 20:35:17 +0200
Subject: [PATCH 22/22] WIP: add nullable-key inner join benchmarks; raise THREADS to 10

---
 datafusion/core/benches/sql_query_with_io.rs | 40 +++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/datafusion/core/benches/sql_query_with_io.rs b/datafusion/core/benches/sql_query_with_io.rs
index aef39a04e47e..e6eb95001374 100644
--- a/datafusion/core/benches/sql_query_with_io.rs
+++ b/datafusion/core/benches/sql_query_with_io.rs
@@ -42,7 +42,7 @@ use rand::{rngs::StdRng, Rng, SeedableRng};
 use tokio::runtime::Runtime;
 use url::Url;
 
-const THREADS: usize = 4;
+const THREADS: usize = 10;
 const TABLES: usize = 3;
 const TABLE_PARTITIONS: usize = 10;
 const PARTITION_FILES: usize = 2;
@@ -58,6 +58,7 @@ fn schema() -> SchemaRef {
     Arc::new(Schema::new(vec![
         Field::new("id", DataType::UInt64, false),
         Field::new("payload", DataType::Int64, false),
+        Field::new("optional_id", DataType::UInt64, true),
     ]))
 }
 
@@ -65,15 +66,23 @@ fn create_parquet_file(rng: &mut StdRng, id_offset: usize) -> Bytes {
     let schema = schema();
     let mut id_builder = UInt64Builder::new();
     let mut payload_builder = Int64Builder::new();
+    let mut optional_id_builder = UInt64Builder::new();
+
     for row in 0..FILE_ROWS {
         id_builder.append_value((row + id_offset) as u64);
         payload_builder.append_value(rng.gen());
+        if row % 2 == 0 {
+            optional_id_builder.append_null();
+        } else {
+            optional_id_builder.append_value((row + id_offset) as u64);
+        }
     }
     let batch = RecordBatch::try_new(
         Arc::clone(&schema),
         vec![
             Arc::new(id_builder.finish()),
             Arc::new(payload_builder.finish()),
+            Arc::new(optional_id_builder.finish()),
         ],
     )
     .unwrap();
@@ -256,6 +265,35 @@ fn criterion_benchmark(c: &mut Criterion) {
         &format!("{join_query} WHERE {table0_name}.partition = 0"),
         PARTITION_FILES * FILE_ROWS,
     );
+
+    let mut join_query = "SELECT * FROM".to_owned();
+    for table_id in 0..TABLES {
+        let table_name = table_name(table_id);
+        if table_id == 0 {
+            write!(join_query, " {table_name}").unwrap();
+        } else {
+            write!(
+                join_query,
+                " INNER JOIN {table_name} on {table_name}.optional_id = {table0_name}.id AND {table_name}.partition = {table0_name}.partition",
+            ).unwrap();
+        }
+    }
+    bench_query(
+        c,
+        &ctx,
+        &rt,
+        "IO: INNER JOIN (nullable), all tables, all partitions",
+        &join_query,
+        TABLE_PARTITIONS * PARTITION_FILES * FILE_ROWS / 2,
+    );
+    bench_query(
+        c,
+        &ctx,
+        &rt,
+        "IO: INNER JOIN (nullable), all tables, single partition",
+        &format!("{join_query} WHERE {table0_name}.partition = 0"),
+        PARTITION_FILES * FILE_ROWS / 2,
+    );
 }
 
 criterion_group!(benches, criterion_benchmark);