Skip to content

Commit

Permalink
Merge branch 'apache:main' into add-greatest
Browse files Browse the repository at this point in the history
  • Loading branch information
rluvaton committed Sep 15, 2024
2 parents 69761df + 7bd7747 commit 7d63bd2
Show file tree
Hide file tree
Showing 7 changed files with 131 additions and 82 deletions.
59 changes: 10 additions & 49 deletions datafusion/sql/src/expr/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
// under the License.

use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
use arrow::compute::kernels::cast_utils::parse_interval_month_day_nano;
use arrow::compute::kernels::cast_utils::{
parse_interval_month_day_nano_config, IntervalParseConfig, IntervalUnit,
};
use arrow::datatypes::DECIMAL128_MAX_PRECISION;
use arrow_schema::DataType;
use datafusion_common::{
Expand Down Expand Up @@ -232,27 +234,15 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {

let value = interval_literal(*interval.value, negative)?;

let value = if has_units(&value) {
// If the interval already contains a unit
// `INTERVAL '5 month' rather than `INTERVAL '5' month`
// skip the other unit
value
} else {
// leading_field really means the unit if specified
// for example, "month" in `INTERVAL '5' month`
match interval.leading_field.as_ref() {
Some(leading_field) => {
format!("{value} {leading_field}")
}
None => {
// default to seconds for the units
// `INTERVAL '5' is parsed as '5 seconds'
format!("{value} seconds")
}
}
// leading_field really means the unit if specified
// for example, "month" in `INTERVAL '5' month`
let value = match interval.leading_field.as_ref() {
Some(leading_field) => format!("{value} {leading_field}"),
None => value,
};

let val = parse_interval_month_day_nano(&value)?;
let config = IntervalParseConfig::new(IntervalUnit::Second);
let val = parse_interval_month_day_nano_config(&value, config)?;
Ok(lit(ScalarValue::IntervalMonthDayNano(Some(val))))
}
}
Expand Down Expand Up @@ -292,35 +282,6 @@ fn interval_literal(interval_value: SQLExpr, negative: bool) -> Result<String> {
}
}

// TODO make interval parsing better in arrow-rs / expose `IntervalType`
/// Returns true if `val` already ends with a recognized interval unit
/// (e.g. `"5 month"`), so no default/leading unit needs to be appended.
/// Matching is case-insensitive and purely suffix-based.
fn has_units(val: &str) -> bool {
    // Every unit spelling (singular and plural) accepted by the interval parser.
    const UNIT_SUFFIXES: [&str; 24] = [
        "century",
        "centuries",
        "decade",
        "decades",
        "year",
        "years",
        "month",
        "months",
        "week",
        "weeks",
        "day",
        "days",
        "hour",
        "hours",
        "minute",
        "minutes",
        "second",
        "seconds",
        "millisecond",
        "milliseconds",
        "microsecond",
        "microseconds",
        "nanosecond",
        "nanoseconds",
    ];
    let lowered = val.to_lowercase();
    UNIT_SUFFIXES
        .iter()
        .any(|unit| lowered.ends_with(unit))
}

/// Try to decode bytes from hex literal string.
///
/// None will be returned if the input literal is hex-invalid.
Expand Down
4 changes: 4 additions & 0 deletions datafusion/sql/src/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
order_by,
partition_by,
cluster_by,
clustered_by,
options,
strict,
copy_grants,
Expand Down Expand Up @@ -346,6 +347,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
if cluster_by.is_some() {
return not_impl_err!("Cluster by not supported")?;
}
if clustered_by.is_some() {
return not_impl_err!("Clustered by not supported")?;
}
if options.is_some() {
return not_impl_err!("Options not supported")?;
}
Expand Down
37 changes: 25 additions & 12 deletions datafusion/sql/src/unparser/plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@

use crate::unparser::utils::unproject_agg_exprs;
use datafusion_common::{
internal_err, not_impl_err, plan_err, Column, DataFusionError, Result, TableReference,
internal_err, not_impl_err, plan_err,
tree_node::{TransformedResult, TreeNode},
Column, DataFusionError, Result, TableReference,
};
use datafusion_expr::{
expr::Alias, Distinct, Expr, JoinConstraint, JoinType, LogicalPlan,
Expand All @@ -34,7 +36,7 @@ use super::{
rewrite::{
inject_column_aliases, normalize_union_schema,
rewrite_plan_for_sort_on_non_projected_fields,
subquery_alias_inner_query_and_columns,
subquery_alias_inner_query_and_columns, TableAliasRewriter,
},
utils::{find_agg_node_within_select, unproject_window_exprs, AggVariant},
Unparser,
Expand Down Expand Up @@ -554,13 +556,11 @@ impl Unparser<'_> {
) -> Result<LogicalPlan> {
match plan {
LogicalPlan::TableScan(table_scan) => {
// TODO: support filters for table scan with alias. Remove this check after #12368 issue.
// see the issue: https://github.com/apache/datafusion/issues/12368
if alias.is_some() && !table_scan.filters.is_empty() {
return not_impl_err!(
"Subquery alias is not supported for table scan with pushdown filters"
);
}
let mut filter_alias_rewriter =
alias.as_ref().map(|alias_name| TableAliasRewriter {
table_schema: table_scan.source.schema(),
alias_name: alias_name.clone(),
});

let mut builder = LogicalPlanBuilder::scan(
table_scan.table_name.clone(),
Expand All @@ -587,12 +587,25 @@ impl Unparser<'_> {
builder = builder.project(project_columns)?;
}

let filter_expr = table_scan
let filter_expr: Result<Option<Expr>> = table_scan
.filters
.iter()
.cloned()
.reduce(|acc, expr| acc.and(expr));
if let Some(filter) = filter_expr {
.map(|expr| {
if let Some(ref mut rewriter) = filter_alias_rewriter {
expr.rewrite(rewriter).data()
} else {
Ok(expr)
}
})
.reduce(|acc, expr_result| {
acc.and_then(|acc_expr| {
expr_result.map(|expr| acc_expr.and(expr))
})
})
.transpose();

if let Some(filter) = filter_expr? {
builder = builder.filter(filter)?;
}

Expand Down
40 changes: 38 additions & 2 deletions datafusion/sql/src/unparser/rewrite.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@ use std::{
sync::Arc,
};

use arrow_schema::SchemaRef;
use datafusion_common::{
tree_node::{Transformed, TransformedResult, TreeNode},
Result,
tree_node::{Transformed, TransformedResult, TreeNode, TreeNodeRewriter},
Column, Result, TableReference,
};
use datafusion_expr::{expr::Alias, tree_node::transform_sort_vec};
use datafusion_expr::{Expr, LogicalPlan, Projection, Sort, SortExpr};
Expand Down Expand Up @@ -300,3 +301,38 @@ fn find_projection(logical_plan: &LogicalPlan) -> Option<&Projection> {
_ => None,
}
}
/// A `TreeNodeRewriter` implementation that rewrites `Expr::Column` expressions by
/// replacing the column's name with an alias if the column exists in the provided schema.
///
/// This is typically used to apply table aliases in query plans, ensuring that
/// the column references in the expressions use the correct table alias.
///
/// # Fields
///
/// * `table_schema`: The schema (`SchemaRef`) representing the table structure
///   from which the columns are referenced. This is used to look up columns by their names.
/// * `alias_name`: The alias (`TableReference`) that will replace the table name
///   in the column references when applicable.
///
/// Columns not found in `table_schema` are left untouched (see the
/// `TreeNodeRewriter` impl below).
pub struct TableAliasRewriter {
    pub table_schema: SchemaRef,
    pub alias_name: TableReference,
}

impl TreeNodeRewriter for TableAliasRewriter {
    type Node = Expr;

    /// Pre-order rewrite: when the expression is a column that belongs to
    /// `table_schema`, re-qualify it with `alias_name`; every other
    /// expression (and unknown columns) passes through unchanged.
    fn f_down(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
        if let Expr::Column(column) = expr {
            match self.table_schema.field_with_name(&column.name) {
                Ok(field) => {
                    // Column is part of the aliased table: point it at the alias.
                    let aliased =
                        Column::new(Some(self.alias_name.clone()), field.name().clone());
                    Ok(Transformed::yes(Expr::Column(aliased)))
                }
                // Not in this table's schema — leave the reference as-is.
                Err(_) => Ok(Transformed::no(Expr::Column(column))),
            }
        } else {
            Ok(Transformed::no(expr))
        }
    }
}
26 changes: 13 additions & 13 deletions datafusion/sql/tests/cases/plan_to_sql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -705,19 +705,19 @@ fn test_table_scan_pushdown() -> Result<()> {
"SELECT * FROM t1 WHERE ((t1.id > 1) AND (t1.age < 2))"
);

// TODO: support filters for table scan with alias. Enable this test after #12368 issue is fixed
// see the issue: https://github.com/apache/datafusion/issues/12368
// let table_scan_with_filter_alias = table_scan_with_filters(
// Some("t1"),
// &schema,
// None,
// vec![col("id").gt(col("age"))],
// )?.alias("ta")?.build()?;
// let table_scan_with_filter_alias = plan_to_sql(&table_scan_with_filter_alias)?;
// assert_eq!(
// format!("{}", table_scan_with_filter_alias),
// "SELECT * FROM t1 AS ta WHERE (ta.id > ta.age)"
// );
let table_scan_with_filter_alias = table_scan_with_filters(
Some("t1"),
&schema,
None,
vec![col("id").gt(col("age"))],
)?
.alias("ta")?
.build()?;
let table_scan_with_filter_alias = plan_to_sql(&table_scan_with_filter_alias)?;
assert_eq!(
format!("{}", table_scan_with_filter_alias),
"SELECT * FROM t1 AS ta WHERE (ta.id > ta.age)"
);

let table_scan_with_projection_and_filter = table_scan_with_filters(
Some("t1"),
Expand Down
6 changes: 0 additions & 6 deletions datafusion/sqllogictest/test_files/expr.slt
Original file line number Diff line number Diff line change
Expand Up @@ -229,12 +229,6 @@ SELECT interval '5 day'
----
5 days

# Hour is ignored, this matches PostgreSQL
query ?
SELECT interval '5 day' hour
----
5 days

query ?
SELECT interval '5 day 4 hours 3 minutes 2 seconds 100 milliseconds'
----
Expand Down
41 changes: 41 additions & 0 deletions datafusion/sqllogictest/test_files/interval.slt
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,31 @@ select i - d from t;
query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) to valid types
select i - ts from t;

# interval unit abbreviations and plurals
query ?
select interval '1s'
----
1.000000000 secs

query ?
select '1s'::interval
----
1.000000000 secs

query ?
select interval'1sec'
----
1.000000000 secs

query ?
select interval '1ms'
----
0.001000000 secs

query ?
select interval '1 y' + interval '1 year'
----
24 mons

# interval (scalar) + date / timestamp (array)
query D
Expand All @@ -502,6 +527,22 @@ select '1 month'::interval + ts from t;
2000-02-01T12:11:10
2000-03-01T00:00:00

# trailing extra unit, this matches PostgreSQL
query ?
select interval '5 day 1' hour
----
5 days 1 hours

# trailing extra unit, this matches PostgreSQL
query ?
select interval '5 day 0' hour
----
5 days

# This is interpreted as "0 hours" with PostgreSQL; should be fixed with
# improved interval parsing in arrow-rs (currently rejected as a parse error)
query error DataFusion error: Arrow error: Parser error: Invalid input syntax for type interval: "5 day HOUR"
SELECT interval '5 day' hour

# expected error interval (scalar) - date / timestamp (array)
query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Date32 to valid types
select '1 month'::interval - d from t;
Expand Down

0 comments on commit 7d63bd2

Please sign in to comment.