Skip to content

Commit

Permalink
Merge branch 'apache:main' into add-greatest
Browse files Browse the repository at this point in the history
  • Loading branch information
rluvaton committed Sep 15, 2024
2 parents 69761df + 7bd7747 commit 7d63bd2
Show file tree
Hide file tree
Showing 7 changed files with 131 additions and 82 deletions.
59 changes: 10 additions & 49 deletions datafusion/sql/src/expr/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
// under the License.

use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
use arrow::compute::kernels::cast_utils::parse_interval_month_day_nano;
use arrow::compute::kernels::cast_utils::{
parse_interval_month_day_nano_config, IntervalParseConfig, IntervalUnit,
};
use arrow::datatypes::DECIMAL128_MAX_PRECISION;
use arrow_schema::DataType;
use datafusion_common::{
Expand Down Expand Up @@ -232,27 +234,15 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {

let value = interval_literal(*interval.value, negative)?;

let value = if has_units(&value) {
// If the interval already contains a unit
// `INTERVAL '5 month' rather than `INTERVAL '5' month`
// skip the other unit
value
} else {
// leading_field really means the unit if specified
// for example, "month" in `INTERVAL '5' month`
match interval.leading_field.as_ref() {
Some(leading_field) => {
format!("{value} {leading_field}")
}
None => {
// default to seconds for the units
// `INTERVAL '5' is parsed as '5 seconds'
format!("{value} seconds")
}
}
// leading_field really means the unit if specified
// for example, "month" in `INTERVAL '5' month`
let value = match interval.leading_field.as_ref() {
Some(leading_field) => format!("{value} {leading_field}"),
None => value,
};

let val = parse_interval_month_day_nano(&value)?;
let config = IntervalParseConfig::new(IntervalUnit::Second);
let val = parse_interval_month_day_nano_config(&value, config)?;
Ok(lit(ScalarValue::IntervalMonthDayNano(Some(val))))
}
}
Expand Down Expand Up @@ -292,35 +282,6 @@ fn interval_literal(interval_value: SQLExpr, negative: bool) -> Result<String> {
}
}

// TODO make interval parsing better in arrow-rs / expose `IntervalType`
/// Returns true if `val` already ends with a recognized interval unit
/// (e.g. `"5 month"`), so no default/leading unit needs to be appended.
/// Matching is case-insensitive and purely suffix-based.
fn has_units(val: &str) -> bool {
    // Every unit spelling (singular and plural) accepted by the interval parser.
    const UNIT_SUFFIXES: [&str; 24] = [
        "century",
        "centuries",
        "decade",
        "decades",
        "year",
        "years",
        "month",
        "months",
        "week",
        "weeks",
        "day",
        "days",
        "hour",
        "hours",
        "minute",
        "minutes",
        "second",
        "seconds",
        "millisecond",
        "milliseconds",
        "microsecond",
        "microseconds",
        "nanosecond",
        "nanoseconds",
    ];
    let lowered = val.to_lowercase();
    UNIT_SUFFIXES
        .iter()
        .any(|unit| lowered.ends_with(unit))
}

/// Try to decode bytes from hex literal string.
///
/// None will be returned if the input literal is hex-invalid.
Expand Down
4 changes: 4 additions & 0 deletions datafusion/sql/src/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
order_by,
partition_by,
cluster_by,
clustered_by,
options,
strict,
copy_grants,
Expand Down Expand Up @@ -346,6 +347,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
if cluster_by.is_some() {
return not_impl_err!("Cluster by not supported")?;
}
if clustered_by.is_some() {
return not_impl_err!("Clustered by not supported")?;
}
if options.is_some() {
return not_impl_err!("Options not supported")?;
}
Expand Down
37 changes: 25 additions & 12 deletions datafusion/sql/src/unparser/plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@

use crate::unparser::utils::unproject_agg_exprs;
use datafusion_common::{
internal_err, not_impl_err, plan_err, Column, DataFusionError, Result, TableReference,
internal_err, not_impl_err, plan_err,
tree_node::{TransformedResult, TreeNode},
Column, DataFusionError, Result, TableReference,
};
use datafusion_expr::{
expr::Alias, Distinct, Expr, JoinConstraint, JoinType, LogicalPlan,
Expand All @@ -34,7 +36,7 @@ use super::{
rewrite::{
inject_column_aliases, normalize_union_schema,
rewrite_plan_for_sort_on_non_projected_fields,
subquery_alias_inner_query_and_columns,
subquery_alias_inner_query_and_columns, TableAliasRewriter,
},
utils::{find_agg_node_within_select, unproject_window_exprs, AggVariant},
Unparser,
Expand Down Expand Up @@ -554,13 +556,11 @@ impl Unparser<'_> {
) -> Result<LogicalPlan> {
match plan {
LogicalPlan::TableScan(table_scan) => {
// TODO: support filters for table scan with alias. Remove this check after #12368 issue.
// see the issue: https://github.com/apache/datafusion/issues/12368
if alias.is_some() && !table_scan.filters.is_empty() {
return not_impl_err!(
"Subquery alias is not supported for table scan with pushdown filters"
);
}
let mut filter_alias_rewriter =
alias.as_ref().map(|alias_name| TableAliasRewriter {
table_schema: table_scan.source.schema(),
alias_name: alias_name.clone(),
});

let mut builder = LogicalPlanBuilder::scan(
table_scan.table_name.clone(),
Expand All @@ -587,12 +587,25 @@ impl Unparser<'_> {
builder = builder.project(project_columns)?;
}

let filter_expr = table_scan
let filter_expr: Result<Option<Expr>> = table_scan
.filters
.iter()
.cloned()
.reduce(|acc, expr| acc.and(expr));
if let Some(filter) = filter_expr {
.map(|expr| {
if let Some(ref mut rewriter) = filter_alias_rewriter {
expr.rewrite(rewriter).data()
} else {
Ok(expr)
}
})
.reduce(|acc, expr_result| {
acc.and_then(|acc_expr| {
expr_result.map(|expr| acc_expr.and(expr))
})
})
.transpose();

if let Some(filter) = filter_expr? {
builder = builder.filter(filter)?;
}

Expand Down
40 changes: 38 additions & 2 deletions datafusion/sql/src/unparser/rewrite.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@ use std::{
sync::Arc,
};

use arrow_schema::SchemaRef;
use datafusion_common::{
tree_node::{Transformed, TransformedResult, TreeNode},
Result,
tree_node::{Transformed, TransformedResult, TreeNode, TreeNodeRewriter},
Column, Result, TableReference,
};
use datafusion_expr::{expr::Alias, tree_node::transform_sort_vec};
use datafusion_expr::{Expr, LogicalPlan, Projection, Sort, SortExpr};
Expand Down Expand Up @@ -300,3 +301,38 @@ fn find_projection(logical_plan: &LogicalPlan) -> Option<&Projection> {
_ => None,
}
}
/// A `TreeNodeRewriter` implementation that rewrites `Expr::Column` expressions by
/// replacing the column's name with an alias if the column exists in the provided schema.
///
/// This is typically used to apply table aliases in query plans, ensuring that
/// the column references in the expressions use the correct table alias.
///
/// # Fields
///
/// * `table_schema`: The schema (`SchemaRef`) representing the table structure
///   from which the columns are referenced. This is used to look up columns by their names.
/// * `alias_name`: The alias (`TableReference`) that will replace the table name
///   in the column references when applicable.
///
/// Columns not found in `table_schema` are left untouched (see the
/// `TreeNodeRewriter` impl below).
pub struct TableAliasRewriter {
    pub table_schema: SchemaRef,
    pub alias_name: TableReference,
}

impl TreeNodeRewriter for TableAliasRewriter {
    type Node = Expr;

    /// Pre-order rewrite: when the expression is a column that belongs to
    /// `table_schema`, re-qualify it with `alias_name`; every other
    /// expression (and unknown columns) passes through unchanged.
    fn f_down(&mut self, expr: Expr) -> Result<Transformed<Expr>> {
        if let Expr::Column(column) = expr {
            match self.table_schema.field_with_name(&column.name) {
                Ok(field) => {
                    // Column is part of the aliased table: point it at the alias.
                    let aliased =
                        Column::new(Some(self.alias_name.clone()), field.name().clone());
                    Ok(Transformed::yes(Expr::Column(aliased)))
                }
                // Not in this table's schema — leave the reference as-is.
                Err(_) => Ok(Transformed::no(Expr::Column(column))),
            }
        } else {
            Ok(Transformed::no(expr))
        }
    }
}
26 changes: 13 additions & 13 deletions datafusion/sql/tests/cases/plan_to_sql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -705,19 +705,19 @@ fn test_table_scan_pushdown() -> Result<()> {
"SELECT * FROM t1 WHERE ((t1.id > 1) AND (t1.age < 2))"
);

// TODO: support filters for table scan with alias. Enable this test after #12368 issue is fixed
// see the issue: https://github.com/apache/datafusion/issues/12368
// let table_scan_with_filter_alias = table_scan_with_filters(
// Some("t1"),
// &schema,
// None,
// vec![col("id").gt(col("age"))],
// )?.alias("ta")?.build()?;
// let table_scan_with_filter_alias = plan_to_sql(&table_scan_with_filter_alias)?;
// assert_eq!(
// format!("{}", table_scan_with_filter_alias),
// "SELECT * FROM t1 AS ta WHERE (ta.id > ta.age)"
// );
let table_scan_with_filter_alias = table_scan_with_filters(
Some("t1"),
&schema,
None,
vec![col("id").gt(col("age"))],
)?
.alias("ta")?
.build()?;
let table_scan_with_filter_alias = plan_to_sql(&table_scan_with_filter_alias)?;
assert_eq!(
format!("{}", table_scan_with_filter_alias),
"SELECT * FROM t1 AS ta WHERE (ta.id > ta.age)"
);

let table_scan_with_projection_and_filter = table_scan_with_filters(
Some("t1"),
Expand Down
6 changes: 0 additions & 6 deletions datafusion/sqllogictest/test_files/expr.slt
Original file line number Diff line number Diff line change
Expand Up @@ -229,12 +229,6 @@ SELECT interval '5 day'
----
5 days

# Hour is ignored, this matches PostgreSQL
query ?
SELECT interval '5 day' hour
----
5 days

query ?
SELECT interval '5 day 4 hours 3 minutes 2 seconds 100 milliseconds'
----
Expand Down
41 changes: 41 additions & 0 deletions datafusion/sqllogictest/test_files/interval.slt
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,31 @@ select i - d from t;
query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Timestamp\(Nanosecond, None\) to valid types
select i - ts from t;

# interval unit abbreviations and plurals
query ?
select interval '1s'
----
1.000000000 secs

query ?
select '1s'::interval
----
1.000000000 secs

query ?
select interval'1sec'
----
1.000000000 secs

query ?
select interval '1ms'
----
0.001000000 secs

query ?
select interval '1 y' + interval '1 year'
----
24 mons

# interval (scalar) + date / timestamp (array)
query D
Expand All @@ -502,6 +527,22 @@ select '1 month'::interval + ts from t;
2000-02-01T12:11:10
2000-03-01T00:00:00

# trailing extra unit, this matches PostgreSQL
query ?
select interval '5 day 1' hour
----
5 days 1 hours

# trailing extra unit, this matches PostgreSQL
query ?
select interval '5 day 0' hour
----
5 days

# This is interpreted as "0 hours" with PostgreSQL; should be fixed with
# improved interval parsing in arrow-rs (currently rejected as a parse error)
query error DataFusion error: Arrow error: Parser error: Invalid input syntax for type interval: "5 day HOUR"
SELECT interval '5 day' hour

# expected error interval (scalar) - date / timestamp (array)
query error DataFusion error: Error during planning: Cannot coerce arithmetic expression Interval\(MonthDayNano\) \- Date32 to valid types
select '1 month'::interval - d from t;
Expand Down

0 comments on commit 7d63bd2

Please sign in to comment.