diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs index 2138bd1294b4..e7aa1172a854 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -1452,28 +1452,31 @@ impl DataFrame { pub fn with_column(self, name: &str, expr: Expr) -> Result { let window_func_exprs = find_window_exprs(&[expr.clone()]); - let (plan, mut col_exists, window_func) = if window_func_exprs.is_empty() { - (self.plan, false, false) + let (window_fn_str, plan) = if window_func_exprs.is_empty() { + (None, self.plan) } else { ( + Some(window_func_exprs[0].to_string()), LogicalPlanBuilder::window_plan(self.plan, window_func_exprs)?, - true, - true, ) }; + let mut col_exists = false; let new_column = expr.alias(name); let mut fields: Vec = plan .schema() .iter() - .map(|(qualifier, field)| { + .filter_map(|(qualifier, field)| { if field.name() == name { col_exists = true; - new_column.clone() - } else if window_func && qualifier.is_none() { - col(Column::from((qualifier, field))).alias(name) + Some(new_column.clone()) } else { - col(Column::from((qualifier, field))) + let e = col(Column::from((qualifier, field))); + window_fn_str + .as_ref() + .filter(|s| *s == &e.to_string()) + .is_none() + .then_some(e) } }) .collect(); @@ -2975,7 +2978,8 @@ mod tests { Ok(()) } - // Test issue: https://github.com/apache/datafusion/issues/11982 + // Test issues: https://github.com/apache/datafusion/issues/11982 + // and https://github.com/apache/datafusion/issues/12425 // Window function was creating unwanted projection when using with_column() method. #[tokio::test] async fn test_window_function_with_column() -> Result<()> { @@ -2984,19 +2988,24 @@ mod tests { let df_impl = DataFrame::new(ctx.state(), df.plan.clone()); let func = row_number().alias("row_num"); - // Should create an additional column with alias 'r' that has window func results + // This first `with_column` results in a column without a `qualifier` + let df_impl = df_impl.with_column("s", col("c2") + col("c3"))?; + + // This second `with_column` should only alias `func` as `"r"` let df = df_impl.with_column("r", func)?.limit(0, Some(2))?; - assert_eq!(4, df.schema().fields().len()); + + df.clone().show().await?; + assert_eq!(5, df.schema().fields().len()); let df_results = df.clone().collect().await?; assert_batches_sorted_eq!( [ - "+----+----+-----+---+", - "| c1 | c2 | c3 | r |", - "+----+----+-----+---+", - "| c | 2 | 1 | 1 |", - "| d | 5 | -40 | 2 |", - "+----+----+-----+---+", + "+----+----+-----+-----+---+", + "| c1 | c2 | c3 | s | r |", + "+----+----+-----+-----+---+", + "| c | 2 | 1 | 3 | 1 |", + "| d | 5 | -40 | -35 | 2 |", + "+----+----+-----+-----+---+", ], &df_results );