Skip to content

Commit 0e77fb2

Browse files
irenjjalamb
andauthored
Simpler to see expressions in explain tree mode (#15163)
* Simpler to see expressions in tree explain mode * add more * add cast * rename * fmt * add sql_formatter * add license * fix test * fix doc * Update datafusion/physical-expr-common/src/physical_expr.rs Co-authored-by: Andrew Lamb <[email protected]> * add example * fix * simplify col * fix * Update plan * Rename sql_formatter, add doc and examples --------- Co-authored-by: Andrew Lamb <[email protected]>
1 parent 563da92 commit 0e77fb2

File tree

20 files changed

+551
-47
lines changed

20 files changed

+551
-47
lines changed

datafusion/physical-expr-common/src/physical_expr.rs

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
// under the License.
1717

1818
use std::any::Any;
19+
use std::fmt;
1920
use std::fmt::{Debug, Display, Formatter};
2021
use std::hash::{Hash, Hasher};
2122
use std::sync::Arc;
@@ -53,6 +54,12 @@ pub type PhysicalExprRef = Arc<dyn PhysicalExpr>;
5354
/// * [`SessionContext::create_physical_expr`]: A high level API
5455
/// * [`create_physical_expr`]: A low level API
5556
///
57+
/// # Formatting `PhysicalExpr` as strings
58+
/// There are three ways to format `PhysicalExpr` as a string:
59+
/// * [`Debug`]: Standard Rust debugging format (e.g. `Constant { value: ... }`)
60+
/// * [`Display`]: Detailed SQL-like format that shows expression structure (e.g. `Utf8 ("foobar")`). This is often used for debugging and tests.
61+
/// * [`Self::fmt_sql`]: SQL-like human readable format (e.g. `'foobar'`). See also [`fmt_sql`]
62+
///
5663
/// [`SessionContext::create_physical_expr`]: https://docs.rs/datafusion/latest/datafusion/execution/context/struct.SessionContext.html#method.create_physical_expr
5764
/// [`PhysicalPlanner`]: https://docs.rs/datafusion/latest/datafusion/physical_planner/trait.PhysicalPlanner.html
5865
/// [`Expr`]: https://docs.rs/datafusion/latest/datafusion/logical_expr/enum.Expr.html
@@ -266,6 +273,16 @@ pub trait PhysicalExpr: Send + Sync + Display + Debug + DynEq + DynHash {
266273
fn get_properties(&self, _children: &[ExprProperties]) -> Result<ExprProperties> {
267274
Ok(ExprProperties::new_unknown())
268275
}
276+
277+
/// Format this `PhysicalExpr` in nice human readable "SQL" format
278+
///
279+
/// Specifically, this format is designed to be readable by humans, at the
280+
/// expense of details. Use `Display` or `Debug` for more detailed
281+
/// representation.
282+
///
283+
/// See the [`fmt_sql`] function for an example of printing `PhysicalExpr`s as SQL.
284+
///
285+
fn fmt_sql(&self, f: &mut Formatter<'_>) -> fmt::Result;
269286
}
270287

271288
/// [`PhysicalExpr`] can't be constrained by [`Eq`] directly because it must remain object
@@ -363,7 +380,7 @@ where
363380
I: Iterator + Clone,
364381
I::Item: Display,
365382
{
366-
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
383+
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
367384
let mut iter = self.0.clone();
368385
write!(f, "[")?;
369386
if let Some(expr) = iter.next() {
@@ -379,3 +396,53 @@ where
379396

380397
DisplayWrapper(exprs.into_iter())
381398
}
399+
400+
/// Prints a [`PhysicalExpr`] in a SQL-like format
401+
///
402+
/// # Example
403+
/// ```
404+
/// # // The boilerplate needed to create a `PhysicalExpr` for the example
405+
/// # use std::any::Any;
406+
/// # use std::fmt::Formatter;
407+
/// # use std::sync::Arc;
408+
/// # use arrow::array::RecordBatch;
409+
/// # use arrow::datatypes::{DataType, Schema};
410+
/// # use datafusion_common::Result;
411+
/// # use datafusion_expr_common::columnar_value::ColumnarValue;
412+
/// # use datafusion_physical_expr_common::physical_expr::{fmt_sql, DynEq, PhysicalExpr};
413+
/// # #[derive(Debug, Hash, PartialOrd, PartialEq)]
414+
/// # struct MyExpr {};
415+
/// # impl PhysicalExpr for MyExpr {fn as_any(&self) -> &dyn Any { unimplemented!() }
416+
/// # fn data_type(&self, input_schema: &Schema) -> Result<DataType> { unimplemented!() }
417+
/// # fn nullable(&self, input_schema: &Schema) -> Result<bool> { unimplemented!() }
418+
/// # fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> { unimplemented!() }
419+
/// # fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>>{ unimplemented!() }
420+
/// # fn with_new_children(self: Arc<Self>, children: Vec<Arc<dyn PhysicalExpr>>) -> Result<Arc<dyn PhysicalExpr>> { unimplemented!() }
421+
/// # fn fmt_sql(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!(f, "CASE a > b THEN 1 ELSE 0 END") }
422+
/// # }
423+
/// # impl std::fmt::Display for MyExpr {fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { unimplemented!() } }
424+
/// # impl DynEq for MyExpr {fn dyn_eq(&self, other: &dyn Any) -> bool { unimplemented!() } }
425+
/// # fn make_physical_expr() -> Arc<dyn PhysicalExpr> { Arc::new(MyExpr{}) }
426+
/// let expr: Arc<dyn PhysicalExpr> = make_physical_expr();
427+
/// // wrap the expression in `fmt_sql` which can be used with
428+
/// // `format!`, `to_string()`, etc
429+
/// let expr_as_sql = fmt_sql(expr.as_ref());
430+
/// assert_eq!(
431+
/// "The SQL: CASE a > b THEN 1 ELSE 0 END",
432+
/// format!("The SQL: {expr_as_sql}")
433+
/// );
434+
/// ```
435+
pub fn fmt_sql(expr: &dyn PhysicalExpr) -> impl Display + '_ {
436+
struct Wrapper<'a> {
437+
expr: &'a dyn PhysicalExpr,
438+
}
439+
440+
impl Display for Wrapper<'_> {
441+
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
442+
self.expr.fmt_sql(f)?;
443+
Ok(())
444+
}
445+
}
446+
447+
Wrapper { expr }
448+
}

datafusion/physical-expr-common/src/sort_expr.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ use itertools::Itertools;
3737
/// Example:
3838
/// ```
3939
/// # use std::any::Any;
40-
/// # use std::fmt::Display;
40+
/// # use std::fmt::{Display, Formatter};
4141
/// # use std::hash::Hasher;
4242
/// # use std::sync::Arc;
4343
/// # use arrow::array::RecordBatch;
@@ -58,6 +58,7 @@ use itertools::Itertools;
5858
/// # fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> {todo!() }
5959
/// # fn children(&self) -> Vec<&Arc<dyn PhysicalExpr>> {todo!()}
6060
/// # fn with_new_children(self: Arc<Self>, children: Vec<Arc<dyn PhysicalExpr>>) -> Result<Arc<dyn PhysicalExpr>> {todo!()}
61+
/// # fn fmt_sql(&self, f: &mut Formatter<'_>) -> std::fmt::Result { todo!() }
6162
/// # }
6263
/// # impl Display for MyPhysicalExpr {
6364
/// # fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "a") }

datafusion/physical-expr/src/expressions/binary.rs

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,6 +571,32 @@ impl PhysicalExpr for BinaryExpr {
571571
_ => Ok(ExprProperties::new_unknown()),
572572
}
573573
}
574+
575+
fn fmt_sql(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
576+
fn write_child(
577+
f: &mut std::fmt::Formatter,
578+
expr: &dyn PhysicalExpr,
579+
precedence: u8,
580+
) -> std::fmt::Result {
581+
if let Some(child) = expr.as_any().downcast_ref::<BinaryExpr>() {
582+
let p = child.op.precedence();
583+
if p == 0 || p < precedence {
584+
write!(f, "(")?;
585+
child.fmt_sql(f)?;
586+
write!(f, ")")
587+
} else {
588+
child.fmt_sql(f)
589+
}
590+
} else {
591+
expr.fmt_sql(f)
592+
}
593+
}
594+
595+
let precedence = self.op.precedence();
596+
write_child(f, self.left.as_ref(), precedence)?;
597+
write!(f, " {} ", self.op)?;
598+
write_child(f, self.right.as_ref(), precedence)
599+
}
574600
}
575601

576602
/// Casts dictionary array to result type for binary numerical operators. Such operators
@@ -770,6 +796,7 @@ mod tests {
770796
use crate::expressions::{col, lit, try_cast, Column, Literal};
771797

772798
use datafusion_common::plan_datafusion_err;
799+
use datafusion_physical_expr_common::physical_expr::fmt_sql;
773800

774801
/// Performs a binary operation, applying any type coercion necessary
775802
fn binary_op(
@@ -4672,4 +4699,72 @@ mod tests {
46724699

46734700
Ok(())
46744701
}
4702+
4703+
#[test]
fn test_fmt_sql() -> Result<()> {
    // Two Int32 columns; `Display` renders them as `a@0` and `b@1`, while
    // `fmt_sql` is expected to print just the names.
    let schema = Schema::new(vec![
        Field::new("a", DataType::Int32, false),
        Field::new("b", DataType::Int32, false),
    ]);

    // Basic binary expression: a + b
    let sum = binary_expr(
        col("a", &schema)?,
        Operator::Plus,
        col("b", &schema)?,
        &schema,
    )?;
    assert_eq!(sum.to_string(), "a@0 + b@1");
    assert_eq!(fmt_sql(&sum).to_string(), "a + b");

    // Different precedence: a `+` child under a `*` parent keeps its
    // parentheses in both formats.
    let inner_sum = binary_expr(
        col("a", &schema)?,
        Operator::Plus,
        col("b", &schema)?,
        &schema,
    )?;
    let product = binary_expr(
        Arc::new(inner_sum),
        Operator::Multiply,
        col("b", &schema)?,
        &schema,
    )?;
    assert_eq!(product.to_string(), "(a@0 + b@1) * b@1");
    assert_eq!(fmt_sql(&product).to_string(), "(a + b) * b");

    // Same precedence: a `+` child under a `+` parent needs no parentheses.
    let inner_sum = binary_expr(
        col("a", &schema)?,
        Operator::Plus,
        col("b", &schema)?,
        &schema,
    )?;
    let chained_sum = binary_expr(
        Arc::new(inner_sum),
        Operator::Plus,
        col("b", &schema)?,
        &schema,
    )?;
    assert_eq!(chained_sum.to_string(), "a@0 + b@1 + b@1");
    assert_eq!(fmt_sql(&chained_sum).to_string(), "a + b + b");

    // Column compared against a literal.
    let equals_literal = binary_expr(
        col("a", &schema)?,
        Operator::Eq,
        lit(ScalarValue::Int32(Some(42))),
        &schema,
    )?;
    assert_eq!(equals_literal.to_string(), "a@0 = 42");
    assert_eq!(fmt_sql(&equals_literal).to_string(), "a = 42");

    Ok(())
}
46754770
}

datafusion/physical-expr/src/expressions/case.rs

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,29 @@ impl PhysicalExpr for CaseExpr {
559559
)?))
560560
}
561561
}
562+
563+
fn fmt_sql(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
564+
write!(f, "CASE ")?;
565+
if let Some(e) = &self.expr {
566+
e.fmt_sql(f)?;
567+
write!(f, " ")?;
568+
}
569+
570+
for (w, t) in &self.when_then_expr {
571+
write!(f, "WHEN ")?;
572+
w.fmt_sql(f)?;
573+
write!(f, " THEN ")?;
574+
t.fmt_sql(f)?;
575+
write!(f, " ")?;
576+
}
577+
578+
if let Some(e) = &self.else_expr {
579+
write!(f, "ELSE ")?;
580+
e.fmt_sql(f)?;
581+
write!(f, " ")?;
582+
}
583+
write!(f, "END")
584+
}
562585
}
563586

564587
/// Create a CASE expression
@@ -583,6 +606,7 @@ mod tests {
583606
use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode};
584607
use datafusion_expr::type_coercion::binary::comparison_coercion;
585608
use datafusion_expr::Operator;
609+
use datafusion_physical_expr_common::physical_expr::fmt_sql;
586610

587611
#[test]
588612
fn case_with_expr() -> Result<()> {
@@ -1378,4 +1402,35 @@ mod tests {
13781402
comparison_coercion(&left_type, right_type)
13791403
})
13801404
}
1405+
1406+
#[test]
fn test_fmt_sql() -> Result<()> {
    let schema = Schema::new(vec![Field::new("a", DataType::Utf8, true)]);

    // Build: CASE WHEN a = 'foo' THEN 123.3 ELSE 999 END
    let condition = binary(col("a", &schema)?, Operator::Eq, lit("foo"), &schema)?;
    let branches = vec![(condition, lit(123.3f64))];
    let fallback = Some(lit(999i32));

    let case_expr =
        generate_case_when_with_type_coercion(None, branches, fallback, &schema)?;

    // `Display` keeps the `@index` suffix on the column; the ELSE literal
    // shows up wrapped in TRY_CAST in the expected output of both formats.
    assert_eq!(
        case_expr.to_string(),
        "CASE WHEN a@0 = foo THEN 123.3 ELSE TRY_CAST(999 AS Float64) END"
    );

    // `fmt_sql` prints the bare column name.
    assert_eq!(
        fmt_sql(case_expr.as_ref()).to_string(),
        "CASE WHEN a = foo THEN 123.3 ELSE TRY_CAST(999 AS Float64) END"
    );

    Ok(())
}
13811436
}

datafusion/physical-expr/src/expressions/cast.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,14 @@ impl PhysicalExpr for CastExpr {
194194
Ok(ExprProperties::new_unknown().with_range(unbounded))
195195
}
196196
}
197+
198+
fn fmt_sql(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
199+
write!(f, "CAST(")?;
200+
self.expr.fmt_sql(f)?;
201+
write!(f, " AS {:?}", self.cast_type)?;
202+
203+
write!(f, ")")
204+
}
197205
}
198206

199207
/// Return a PhysicalExpression representing `expr` casted to
@@ -243,6 +251,7 @@ mod tests {
243251
datatypes::*,
244252
};
245253
use datafusion_common::assert_contains;
254+
use datafusion_physical_expr_common::physical_expr::fmt_sql;
246255

247256
// runs an end-to-end test of physical type cast
248257
// 1. construct a record batch with a column "a" of type A
@@ -766,4 +775,26 @@ mod tests {
766775
expression.evaluate(&batch)?;
767776
Ok(())
768777
}
778+
779+
#[test]
fn test_fmt_sql() -> Result<()> {
    // (column name, column type, cast target, expected `Display`, expected `fmt_sql`)
    let cases = [
        // Numeric casting
        ("a", Int32, Int64, "CAST(a@0 AS Int64)", "CAST(a AS Int64)"),
        // String casting
        ("b", Utf8, Int32, "CAST(b@0 AS Int32)", "CAST(b AS Int32)"),
    ];

    for (name, column_type, target_type, expected_display, expected_sql) in cases {
        let schema = Schema::new(vec![Field::new(name, column_type, true)]);
        let expr = cast(col(name, &schema)?, &schema, target_type)?;

        let display_string = expr.to_string();
        assert_eq!(display_string, expected_display);

        let sql_string = fmt_sql(expr.as_ref()).to_string();
        assert_eq!(sql_string, expected_sql);
    }

    Ok(())
}
769800
}

datafusion/physical-expr/src/expressions/column.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,10 @@ impl PhysicalExpr for Column {
137137
) -> Result<Arc<dyn PhysicalExpr>> {
138138
Ok(self)
139139
}
140+
141+
fn fmt_sql(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
142+
write!(f, "{}", self.name)
143+
}
140144
}
141145

142146
impl Column {

0 commit comments

Comments
 (0)