Skip to content

Commit

Permalink
Optionally display schema in explain plan (apache#11177)
Browse files Browse the repository at this point in the history
  • Loading branch information
alamb authored and findepi committed Jul 16, 2024
1 parent a93cac7 commit 3a89a3d
Show file tree
Hide file tree
Showing 12 changed files with 193 additions and 31 deletions.
3 changes: 3 additions & 0 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,9 @@ config_namespace! {

/// When set to true, the explain statement will print the partition sizes
pub show_sizes: bool, default = true

/// When set to true, the explain statement will print schema information
pub show_schema: bool, default = false
}
}

Expand Down
12 changes: 11 additions & 1 deletion datafusion/common/src/display/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,19 @@ pub enum PlanType {
InitialPhysicalPlan,
/// The initial physical plan with stats, prepared for execution
InitialPhysicalPlanWithStats,
/// The initial physical plan with schema, prepared for execution
InitialPhysicalPlanWithSchema,
/// The ExecutionPlan which results from applying an optimizer pass
OptimizedPhysicalPlan {
/// The name of the optimizer which produced this plan
optimizer_name: String,
},
/// The final, fully optimized physical plan which would be executed
FinalPhysicalPlan,
/// The final with stats, fully optimized physical which would be executed
/// The final with stats, fully optimized physical plan which would be executed
FinalPhysicalPlanWithStats,
/// The final with schema, fully optimized physical plan which would be executed
FinalPhysicalPlanWithSchema,
}

impl Display for PlanType {
Expand All @@ -76,11 +80,17 @@ impl Display for PlanType {
PlanType::InitialPhysicalPlanWithStats => {
write!(f, "initial_physical_plan_with_stats")
}
PlanType::InitialPhysicalPlanWithSchema => {
write!(f, "initial_physical_plan_with_schema")
}
PlanType::OptimizedPhysicalPlan { optimizer_name } => {
write!(f, "physical_plan after {optimizer_name}")
}
PlanType::FinalPhysicalPlan => write!(f, "physical_plan"),
PlanType::FinalPhysicalPlanWithStats => write!(f, "physical_plan_with_stats"),
PlanType::FinalPhysicalPlanWithSchema => {
write!(f, "physical_plan_with_schema")
}
}
}
}
Expand Down
71 changes: 50 additions & 21 deletions datafusion/core/src/physical_planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1983,23 +1983,37 @@ impl DefaultPhysicalPlanner {
.await
{
Ok(input) => {
// This plan will includes statistics if show_statistics is on
// Include statistics / schema if enabled
stringified_plans.push(
displayable(input.as_ref())
.set_show_statistics(config.show_statistics)
.set_show_schema(config.show_schema)
.to_stringified(e.verbose, InitialPhysicalPlan),
);

// If the show_statisitcs is off, add another line to show statsitics in the case of explain verbose
if e.verbose && !config.show_statistics {
stringified_plans.push(
displayable(input.as_ref())
.set_show_statistics(true)
.to_stringified(
e.verbose,
InitialPhysicalPlanWithStats,
),
);
// Show statistics + schema in verbose output even if not
// explicitly requested
if e.verbose {
if !config.show_statistics {
stringified_plans.push(
displayable(input.as_ref())
.set_show_statistics(true)
.to_stringified(
e.verbose,
InitialPhysicalPlanWithStats,
),
);
}
if !config.show_schema {
stringified_plans.push(
displayable(input.as_ref())
.set_show_schema(true)
.to_stringified(
e.verbose,
InitialPhysicalPlanWithSchema,
),
);
}
}

let optimized_plan = self.optimize_internal(
Expand All @@ -2011,6 +2025,7 @@ impl DefaultPhysicalPlanner {
stringified_plans.push(
displayable(plan)
.set_show_statistics(config.show_statistics)
.set_show_schema(config.show_schema)
.to_stringified(e.verbose, plan_type),
);
},
Expand All @@ -2021,19 +2036,33 @@ impl DefaultPhysicalPlanner {
stringified_plans.push(
displayable(input.as_ref())
.set_show_statistics(config.show_statistics)
.set_show_schema(config.show_schema)
.to_stringified(e.verbose, FinalPhysicalPlan),
);

// If the show_statisitcs is off, add another line to show statsitics in the case of explain verbose
if e.verbose && !config.show_statistics {
stringified_plans.push(
displayable(input.as_ref())
.set_show_statistics(true)
.to_stringified(
e.verbose,
FinalPhysicalPlanWithStats,
),
);
// Show statistics + schema in verbose output even if not
// explicitly requested
if e.verbose {
if !config.show_statistics {
stringified_plans.push(
displayable(input.as_ref())
.set_show_statistics(true)
.to_stringified(
e.verbose,
FinalPhysicalPlanWithStats,
),
);
}
if !config.show_schema {
stringified_plans.push(
displayable(input.as_ref())
.set_show_schema(true)
.to_stringified(
e.verbose,
FinalPhysicalPlanWithSchema,
),
);
}
}
}
Err(DataFusionError::Context(optimizer_name, e)) => {
Expand Down
51 changes: 46 additions & 5 deletions datafusion/physical-plan/src/display.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,14 @@
use std::fmt;
use std::fmt::Formatter;

use super::{accept, ExecutionPlan, ExecutionPlanVisitor};

use arrow_schema::SchemaRef;

use datafusion_common::display::{GraphvizBuilder, PlanType, StringifiedPlan};
use datafusion_expr::display_schema;
use datafusion_physical_expr::{LexOrdering, PhysicalSortExpr};

use super::{accept, ExecutionPlan, ExecutionPlanVisitor};

/// Options for controlling how each [`ExecutionPlan`] should format itself
#[derive(Debug, Clone, Copy)]
pub enum DisplayFormatType {
Expand All @@ -37,12 +39,15 @@ pub enum DisplayFormatType {
}

/// Wraps an `ExecutionPlan` with various ways to display this plan
///
/// Optional output (statistics, schema) is toggled through the
/// builder-style setters [`Self::set_show_statistics`] and
/// [`Self::set_show_schema`].
#[derive(Debug, Clone)]
pub struct DisplayableExecutionPlan<'a> {
/// The plan being displayed
inner: &'a dyn ExecutionPlan,
/// How to show metrics
show_metrics: ShowMetrics,
/// If statistics should be displayed. See [`Self::set_show_statistics`]
show_statistics: bool,
/// If schema should be displayed. See [`Self::set_show_schema`]
show_schema: bool,
}

impl<'a> DisplayableExecutionPlan<'a> {
Expand All @@ -53,6 +58,7 @@ impl<'a> DisplayableExecutionPlan<'a> {
inner,
show_metrics: ShowMetrics::None,
show_statistics: false,
show_schema: false,
}
}

Expand All @@ -64,6 +70,7 @@ impl<'a> DisplayableExecutionPlan<'a> {
inner,
show_metrics: ShowMetrics::Aggregated,
show_statistics: false,
show_schema: false,
}
}

Expand All @@ -75,9 +82,19 @@ impl<'a> DisplayableExecutionPlan<'a> {
inner,
show_metrics: ShowMetrics::Full,
show_statistics: false,
show_schema: false,
}
}

/// Enable display of schema
///
/// If true, plans will be displayed with schema information at the end
/// of each line. The format is `schema=[[a:Int32;N, b:Int32;N, c:Int32;N]]`
pub fn set_show_schema(mut self, show_schema: bool) -> Self {
self.show_schema = show_schema;
self
}

/// Enable display of statistics
pub fn set_show_statistics(mut self, show_statistics: bool) -> Self {
self.show_statistics = show_statistics;
Expand Down Expand Up @@ -105,6 +122,7 @@ impl<'a> DisplayableExecutionPlan<'a> {
plan: &'a dyn ExecutionPlan,
show_metrics: ShowMetrics,
show_statistics: bool,
show_schema: bool,
}
impl<'a> fmt::Display for Wrapper<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
Expand All @@ -114,6 +132,7 @@ impl<'a> DisplayableExecutionPlan<'a> {
indent: 0,
show_metrics: self.show_metrics,
show_statistics: self.show_statistics,
show_schema: self.show_schema,
};
accept(self.plan, &mut visitor)
}
Expand All @@ -123,6 +142,7 @@ impl<'a> DisplayableExecutionPlan<'a> {
plan: self.inner,
show_metrics: self.show_metrics,
show_statistics: self.show_statistics,
show_schema: self.show_schema,
}
}

Expand Down Expand Up @@ -179,6 +199,7 @@ impl<'a> DisplayableExecutionPlan<'a> {
plan: &'a dyn ExecutionPlan,
show_metrics: ShowMetrics,
show_statistics: bool,
show_schema: bool,
}

impl<'a> fmt::Display for Wrapper<'a> {
Expand All @@ -189,6 +210,7 @@ impl<'a> DisplayableExecutionPlan<'a> {
indent: 0,
show_metrics: self.show_metrics,
show_statistics: self.show_statistics,
show_schema: self.show_schema,
};
visitor.pre_visit(self.plan)?;
Ok(())
Expand All @@ -199,6 +221,7 @@ impl<'a> DisplayableExecutionPlan<'a> {
plan: self.inner,
show_metrics: self.show_metrics,
show_statistics: self.show_statistics,
show_schema: self.show_schema,
}
}

Expand All @@ -221,6 +244,14 @@ enum ShowMetrics {
}

/// Formats plans with a single line per node.
///
/// # Example
///
/// ```text
/// ProjectionExec: expr=[column1@0 + 2 as column1 + Int64(2)]
/// FilterExec: column1@0 = 5
/// ValuesExec
/// ```
struct IndentVisitor<'a, 'b> {
/// How to format each node
t: DisplayFormatType,
Expand All @@ -232,6 +263,8 @@ struct IndentVisitor<'a, 'b> {
show_metrics: ShowMetrics,
/// If statistics should be displayed
show_statistics: bool,
/// If schema should be displayed
show_schema: bool,
}

impl<'a, 'b> ExecutionPlanVisitor for IndentVisitor<'a, 'b> {
Expand Down Expand Up @@ -265,6 +298,13 @@ impl<'a, 'b> ExecutionPlanVisitor for IndentVisitor<'a, 'b> {
let stats = plan.statistics().map_err(|_e| fmt::Error)?;
write!(self.f, ", statistics=[{}]", stats)?;
}
if self.show_schema {
write!(
self.f,
", schema={}",
display_schema(plan.schema().as_ref())
)?;
}
writeln!(self.f)?;
self.indent += 1;
Ok(true)
Expand Down Expand Up @@ -465,12 +505,13 @@ mod tests {
use std::fmt::Write;
use std::sync::Arc;

use super::DisplayableExecutionPlan;
use crate::{DisplayAs, ExecutionPlan, PlanProperties};

use datafusion_common::{DataFusionError, Result, Statistics};
use datafusion_execution::{SendableRecordBatchStream, TaskContext};

use crate::{DisplayAs, ExecutionPlan, PlanProperties};

use super::DisplayableExecutionPlan;

#[derive(Debug, Clone, Copy)]
enum TestStatsExecPlan {
Panic,
Expand Down
2 changes: 2 additions & 0 deletions datafusion/proto/proto/datafusion.proto
Original file line number Diff line number Diff line change
Expand Up @@ -669,9 +669,11 @@ message PlanType {
datafusion_common.EmptyMessage FinalLogicalPlan = 3;
datafusion_common.EmptyMessage InitialPhysicalPlan = 4;
datafusion_common.EmptyMessage InitialPhysicalPlanWithStats = 9;
datafusion_common.EmptyMessage InitialPhysicalPlanWithSchema = 11;
OptimizedPhysicalPlanType OptimizedPhysicalPlan = 5;
datafusion_common.EmptyMessage FinalPhysicalPlan = 6;
datafusion_common.EmptyMessage FinalPhysicalPlanWithStats = 10;
datafusion_common.EmptyMessage FinalPhysicalPlanWithSchema = 12;
}
}

Expand Down
Loading

0 comments on commit 3a89a3d

Please sign in to comment.