Skip to content

Commit

Permalink
docs
Browse files Browse the repository at this point in the history
  • Loading branch information
wjones127 committed Sep 4, 2023
1 parent ff7ed70 commit 45ae442
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 132 deletions.
53 changes: 52 additions & 1 deletion datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,58 @@ impl<S: SimplifyInfo> ExprSimplifier<S> {
expr.rewrite(&mut expr_rewrite)
}

/// Add guarantees
/// Input guarantees and simplify the expression.
///
/// The guarantees can simplify expressions. For example, if a column is
/// guaranteed to always be a certain value, its references in the expression
/// can be replaced with that literal.
///
/// ```rust
/// use arrow::datatypes::{DataType, Field, Schema};
/// use datafusion_expr::{col, lit, Expr};
/// use datafusion_common::{Result, ScalarValue, ToDFSchema};
/// use datafusion_physical_expr::execution_props::ExecutionProps;
/// use datafusion_optimizer::simplify_expressions::{
/// ExprSimplifier, SimplifyContext,
/// guarantees::{Guarantee, GuaranteeBound, NullStatus}};
///
/// let schema = Schema::new(vec![
/// Field::new("x", DataType::Int64, false),
/// Field::new("y", DataType::UInt32, false),
/// Field::new("z", DataType::Int64, false),
/// ])
/// .to_dfschema_ref().unwrap();
///
/// // Create the simplifier
/// let props = ExecutionProps::new();
/// let context = SimplifyContext::new(&props)
/// .with_schema(schema);
/// let simplifier = ExprSimplifier::new(context);
///
/// // Expression: (x >= 3) AND (y + 2 < 10) AND (z > 5)
/// let expr_x = col("x").gt_eq(lit(3_i64));
/// let expr_y = (col("y") + lit(2_u32)).lt(lit(10_u32));
/// let expr_z = col("z").gt(lit(5_i64));
/// let expr = expr_x.and(expr_y).and(expr_z.clone());
///
/// let guarantees = vec![
/// // x is guaranteed to be between 3 and 5
/// (
/// col("x"),
/// Guarantee::new(
/// Some(GuaranteeBound::new(ScalarValue::Int64(Some(3)), false)),
/// Some(GuaranteeBound::new(ScalarValue::Int64(Some(5)), false)),
/// NullStatus::NeverNull,
/// )
/// ),
/// // y is guaranteed to be 3
/// (col("y"), Guarantee::from(&ScalarValue::UInt32(Some(3)))),
/// ];
/// let output = simplifier.simplify_with_guarantees(expr, &guarantees).unwrap();
/// // Expression becomes: true AND true AND (z > 5), which simplifies to
/// // z > 5.
/// assert_eq!(output, expr_z);
/// ```
pub fn simplify_with_guarantees<'a>(
&self,
expr: Expr,
Expand Down
36 changes: 30 additions & 6 deletions datafusion/optimizer/src/simplify_expressions/guarantees.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,34 @@
// specific language governing permissions and limitations
// under the License.

//! Logic to inject guarantees with expressions.
//! Guarantees which can be used with [ExprSimplifier::simplify_with_guarantees()][crate::simplify_expressions::expr_simplifier::ExprSimplifier::simplify_with_guarantees].
//!
//! Guarantees can represent single values or possible ranges of values.
//!
//! ```
//! use datafusion_common::ScalarValue;
//! use datafusion_optimizer::simplify_expressions::guarantees::{
//! Guarantee, GuaranteeBound, NullStatus};
//!
//! // Guarantee that value is always 1_i32
//! Guarantee::from(&ScalarValue::Int32(Some(1)));
//! // Guarantee that value is always NULL
//! Guarantee::from(&ScalarValue::Null);
//! // Guarantee that value is always between 1_i32 and 10_i32 (inclusive)
//! // and never null.
//! Guarantee::new(
//! Some(GuaranteeBound::new(ScalarValue::Int32(Some(1)), false)),
//! Some(GuaranteeBound::new(ScalarValue::Int32(Some(10)), false)),
//! NullStatus::NeverNull,
//! );
//! ```
use datafusion_common::{tree_node::TreeNodeRewriter, Result, ScalarValue};
use datafusion_expr::{expr::InList, lit, Between, BinaryExpr, Expr, Operator};
use std::collections::HashMap;

/// A bound on the value of an expression.
#[derive(Debug, Clone, PartialEq)]
pub struct GuaranteeBound {
/// The value of the bound.
/// The value of the bound. If the bound is null, then there is no bound.
pub bound: ScalarValue,
/// If true, the bound is exclusive. If false, the bound is inclusive.
/// In terms of inequalities, this means the bound is `<` or `>` rather than
Expand All @@ -40,6 +58,7 @@ impl GuaranteeBound {
}

impl Default for GuaranteeBound {
/// Default value is a closed bound at null.
fn default() -> Self {
Self {
bound: ScalarValue::Null,
Expand Down Expand Up @@ -70,9 +89,11 @@ pub enum NullStatus {
/// nulls.
#[derive(Debug, Clone, PartialEq)]
pub struct Guarantee {
/// The min values that the expression can take on. If `min.bound` is
/// The min values that the expression can take on. If the min is null, then
/// there is no known min.
pub min: GuaranteeBound,
/// The max values that the expression can take on.
/// The max values that the expression can take on. If the max is null,
/// then there is no known max.
pub max: GuaranteeBound,
/// Whether the expression is expected to be either always null or never null.
pub null_status: NullStatus,
Expand All @@ -97,14 +118,19 @@ impl Guarantee {
self.min.bound > *value || (self.min.bound == *value && self.min.open)
}

/// Reports whether every value covered by this guarantee is known to be
/// at least `value`.
///
/// Only the lower bound's value is consulted; whether that bound is open or
/// closed does not change the answer.
fn greater_than_or_eq(&self, value: &ScalarValue) -> bool {
    let lower = &self.min.bound;
    lower >= value
}

/// Whether values are guaranteed to be strictly less than the given value.
///
/// Returns `true` only when an upper bound is known and it either lies
/// strictly below `value`, or equals `value` while being exclusive (open).
/// Returns `false` when there is no known max.
fn less_than(&self, value: &ScalarValue) -> bool {
    // A null bound means "no known max". Typed nulls such as
    // `ScalarValue::Int64(None)` compare as smaller than non-null values of
    // the same type, so without this guard a missing bound would be mistaken
    // for a very small one and the guarantee would be wrongly reported.
    if self.max.bound.is_null() {
        return false;
    }
    self.max.bound < *value || (self.max.bound == *value && self.max.open)
}

/// Whether values are guaranteed to be less than or equal to the given
/// value.
///
/// Returns `true` only when an upper bound is known and it does not exceed
/// `value`. Returns `false` when there is no known max.
fn less_than_or_eq(&self, value: &ScalarValue) -> bool {
    // A null bound means "no known max". Typed nulls such as
    // `ScalarValue::Int64(None)` compare as smaller than non-null values of
    // the same type, so without this guard a missing bound would be mistaken
    // for a very small one and the guarantee would be wrongly reported.
    if self.max.bound.is_null() {
        return false;
    }
    self.max.bound <= *value
}
Expand Down Expand Up @@ -136,8 +162,6 @@ impl From<&ScalarValue> for Guarantee {
}

/// Rewrite expressions to incorporate guarantees.
///
/// Holds the set of known guarantees, keyed by the expression each one
/// applies to. Both the expressions and the guarantees are borrowed for the
/// lifetime `'a`; the rewriter does not own them.
pub(crate) struct GuaranteeRewriter<'a> {
/// Lookup from an expression to the guarantee known for it.
guarantees: HashMap<&'a Expr, &'a Guarantee>,
}
Expand Down
Loading

0 comments on commit 45ae442

Please sign in to comment.