substrait-io · jvanstraten · Sep 20, 2022 · Sep 13, 2022 · Sep 13, 2022 · Sep 14, 2022
diff --git a/rs/src/lib.rs b/rs/src/lib.rs
@@ -174,6 +174,7 @@ use strum::IntoEnumIterator;
 // Aliases for common types used on the crate interface.
 pub use input::config::glob::Pattern;
 pub use input::config::Config;
+pub use output::comment::Comment;
 pub use output::diagnostic::Classification;
 pub use output::diagnostic::Diagnostic;
 pub use output::diagnostic::Level;

diff --git a/rs/src/parse/expressions/literals.rs b/rs/src/parse/expressions/literals.rs
@@ -825,7 +825,7 @@ fn parse_decimal(
         if val >= range || val <= -range {
             Err(cause!(
                 ExpressionIllegalLiteralValue,
-                "decimal value is out of range for specificied precision and scale"
+                "decimal value is out of range for specified precision and scale"
             ))
         } else {
             Literal::new_compound(

diff --git a/rs/src/parse/expressions/mod.rs b/rs/src/parse/expressions/mod.rs
@@ -17,9 +17,10 @@ use crate::util;
 use crate::util::string::Describe;
 
 /// Description of an expression.
-#[derive(Clone, Debug, PartialEq)]
+#[derive(Clone, Debug, Default, PartialEq)]
 pub enum Expression {
     /// Used for unknown expression types.
+    #[default]
     Unresolved,
 
     /// Used for literals.
@@ -44,12 +45,6 @@ pub enum Expression {
     Cast(data::Type, Box<Expression>),
 }
 
-impl Default for Expression {
-    fn default() -> Self {
-        Expression::Unresolved
-    }
-}
-
 impl From<literals::Literal> for Expression {
     fn from(l: literals::Literal) -> Self {
         Expression::Literal(l)

diff --git a/rs/src/parse/relations/filter.rs b/rs/src/parse/relations/filter.rs
@@ -32,14 +32,10 @@ pub fn parse_filter_rel(
     describe!(y, Relation, "Filter by {}", &predicate);
     summary!(
         y,
-        "This relation discards all rows for which {} yields false.",
-        &predicate
+        "This relation discards all rows for which the expression {} yields {}.",
+        &predicate,
+        if nullable { "false or null" } else { "false" }
     );
-    if nullable {
-        // FIXME: what's the behavior when a filter condition is nullable and
-        // yields null? Same applies for all other usages of parse_predicate().
-        summary!(y, "Behavior for a null condition is unspecified.");
-    }
 
     // Handle the common field.
     handle_rel_common!(x, y);

diff --git a/rs/src/parse/relations/join.rs b/rs/src/parse/relations/join.rs
@@ -38,10 +38,9 @@ pub fn parse_join_rel(x: &substrait::JoinRel, y: &mut context::Context) -> diagn
     }
 
     // Parse join expression.
-    let join_expression =
-        proto_boxed_required_field!(x, y, expression, expressions::parse_predicate)
-            .1
-            .unwrap_or_default();
+    let (join_expression_node, opt_join_expression) =
+        proto_boxed_required_field!(x, y, expression, expressions::parse_predicate);
+    let join_expression = opt_join_expression.unwrap_or_default();
 
     // Parse join type.
     let join_type = proto_required_enum_field!(x, y, r#type, JoinType)
@@ -84,7 +83,7 @@ pub fn parse_join_rel(x: &substrait::JoinRel, y: &mut context::Context) -> diagn
 
     // Handle optional post-join filter.
     let filter_expression =
-        proto_boxed_field!(x, y, post_join_filter, expressions::parse_predicate).1;
+        proto_boxed_field!(x, y, post_join_filter, expressions::parse_predicate);
 
     // Describe the relation.
     let prefix = match (join_type, x.post_join_filter.is_some()) {
@@ -106,59 +105,73 @@ pub fn parse_join_rel(x: &substrait::JoinRel, y: &mut context::Context) -> diagn
     };
     describe!(y, Relation, "{prefix} join by {join_expression}");
     summary!(y, "{prefix} join by {join_expression:#}.");
-    y.push_summary(comment::Comment::new().nl().plain(match join_type {
-        JoinType::Unspecified => "",
-        JoinType::Inner => concat!(
-            " Returns rows combining the row from the left and right ",
-            "input for each pair where the join expression yields true.",
-        ),
-        JoinType::Outer => concat!(
-            " Returns rows combining the row from the left and right ",
-            "input for each pair where the join expression yields true. ",
-            "If the join expression never yields true for any left or ",
-            "right row, this returns a row anyway, with the fields ",
-            "corresponding to the other input set to null.",
-        ),
-        JoinType::Left => concat!(
-            " Returns rows combining the row from the left and right ",
-            "input for each pair where the join expression yields true. ",
-            "If the join expression never yields true for a row from the ",
-            "left, this returns a row anyway, with the fields corresponding ",
-            "to the right input set to null.",
-        ),
-        JoinType::Right => concat!(
-            " Returns rows combining the row from the left and right ",
-            "input for each pair where the join expression yields true. ",
-            "If the join expression never yields true for a row from the ",
-            "right, this returns a row anyway, with the fields corresponding ",
-            "to the left input set to null.",
-        ),
-        JoinType::Semi => concat!(
-            " Filters rows from the left input, propagating a row only if ",
-            "the join expression yields true for that row combined with ",
-            "any row from the right input.",
-        ),
-        JoinType::Anti => concat!(
-            " Filters rows from the left input, propagating a row only if ",
-            "the join expression does not yield true for that row combined ",
-            "with any row from the right input.",
-        ),
-        JoinType::Single => concat!(
-            " Returns a row for each row from the left input, concatenating ",
-            "it with the row from the right input for which the join ",
-            "expression yields true. If the expression never yields true for ",
-            "a left input, the fields corresponding to the right input are ",
-            "set to null. If the expression yields true for a left row and ",
-            "multiple right rows, this may return the first pair encountered ",
-            "or throw an error."
-        ),
-    }));
-    if let Some(filter_expression) = filter_expression {
-        y.push_summary(
-            comment::Comment::new()
-                .nl()
-                .plain(format!("The result is filtered by {filter_expression:#}.")),
-        );
+    let nullable = if join_expression_node.data_type().nullable() {
+        "false or null"
+    } else {
+        "false"
+    };
+    y.push_summary(
+        comment::Comment::new().nl().plain(match join_type {
+            JoinType::Unspecified => "".to_string(),
+            JoinType::Inner => format!(
+                "Returns rows combining the row from the left and right \
+                input for each pair where the join expression yields true, \
+                discarding rows where the join expression yields {}.",
+                nullable
+            ),
+            JoinType::Outer => format!(
+                "Returns rows combining the row from the left and right \
+                input for each pair where the join expression yields true, \
+                discarding rows where the join expression yields {}. \
+                If the join expression never yields true for any left or \
+                right row, this returns a row anyway, with the fields \
+                corresponding to the other input set to null.",
+                nullable
+            ),
+            JoinType::Left => format!(
+                "Returns rows combining the row from the left and right \
+                input for each pair where the join expression yields true, \
+                discarding rows where the join expression yields {}. \
+                If the join expression never yields true for a row from the \
+                left, this returns a row anyway, with the fields corresponding \
+                to the right input set to null.",
+                nullable
+            ),
+            JoinType::Right => format!(
+                "Returns rows combining the row from the left and right \
+                input for each pair where the join expression yields true, \
+                discarding rows where the join expression yields {}. \
+                If the join expression never yields true for a row from the \
+                right, this returns a row anyway, with the fields corresponding \
+                to the left input set to null.",
+                nullable
+            ),
+            JoinType::Semi => "Filters rows from the left input, propagating a row only if \
+                              the join expression yields true for that row combined with \
+                              any row from the right input."
+                .to_string(),
+            JoinType::Anti => "Filters rows from the left input, propagating a row only if \
+                              the join expression does not yield true for that row combined \
+                              with any row from the right input."
+                .to_string(),
+            JoinType::Single => "Returns a row for each row from the left input, concatenating \
+                                it with the row from the right input for which the join \
+                                expression yields true. If the expression never yields true for \
+                                a left input, the fields corresponding to the right input are \
+                                set to null. If the expression yields true for a left row and \
+                                multiple right rows, this may return the first pair encountered \
+                                or throw an error."
+                .to_string(),
+        }),
+    );
+
+    if let (Some(node), Some(filter_expression)) = filter_expression {
+        let nullable = node.data_type().nullable();
+        y.push_summary(comment::Comment::new().nl().plain(format!(
+            "The result is filtered by the expression {filter_expression:#}, \
+            discarding all rows for which the filter expression yields {}.",
+            if nullable { "false or null" } else { "false" }
+        )));
     }
 
     // Handle the common field.

diff --git a/tests/runner.py b/tests/runner.py
@@ -303,6 +303,17 @@ def parse_type_instruction(type_str, path):
     return [dict(DataType=dict(path=path, data_type=type_str))]
 
 
+def parse_comment_instruction(comment_test, path):
+    """Parses a comment check instruction in the input format into the
+    Rust/serde instruction syntax."""
+    if comment_test is None:
+        return []
+
+    if not isinstance(comment_test, str):
+        raise Exception("__test.comment must be a string")
+    return [dict(Comment=dict(path=path, msg=comment_test))]
+
+
 def parse_instructions(test_tags, fname, proto_desc):
     """Parses and checks the syntax for instructions in the input format into
     the Rust/serde instruction syntax."""
@@ -327,6 +338,11 @@ def parse_instructions(test_tags, fname, proto_desc):
                     parse_type_instruction(insn_type.pop("type", None), path)
                 )
 
+                # Handle comment instructions.
+                instructions.extend(
+                    parse_comment_instruction(insn_type.pop("comment", None), path)
+                )
+
                 if insn_type:
                     raise Exception(
                         "Found unknown __test key(s): {}".format(

diff --git a/tests/src/runner.rs b/tests/src/runner.rs
@@ -74,6 +74,12 @@ struct DiagnosticTest {
     pub after: Option<PathElement>,
 }
 
+#[derive(serde::Deserialize, Debug)]
+struct CommentTest {
+    pub path: Vec<PathElement>,
+    pub msg: String,
+}
+
 #[derive(serde::Deserialize, Debug)]
 struct DataTypeTest {
     pub path: Vec<PathElement>,
@@ -123,6 +129,7 @@ enum Instruction {
     Level(LevelTest),
     Diag(DiagnosticTest),
     DataType(DataTypeTest),
+    Comment(CommentTest),
 }
 
 /// A diagnostic level override command.
@@ -359,6 +366,23 @@ impl TestCase {
         })
     }
 
+    /// Runs the given comment test instruction.
+    fn run_comment_test(
+        result: &mut TestResult,
+        root: &mut sv::output::tree::Node,
+        desc: &CommentTest,
+    ) {
+        let path = convert_path(&desc.path);
+        result.log(format!("Checking comment at {path}..."));
+        Self::traverse(result, root, path.elements.iter(), |result, node| {
+            let actual = format!("{}", node.summary.clone().unwrap_or_default());
+            let pattern = glob::Pattern::new(&desc.msg).unwrap();
+            if !pattern.matches(&actual) {
+                result.error(format!("comment mismatch; found {actual}"));
+            }
+        })
+    }
+
     /// Runs the given test case, updating result.
     fn run(
         result: &mut TestResult,
@@ -420,6 +444,7 @@ impl TestCase {
                 Instruction::DataType(data_type) => {
                     Self::run_data_type_test(result, &mut root, data_type)
                 }
+                Instruction::Comment(comment) => Self::run_comment_test(result, &mut root, comment),
             }
         }
     }

diff --git a/tests/tests/relations/filter/nullability-bool-expression.yaml b/tests/tests/relations/filter/nullability-bool-expression.yaml
@@ -0,0 +1,46 @@
+name: filter-nullability-bool-expression
+plan:
+  __test: [level: i]
+  relations:
+    - rel:
+        filter:
+          input:
+            read:
+              baseSchema:
+                names: [a, b]
+                struct:
+                  nullability: NULLABILITY_REQUIRED
+                  types:
+                    - string: { nullability: NULLABILITY_REQUIRED }
+                    - bool: { nullability: NULLABILITY_NULLABLE }
+              namedTable:
+                names:
+                  - test
+          condition:
+            literal:
+              nullable: true
+              boolean: false
+          __test:
+            [
+              comment: "*false or null.",
+              type: "NSTRUCT<a: string, b: boolean?>",
+            ]
+    - rel:
+        filter:
+          input:
+            read:
+              baseSchema:
+                names: [a, b]
+                struct:
+                  nullability: NULLABILITY_REQUIRED
+                  types:
+                    - string: { nullability: NULLABILITY_REQUIRED }
+                    - bool: { nullability: NULLABILITY_REQUIRED }
+              namedTable:
+                names:
+                  - test
+          condition:
+            literal:
+              nullable: false
+              boolean: false
+          __test: [comment: "*false.", type: "NSTRUCT<a: string, b: boolean>"]