From 4da4aa497e0867a464c4a0bbd08b167152fab6ae Mon Sep 17 00:00:00 2001 From: Jeroen van Straten Date: Mon, 23 May 2022 11:44:51 +0200 Subject: [PATCH 1/2] feat: implement literal features added in Substrait 0.3.0, allow anchor 0 --- rs/src/input/traits.rs | 24 ++ rs/src/output/diagnostic.rs | 3 + rs/src/parse/expressions/literals.rs | 336 +++++++++++++----- rs/src/parse/extensions/simple/mod.rs | 37 +- rs/src/parse/relations/read.rs | 2 +- rs/src/parse/types.rs | 27 +- substrait | 2 +- .../expressions/literals/interval_day.yaml | 21 ++ 8 files changed, 335 insertions(+), 117 deletions(-) diff --git a/rs/src/input/traits.rs b/rs/src/input/traits.rs index d3a02946..ac25dd9b 100644 --- a/rs/src/input/traits.rs +++ b/rs/src/input/traits.rs @@ -127,6 +127,30 @@ impl InputNode for T { } } +impl InputNode for () { + fn type_to_node() -> tree::Node { + tree::NodeType::ProtoMessage("google.protobuf.Empty").into() + } + + fn data_to_node(&self) -> tree::Node { + tree::NodeType::ProtoMessage("google.protobuf.Empty").into() + } + + fn oneof_variant(&self) -> Option<&'static str> { + None + } + + fn parse_unknown(&self, _context: &mut context::Context<'_>) -> bool { + false + } +} + +impl ProtoMessage for () { + fn proto_message_type() -> &'static str { + "google.protobuf.Empty" + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/rs/src/output/diagnostic.rs b/rs/src/output/diagnostic.rs index 5f8614bf..ba204437 100644 --- a/rs/src/output/diagnostic.rs +++ b/rs/src/output/diagnostic.rs @@ -228,6 +228,9 @@ pub enum Classification { #[strum(props(Description = "failed to resolve type variation name"))] LinkMissingTypeVariationName = 3004, + #[strum(props(HiddenDescription = "use of anchor zero"))] + LinkAnchorZero = 3005, + // Type-related diagnostics (group 4). #[strum(props(HiddenDescription = "type-related diagnostics"))] Type = 4000, diff --git a/rs/src/parse/expressions/literals.rs b/rs/src/parse/expressions/literals.rs index 4b29a234..af502481 100644 --- a/rs/src/parse/expressions/literals.rs +++ b/rs/src/parse/expressions/literals.rs @@ -5,7 +5,9 @@ use crate::input::proto::substrait; use crate::output::data_type; use crate::output::diagnostic; +use crate::output::extension; use crate::parse::context; +use crate::parse::extensions; use crate::parse::types; use crate::string_util; use crate::string_util::Describe; @@ -36,13 +38,16 @@ enum LiteralValue { Binary(Vec), /// May be used only for IntervalYearToMonth and IntervalDayToSecond. - Interval(i32, i32), + Interval(i64, i64), /// May be used only for structs and lists. Items(Vec), /// May be used only for maps. Pairs(Vec<(Literal, Literal)>), + + /// A literal for a user-defined type, cannot parse more information. + UserDefined, } impl Default for LiteralValue { @@ -93,13 +98,14 @@ impl Literal { value: LiteralValue, simple: data_type::Simple, nullable: bool, + variation: Option>>, ) -> diagnostic::Result { Ok(Literal { value, data_type: data_type::DataType::new( data_type::Class::Simple(simple), nullable, - None, + variation, vec![], )?, }) @@ -110,6 +116,7 @@ impl Literal { value: LiteralValue, compound: data_type::Compound, nullable: bool, + variation: Option>>, args: Vec, ) -> diagnostic::Result { Ok(Literal { @@ -117,7 +124,7 @@ impl Literal { data_type: data_type::DataType::new( data_type::Class::Compound(compound), nullable, - None, + variation, args.into_iter().map(|x| x.into()).collect(), )?, }) @@ -216,7 +223,11 @@ impl Describe for Literal { data_type::Class::Simple(data_type::Simple::IntervalYear) => { write!(f, "{a}y{b:+}m") } - data_type::Class::Simple(data_type::Simple::IntervalDay) => write!(f, "{a}d{b:+}s"), + data_type::Class::Simple(data_type::Simple::IntervalDay) => { + let s = b / 1000000; + let us = if b > &0 { *b } else { -b } % 1000000; + write!(f, "{a}d{s:+}.{us:06}s") + } _ => write!(f, "({a}, {b})"), }, LiteralValue::Items(x) => match self.data_type.class() { @@ -296,6 +307,7 @@ impl Describe for Literal { write!(f, ")") } }, + LiteralValue::UserDefined => write!(f, "(user-defined)"), } } } @@ -311,70 +323,122 @@ fn parse_boolean( x: &bool, _y: &mut context::Context, nullable: bool, + variation: Option>>, ) -> diagnostic::Result { Literal::new_simple( LiteralValue::Boolean(*x), data_type::Simple::Boolean, nullable, + variation, ) } /// Parses an i8 literal. -fn parse_i8(x: &i32, _y: &mut context::Context, nullable: bool) -> diagnostic::Result { +fn parse_i8( + x: &i32, + _y: &mut context::Context, + nullable: bool, + variation: Option>>, +) -> diagnostic::Result { let x = i8::try_from(*x) .map_err(|_| cause!(ExpressionIllegalLiteralValue, "i8 value out of range"))?; Literal::new_simple( LiteralValue::Integer(x as i64), data_type::Simple::I8, nullable, + variation, ) } /// Parses an i16 literal. -fn parse_i16(x: &i32, _y: &mut context::Context, nullable: bool) -> diagnostic::Result { +fn parse_i16( + x: &i32, + _y: &mut context::Context, + nullable: bool, + variation: Option>>, +) -> diagnostic::Result { let x = i16::try_from(*x) .map_err(|_| cause!(ExpressionIllegalLiteralValue, "i16 value out of range"))?; Literal::new_simple( LiteralValue::Integer(x as i64), data_type::Simple::I16, nullable, + variation, ) } /// Parses an i32 literal. -fn parse_i32(x: &i32, _y: &mut context::Context, nullable: bool) -> diagnostic::Result { +fn parse_i32( + x: &i32, + _y: &mut context::Context, + nullable: bool, + variation: Option>>, +) -> diagnostic::Result { Literal::new_simple( LiteralValue::Integer(*x as i64), data_type::Simple::I32, nullable, + variation, ) } /// Parses an i64 literal. -fn parse_i64(x: &i64, _y: &mut context::Context, nullable: bool) -> diagnostic::Result { - Literal::new_simple(LiteralValue::Integer(*x), data_type::Simple::I64, nullable) +fn parse_i64( + x: &i64, + _y: &mut context::Context, + nullable: bool, + variation: Option>>, +) -> diagnostic::Result { + Literal::new_simple( + LiteralValue::Integer(*x), + data_type::Simple::I64, + nullable, + variation, + ) } /// Parses an fp32 literal. -fn parse_fp32(x: &f32, _y: &mut context::Context, nullable: bool) -> diagnostic::Result { +fn parse_fp32( + x: &f32, + _y: &mut context::Context, + nullable: bool, + variation: Option>>, +) -> diagnostic::Result { Literal::new_simple( LiteralValue::Float(*x as f64), data_type::Simple::Fp32, nullable, + variation, ) } /// Parses an fp64 literal. -fn parse_fp64(x: &f64, _y: &mut context::Context, nullable: bool) -> diagnostic::Result { - Literal::new_simple(LiteralValue::Float(*x), data_type::Simple::Fp64, nullable) +fn parse_fp64( + x: &f64, + _y: &mut context::Context, + nullable: bool, + variation: Option>>, +) -> diagnostic::Result { + Literal::new_simple( + LiteralValue::Float(*x), + data_type::Simple::Fp64, + nullable, + variation, + ) } /// Parses a string literal. -fn parse_string(x: &str, _y: &mut context::Context, nullable: bool) -> diagnostic::Result { +fn parse_string( + x: &str, + _y: &mut context::Context, + nullable: bool, + variation: Option>>, +) -> diagnostic::Result { Literal::new_simple( LiteralValue::String(x.to_string()), data_type::Simple::String, nullable, + variation, ) } @@ -383,11 +447,13 @@ fn parse_binary( x: &[u8], _y: &mut context::Context, nullable: bool, + variation: Option>>, ) -> diagnostic::Result { Literal::new_simple( LiteralValue::Binary(x.to_owned()), data_type::Simple::Binary, nullable, + variation, ) } @@ -396,6 +462,7 @@ fn parse_timestamp( x: &i64, y: &mut context::Context, nullable: bool, + variation: Option>>, ) -> diagnostic::Result { let dt = to_date_time(*x)?; if dt < chrono::NaiveDate::from_ymd(1000, 1, 1).and_hms(0, 0, 0) @@ -412,6 +479,7 @@ fn parse_timestamp( LiteralValue::Integer(*x), data_type::Simple::Timestamp, nullable, + variation, ) } @@ -420,6 +488,7 @@ fn parse_timestamp_tz( x: &i64, y: &mut context::Context, nullable: bool, + variation: Option>>, ) -> diagnostic::Result { let dt = to_date_time(*x)?; if dt < chrono::NaiveDate::from_ymd(1000, 1, 1).and_hms(0, 0, 0) @@ -436,11 +505,17 @@ fn parse_timestamp_tz( LiteralValue::Integer(*x), data_type::Simple::TimestampTz, nullable, + variation, ) } /// Parses a date literal. -fn parse_date(x: &i32, y: &mut context::Context, nullable: bool) -> diagnostic::Result { +fn parse_date( + x: &i32, + y: &mut context::Context, + nullable: bool, + variation: Option>>, +) -> diagnostic::Result { let dt = to_date_time((*x as i64).saturating_mul(24 * 60 * 60 * 1_000_000))?; if dt < chrono::NaiveDate::from_ymd(1000, 1, 1).and_hms(0, 0, 0) || dt >= chrono::NaiveDate::from_ymd(10000, 1, 1).and_hms(0, 0, 0) @@ -456,11 +531,17 @@ fn parse_date(x: &i32, y: &mut context::Context, nullable: bool) -> diagnostic:: LiteralValue::Integer(*x as i64), data_type::Simple::Date, nullable, + variation, ) } /// Parses a time literal. -fn parse_time(x: &i64, y: &mut context::Context, nullable: bool) -> diagnostic::Result { +fn parse_time( + x: &i64, + y: &mut context::Context, + nullable: bool, + variation: Option>>, +) -> diagnostic::Result { if *x < 0 || *x >= 24 * 60 * 60 * 1_000_000 { diagnostic!( y, @@ -469,7 +550,12 @@ fn parse_time(x: &i64, y: &mut context::Context, nullable: bool) -> diagnostic:: "time of day out of range 00:00:00.000000 to 23:59:59.999999" ); } - Literal::new_simple(LiteralValue::Integer(*x), data_type::Simple::Time, nullable) + Literal::new_simple( + LiteralValue::Integer(*x), + data_type::Simple::Time, + nullable, + variation, + ) } /// Parses a year to month interval literal. @@ -477,6 +563,7 @@ fn parse_interval_year_to_month( x: &substrait::expression::literal::IntervalYearToMonth, y: &mut context::Context, nullable: bool, + variation: Option>>, ) -> diagnostic::Result { // FIXME: see FIXME for associated type. proto_primitive_field!(x, y, years, |x, _| { @@ -509,9 +596,10 @@ fn parse_interval_year_to_month( ); } Literal::new_simple( - LiteralValue::Interval(x.years, x.months), + LiteralValue::Interval(x.years.into(), x.months.into()), data_type::Simple::IntervalYear, nullable, + variation, ) } @@ -520,6 +608,7 @@ fn parse_interval_day_to_second( x: &substrait::expression::literal::IntervalDayToSecond, y: &mut context::Context, nullable: bool, + variation: Option>>, ) -> diagnostic::Result { // FIXME: see FIXME for associated type. proto_primitive_field!(x, y, days, |x, _| { @@ -533,25 +622,32 @@ fn parse_interval_day_to_second( } }); - // FIXME: according to the docs, day to second supports microsecond - // precision. The literal doesn't. The i32 seconds also doesn't - // support the full specified range (but that range is weird - // anyway). proto_primitive_field!(x, y, seconds); + proto_primitive_field!(x, y, microseconds); Literal::new_simple( - LiteralValue::Interval(x.days, x.seconds), + LiteralValue::Interval( + x.days.into(), + i64::from(x.seconds) * 1000000 + i64::from(x.microseconds), + ), data_type::Simple::IntervalDay, nullable, + variation, ) } /// Parses a UUID literal. -fn parse_uuid(x: &[u8], _y: &mut context::Context, nullable: bool) -> diagnostic::Result { +fn parse_uuid( + x: &[u8], + _y: &mut context::Context, + nullable: bool, + variation: Option>>, +) -> diagnostic::Result { if let Ok(x) = x.try_into() { Literal::new_simple( LiteralValue::Data16(i128::from_ne_bytes(x)), data_type::Simple::Uuid, nullable, + variation, ) } else { Err(cause!( @@ -567,11 +663,13 @@ fn parse_fixed_char( x: &str, _y: &mut context::Context, nullable: bool, + variation: Option>>, ) -> diagnostic::Result { Literal::new_compound( LiteralValue::String(x.to_string()), data_type::Compound::FixedChar, nullable, + variation, vec![x.len() as u64], ) } @@ -581,6 +679,7 @@ fn parse_var_char( x: &substrait::expression::literal::VarChar, y: &mut context::Context, nullable: bool, + variation: Option>>, ) -> diagnostic::Result { proto_primitive_field!(x, y, length); let len = x.length as usize; @@ -598,6 +697,7 @@ fn parse_var_char( LiteralValue::String(x.value.clone()), data_type::Compound::VarChar, nullable, + variation, vec![len as u64], ) } @@ -607,11 +707,13 @@ fn parse_fixed_binary( x: &[u8], _y: &mut context::Context, nullable: bool, + variation: Option>>, ) -> diagnostic::Result { Literal::new_compound( LiteralValue::Binary(x.to_owned()), data_type::Compound::FixedBinary, nullable, + variation, vec![x.len() as u64], ) } @@ -621,6 +723,7 @@ fn parse_decimal( x: &substrait::expression::literal::Decimal, y: &mut context::Context, nullable: bool, + variation: Option>>, ) -> diagnostic::Result { proto_primitive_field!(x, y, precision, |x, _| { if *x < 0 { @@ -660,6 +763,7 @@ fn parse_decimal( LiteralValue::Data16(val), data_type::Compound::Decimal, nullable, + variation, vec![precision, scale], ) } @@ -673,6 +777,7 @@ fn parse_struct_int( x: &substrait::expression::literal::Struct, y: &mut context::Context, nullable: bool, + variation: Option>>, ) -> diagnostic::Result { let (values, types): (Vec<_>, Vec<_>) = proto_repeated_field!(x, y, fields, parse_literal) .1 @@ -687,6 +792,7 @@ fn parse_struct_int( LiteralValue::Items(values), data_type::Compound::Struct, nullable, + variation, types, ) } @@ -696,8 +802,9 @@ pub fn parse_struct( x: &substrait::expression::literal::Struct, y: &mut context::Context, nullable: bool, + variation: Option>>, ) -> diagnostic::Result { - let literal = parse_struct_int(x, y, nullable)?; + let literal = parse_struct_int(x, y, nullable, variation)?; y.set_data_type(literal.data_type().clone()); Ok(literal) } @@ -707,6 +814,7 @@ fn parse_list( x: &substrait::expression::literal::List, y: &mut context::Context, nullable: bool, + variation: Option>>, ) -> diagnostic::Result { let values: Vec<_> = proto_required_repeated_field!(x, y, values, parse_literal) .1 @@ -732,6 +840,7 @@ fn parse_list( LiteralValue::Items(values), data_type::Compound::List, nullable, + variation, vec![data_type], ) } @@ -741,6 +850,7 @@ fn parse_map( x: &substrait::expression::literal::Map, y: &mut context::Context, nullable: bool, + variation: Option>>, ) -> diagnostic::Result { let values: Vec<_> = proto_required_repeated_field!(x, y, key_values, |x, y| { let key = proto_required_field!(x, y, key, parse_literal) @@ -781,6 +891,7 @@ fn parse_map( LiteralValue::Pairs(values), data_type::Compound::Map, nullable, + variation, vec![key_type, value_type], ) } @@ -789,7 +900,6 @@ fn parse_map( fn parse_empty_list( x: &substrait::r#type::List, y: &mut context::Context, - _nullable: bool, ) -> diagnostic::Result { // FIXME: nullability is redundantly specified, and the type // variation reference would be if it had gotten the same @@ -806,7 +916,6 @@ fn parse_empty_list( fn parse_empty_map( x: &substrait::r#type::Map, y: &mut context::Context, - _nullable: bool, ) -> diagnostic::Result { // FIXME: same note as for EmptyList. types::parse_map(x, y)?; @@ -817,11 +926,7 @@ fn parse_empty_map( } /// Parses a null literal. -fn parse_null( - x: &substrait::Type, - y: &mut context::Context, - _nullable: bool, -) -> diagnostic::Result { +fn parse_null(x: &substrait::Type, y: &mut context::Context) -> diagnostic::Result { // FIXME: same note as for EmptyList. types::parse_type(x, y)?; let data_type = y.data_type(); @@ -838,40 +943,75 @@ fn parse_null( } } +fn parse_user_defined( + x: &substrait::expression::literal::UserDefined, + y: &mut context::Context, + nullable: bool, + variation: Option>>, +) -> diagnostic::Result { + let extension_type = proto_primitive_field!( + x, + y, + type_reference, + extensions::simple::parse_type_reference + ) + .1; + proto_required_field!(x, y, value, extensions::advanced::parse_functional_any); + Ok(Literal { + value: LiteralValue::UserDefined, + data_type: data_type::DataType::new( + if let Some(extension_type) = extension_type { + data_type::Class::UserDefined(extension_type) + } else { + data_type::Class::Unresolved + }, + nullable, + variation, + vec![], + )?, + }) +} + /// Parse a literal value. Returns the parsed literal. fn parse_literal_type( x: &substrait::expression::literal::LiteralType, y: &mut context::Context, nullable: bool, + variation: Option>>, ) -> diagnostic::Result { use substrait::expression::literal::LiteralType; match x { - LiteralType::Boolean(x) => parse_boolean(x, y, nullable), - LiteralType::I8(x) => parse_i8(x, y, nullable), - LiteralType::I16(x) => parse_i16(x, y, nullable), - LiteralType::I32(x) => parse_i32(x, y, nullable), - LiteralType::I64(x) => parse_i64(x, y, nullable), - LiteralType::Fp32(x) => parse_fp32(x, y, nullable), - LiteralType::Fp64(x) => parse_fp64(x, y, nullable), - LiteralType::String(x) => parse_string(x, y, nullable), - LiteralType::Binary(x) => parse_binary(x, y, nullable), - LiteralType::Timestamp(x) => parse_timestamp(x, y, nullable), - LiteralType::TimestampTz(x) => parse_timestamp_tz(x, y, nullable), - LiteralType::Date(x) => parse_date(x, y, nullable), - LiteralType::Time(x) => parse_time(x, y, nullable), - LiteralType::IntervalYearToMonth(x) => parse_interval_year_to_month(x, y, nullable), - LiteralType::IntervalDayToSecond(x) => parse_interval_day_to_second(x, y, nullable), - LiteralType::Uuid(x) => parse_uuid(x, y, nullable), - LiteralType::FixedChar(x) => parse_fixed_char(x, y, nullable), - LiteralType::VarChar(x) => parse_var_char(x, y, nullable), - LiteralType::FixedBinary(x) => parse_fixed_binary(x, y, nullable), - LiteralType::Decimal(x) => parse_decimal(x, y, nullable), - LiteralType::Struct(x) => parse_struct_int(x, y, nullable), - LiteralType::List(x) => parse_list(x, y, nullable), - LiteralType::Map(x) => parse_map(x, y, nullable), - LiteralType::EmptyList(x) => parse_empty_list(x, y, nullable), - LiteralType::EmptyMap(x) => parse_empty_map(x, y, nullable), - LiteralType::Null(x) => parse_null(x, y, nullable), + LiteralType::Boolean(x) => parse_boolean(x, y, nullable, variation), + LiteralType::I8(x) => parse_i8(x, y, nullable, variation), + LiteralType::I16(x) => parse_i16(x, y, nullable, variation), + LiteralType::I32(x) => parse_i32(x, y, nullable, variation), + LiteralType::I64(x) => parse_i64(x, y, nullable, variation), + LiteralType::Fp32(x) => parse_fp32(x, y, nullable, variation), + LiteralType::Fp64(x) => parse_fp64(x, y, nullable, variation), + LiteralType::String(x) => parse_string(x, y, nullable, variation), + LiteralType::Binary(x) => parse_binary(x, y, nullable, variation), + LiteralType::Timestamp(x) => parse_timestamp(x, y, nullable, variation), + LiteralType::TimestampTz(x) => parse_timestamp_tz(x, y, nullable, variation), + LiteralType::Date(x) => parse_date(x, y, nullable, variation), + LiteralType::Time(x) => parse_time(x, y, nullable, variation), + LiteralType::IntervalYearToMonth(x) => { + parse_interval_year_to_month(x, y, nullable, variation) + } + LiteralType::IntervalDayToSecond(x) => { + parse_interval_day_to_second(x, y, nullable, variation) + } + LiteralType::Uuid(x) => parse_uuid(x, y, nullable, variation), + LiteralType::FixedChar(x) => parse_fixed_char(x, y, nullable, variation), + LiteralType::VarChar(x) => parse_var_char(x, y, nullable, variation), + LiteralType::FixedBinary(x) => parse_fixed_binary(x, y, nullable, variation), + LiteralType::Decimal(x) => parse_decimal(x, y, nullable, variation), + LiteralType::Struct(x) => parse_struct_int(x, y, nullable, variation), + LiteralType::List(x) => parse_list(x, y, nullable, variation), + LiteralType::Map(x) => parse_map(x, y, nullable, variation), + LiteralType::EmptyList(x) => parse_empty_list(x, y), + LiteralType::EmptyMap(x) => parse_empty_map(x, y), + LiteralType::Null(x) => parse_null(x, y), + LiteralType::UserDefined(x) => parse_user_defined(x, y, nullable, variation), } } @@ -880,34 +1020,27 @@ pub fn parse_literal( x: &substrait::expression::Literal, y: &mut context::Context, ) -> diagnostic::Result { - // Parse type parameters that apply to all literals (except empty objects - // and null...). - if !matches!( + // Whether the nullability and variation attributes are stored in this + // message or in an embedded type message. + let attributes_here = !matches!( x.literal_type, Some(substrait::expression::literal::LiteralType::EmptyList(_)) | Some(substrait::expression::literal::LiteralType::EmptyMap(_)) | Some(substrait::expression::literal::LiteralType::Null(_)) - ) { - // FIXME: why isn't the nullability enum used here? Especially - // considering nullability here actually should be unspecified when - // above match yields false, while it must be specified everywhere - // else. Better yet, change the semantics as described in the other - // fixmes such that it is always mandatory everywhere, and then use - // a boolean everywhere? If the point of the enum is to allow types - // to be "partially unresolved," then the type system is pretty - // fundamentally broken, since overload resolution depends on it. - proto_primitive_field!(x, y, nullable); + ); - // FIXME: why would literals not support type variations? Feels like - // there should be a type variation reference here. + // Parse nullability attribute. + // FIXME: why isn't the nullability enum used here? Especially + // considering nullability here actually should be unspecified when + // above match yields false, while it must be specified everywhere + // else. Better yet, change the semantics as described in the other + // fixmes such that it is always mandatory everywhere, and then use + // a boolean everywhere? If the point of the enum is to allow types + // to be "partially unresolved," then the type system is pretty + // fundamentally broken, since overload resolution depends on it. + if attributes_here { + proto_primitive_field!(x, y, nullable); } else { - // FIXME: this is all very ugly. Since all types can be made nullable - // anyway, why isn't the nullability field taken out of the type kind - // for types as well? Then the "empty" values can just refer to the - // type kind rather than the whole type message, and the problem would - // be solved. Likewise, I don't see why type variations should get - // special treatment in the sense that (currently) user-defined types - // can't also have variations. Why explicitly disallow that? proto_primitive_field!(x, y, nullable, |x, y| { // Send diagnostic only when x is not set to its default value, // since the default value is indistinguishable from unspecified. @@ -928,10 +1061,49 @@ pub fn parse_literal( }); } - // Parse the literal value. - let literal = proto_required_field!(x, y, literal_type, parse_literal_type, x.nullable) + // Parse variation. + let variation = if attributes_here { + proto_primitive_field!( + x, + y, + type_variation_reference, + extensions::simple::parse_type_variation_reference + ) .1 - .unwrap_or_default(); + .flatten() + } else { + proto_primitive_field!(x, y, type_variation_reference, |x, y| { + // Send diagnostic only when x is not set to its default value, + // since the default value is indistinguishable from unspecified. + if x != &0 { + diagnostic!( + y, + Info, + RedundantField, + "this field is inoperative for empty lists, empty maps, and null." + ); + } else { + comment!( + y, + "This field is inoperative for empty lists, empty maps, and null." + ); + } + Ok(()) + }); + None + }; + + // Parse the literal value. + let literal = proto_required_field!( + x, + y, + literal_type, + parse_literal_type, + x.nullable, + variation + ) + .1 + .unwrap_or_default(); // Describe node. y.set_data_type(literal.data_type().clone()); diff --git a/rs/src/parse/extensions/simple/mod.rs b/rs/src/parse/extensions/simple/mod.rs index f5dc8881..e9b7ab27 100644 --- a/rs/src/parse/extensions/simple/mod.rs +++ b/rs/src/parse/extensions/simple/mod.rs @@ -27,16 +27,18 @@ pub fn parse_name(x: &String, _y: &mut context::Context) -> Result { } } -/// "Parse" an anchor. This just reports an error if the anchor is 0. -fn parse_anchor(x: &u32, _y: &mut context::Context) -> Result { +/// "Parse" an anchor. This just reports a warning if the anchor is 0. +fn parse_anchor(x: &u32, y: &mut context::Context) -> Result { if *x == 0 { - Err(cause!( - IllegalValue, - "anchor 0 is reserved to disambiguate unspecified optional references" - )) - } else { - Ok(*x) + diagnostic!( + y, + Warning, + LinkAnchorZero, + "use of anchor zero is discouraged, as references set to \ + zero may be confused with \"unspecified\"." + ); } + Ok(*x) } /// Parse a mapping from a URI anchor to a YAML extension. @@ -91,19 +93,24 @@ fn describe_reference(y: &mut context::Context, reference: &Arc Result>> { +) -> Result>>> { match y.tvars().resolve(x).cloned() { Some((variation, path)) => { describe_reference(y, &variation); link!(y, path, "Type variation anchor is defined here"); - Ok(variation) + Ok(Some(variation)) } None => { - describe!(y, Misc, "Unresolved type variation"); - Err(cause!( - LinkMissingAnchor, - "Type variation anchor {x} does not exist" - )) + if x == &0 { + describe!(y, Misc, "Implicit default type variation"); + Ok(None) + } else { + describe!(y, Misc, "Unresolved type variation"); + Err(cause!( + LinkMissingAnchor, + "Type variation anchor {x} does not exist" + )) + } } } } diff --git a/rs/src/parse/relations/read.rs b/rs/src/parse/relations/read.rs index df65de7e..c35cf969 100644 --- a/rs/src/parse/relations/read.rs +++ b/rs/src/parse/relations/read.rs @@ -39,7 +39,7 @@ fn parse_virtual_table( // Parse rows, ensuring that they all have the same type. proto_repeated_field!(x, y, values, |x, y| { - let result = literals::parse_struct(x, y, false); + let result = literals::parse_struct(x, y, false, None); data_type = types::assert_equal( y, &y.data_type(), diff --git a/rs/src/parse/types.rs b/rs/src/parse/types.rs index 3ebb5194..e44b6499 100644 --- a/rs/src/parse/types.rs +++ b/rs/src/parse/types.rs @@ -29,18 +29,6 @@ fn parse_required_nullability( } } -/// Parses an optional type variation reference. -fn parse_type_variation_reference( - x: &u32, - y: &mut context::Context, -) -> diagnostic::Result { - if *x == 0 { - Ok(None) - } else { - Some(extensions::simple::parse_type_variation_reference(x, y)).transpose() - } -} - /// Parses an unsigned integer type parameter. fn parse_integral_type_parameter( x: &i32, @@ -67,7 +55,7 @@ macro_rules! parse_simple_type { $input, $context, type_variation_reference, - parse_type_variation_reference + extensions::simple::parse_type_variation_reference ) .1; @@ -211,7 +199,7 @@ macro_rules! parse_compound_type_with_length { $input, $context, type_variation_reference, - parse_type_variation_reference + extensions::simple::parse_type_variation_reference ) .1; @@ -282,7 +270,7 @@ pub fn parse_decimal( x, y, type_variation_reference, - parse_type_variation_reference + extensions::simple::parse_type_variation_reference ) .1; @@ -331,7 +319,7 @@ pub fn parse_struct( x, y, type_variation_reference, - parse_type_variation_reference + extensions::simple::parse_type_variation_reference ) .1; @@ -375,7 +363,7 @@ pub fn parse_list(x: &substrait::r#type::List, y: &mut context::Context) -> diag x, y, type_variation_reference, - parse_type_variation_reference + extensions::simple::parse_type_variation_reference ) .1; @@ -424,7 +412,7 @@ pub fn parse_map(x: &substrait::r#type::Map, y: &mut context::Context) -> diagno x, y, type_variation_reference, - parse_type_variation_reference + extensions::simple::parse_type_variation_reference ) .1; @@ -450,6 +438,9 @@ pub fn parse_map(x: &substrait::r#type::Map, y: &mut context::Context) -> diagno /// Parses a user-defined type. pub fn parse_user_defined(x: &u32, y: &mut context::Context) -> diagnostic::Result<()> { + // FIXME: why can't user-defined types also have type variations associated + // with them? + // Parse fields. let user_type = extensions::simple::parse_type_reference(x, y) .map_err(|e| diagnostic!(y, Error, e)) diff --git a/substrait b/substrait index 88463636..8e206b95 160000 --- a/substrait +++ b/substrait @@ -1 +1 @@ -Subproject commit 88463636b22a503adeddd9cb4da1295bbc5b15be +Subproject commit 8e206b9594880886c513c8437663fac15e0dfe59 diff --git a/tests/tests/expressions/literals/interval_day.yaml b/tests/tests/expressions/literals/interval_day.yaml index 5adedae2..b073abd9 100644 --- a/tests/tests/expressions/literals/interval_day.yaml +++ b/tests/tests/expressions/literals/interval_day.yaml @@ -19,50 +19,71 @@ plan: - interval_day_to_second: days: 0 seconds: 0 + microseconds: 0 nullable: false - interval_day_to_second: days: 123 seconds: 456 + microseconds: 789000 nullable: true - fields: - interval_day_to_second: days: 3650000 seconds: 0 + microseconds: 0 nullable: false - interval_day_to_second: days: 3650001 days__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ] seconds: 0 + microseconds: 0 nullable: true - fields: - interval_day_to_second: days: -3650000 seconds: 0 + microseconds: 0 nullable: false - interval_day_to_second: days: -3650001 days__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ] seconds: 0 + microseconds: 0 nullable: true - fields: - interval_day_to_second: days: -2147483648 days__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ] seconds: 0 + microseconds: 0 nullable: false - interval_day_to_second: days: 2147483647 days__test: [ diag: { level: e, code: 6002, msg: "*out of range*" } ] seconds: 0 + microseconds: 0 nullable: true - fields: - interval_day_to_second: days: 0 seconds: -2147483648 + microseconds: 0 nullable: false - interval_day_to_second: days: 0 seconds: 2147483647 + microseconds: 0 + nullable: true + - fields: + - interval_day_to_second: + days: 0 + seconds: 0 + microseconds: -2147483648 + nullable: false + - interval_day_to_second: + days: 0 + seconds: 0 + microseconds: 2147483647 nullable: true - fields: - "null": From 337ac1bcbd3e80b6e9954d33b1b2de9c5eb35c4b Mon Sep 17 00:00:00 2001 From: Jeroen van Straten Date: Mon, 23 May 2022 14:33:56 +0200 Subject: [PATCH 2/2] feat: implement new syntax for function arguments --- rs/src/output/diagnostic.rs | 3 + rs/src/parse/expressions/conditionals.rs | 36 ++--- rs/src/parse/expressions/functions.rs | 167 ++++++++++++++++++++- rs/src/parse/expressions/literals.rs | 4 +- rs/src/parse/expressions/misc.rs | 4 +- rs/src/parse/expressions/mod.rs | 113 ++++++++++---- rs/src/parse/expressions/references/mod.rs | 6 +- rs/src/parse/relations/aggregate.rs | 6 +- substrait | 2 +- tests/tests/tpc-h/tpc-h01.yaml | 1 + tests/tests/tpc-h/tpc-h02.yaml | 1 + tests/tests/tpc-h/tpc-h03.yaml | 1 + tests/tests/tpc-h/tpc-h04.yaml | 1 + tests/tests/tpc-h/tpc-h05.yaml | 1 + tests/tests/tpc-h/tpc-h06.yaml | 1 + tests/tests/tpc-h/tpc-h07.yaml | 1 + tests/tests/tpc-h/tpc-h08.yaml | 1 + tests/tests/tpc-h/tpc-h09.yaml | 1 + tests/tests/tpc-h/tpc-h10.yaml | 1 + tests/tests/tpc-h/tpc-h14.yaml | 1 + tests/tests/tpc-h/tpc-h19.yaml | 1 + 21 files changed, 286 insertions(+), 67 deletions(-) diff --git a/rs/src/output/diagnostic.rs b/rs/src/output/diagnostic.rs index ba204437..58f885ee 100644 --- a/rs/src/output/diagnostic.rs +++ b/rs/src/output/diagnostic.rs @@ -171,6 +171,9 @@ pub enum Classification { #[strum(props(Description = "illegal glob"))] IllegalGlob = 5, + #[strum(props(Description = "deprecation"))] + Deprecation = 6, + // Protobuf-related diagnostics (group 1). #[strum(props(HiddenDescription = "protobuf-related diagnostic"))] Proto = 1000, diff --git a/rs/src/parse/expressions/conditionals.rs b/rs/src/parse/expressions/conditionals.rs index 8b298fc7..cc0d30c1 100644 --- a/rs/src/parse/expressions/conditionals.rs +++ b/rs/src/parse/expressions/conditionals.rs @@ -54,8 +54,8 @@ pub fn parse_if_then( // Save to the "arguments" of the function we'll use to describe this // expression. - args.push(condition); - args.push(value); + args.push(condition.into()); + args.push(value.into()); Ok(()) }); @@ -76,14 +76,14 @@ pub fn parse_if_then( // Save to the "arguments" of the function we'll use to describe this // expression. - args.push(value); + args.push(value.into()); } else { // Allow missing else, making the type nullable. comment!(y, "Otherwise, yield null."); return_type = return_type.make_nullable(); // Yield null for the else clause. - args.push(expressions::Expression::new_null(return_type.clone())); + args.push(expressions::Expression::new_null(return_type.clone()).into()); } // Describe node. @@ -110,7 +110,7 @@ pub fn parse_switch( // Parse value to match. let (n, e) = proto_boxed_required_field!(x, y, r#match, expressions::parse_expression); let mut match_type = n.data_type(); - args.push(e.unwrap_or_default()); + args.push(e.unwrap_or_default().into()); // Handle branches. proto_required_repeated_field!(x, y, ifs, |x, y| { @@ -143,8 +143,8 @@ pub fn parse_switch( // Save to the "arguments" of the function we'll use to describe this // expression. - args.push(match_value.into()); - args.push(value); + args.push(expressions::Expression::from(match_value).into()); + args.push(value.into()); Ok(()) }); @@ -165,14 +165,14 @@ pub fn parse_switch( // Save to the "arguments" of the function we'll use to describe this // expression. - args.push(value); + args.push(value.into()); } else { // Allow missing else, making the type nullable. comment!(y, "Otherwise, yield null."); return_type = return_type.make_nullable(); // Yield null for the else clause. - args.push(expressions::Expression::new_null(return_type.clone())); + args.push(expressions::Expression::new_null(return_type.clone()).into()); } // Describe node. @@ -200,13 +200,13 @@ pub fn parse_singular_or_list( // Parse value to match. let (n, e) = proto_boxed_required_field!(x, y, value, expressions::parse_expression); let match_type = n.data_type(); - args.push(e.unwrap_or_default()); + args.push(e.unwrap_or_default().into()); // Handle allowed values. proto_required_repeated_field!(x, y, options, |x, y| { let expression = expressions::parse_expression(x, y)?; let value_type = y.data_type(); - args.push(expression); + args.push(expression.into()); // Check that the type is the same as the value. types::assert_equal( @@ -249,17 +249,19 @@ pub fn parse_multi_or_list( // Parse value to match. let (ns, es) = proto_required_repeated_field!(x, y, value, expressions::parse_expression); let match_types = ns.iter().map(|x| x.data_type()).collect::>(); - args.push(expressions::Expression::Tuple( - es.into_iter().map(|x| x.unwrap_or_default()).collect(), - )); + args.push( + expressions::Expression::Tuple(es.into_iter().map(|x| x.unwrap_or_default()).collect()) + .into(), + ); // Handle allowed values. proto_required_repeated_field!(x, y, options, |x, y| { let (ns, es) = proto_required_repeated_field!(x, y, fields, expressions::parse_expression); let value_types = ns.iter().map(|x| x.data_type()).collect::>(); - args.push(expressions::Expression::Tuple( - es.into_iter().map(|x| x.unwrap_or_default()).collect(), - )); + args.push( + expressions::Expression::Tuple(es.into_iter().map(|x| x.unwrap_or_default()).collect()) + .into(), + ); // Check that the type is the same as the value. if match_types.len() != value_types.len() { diff --git a/rs/src/parse/expressions/functions.rs b/rs/src/parse/expressions/functions.rs index a37ec938..b070eb04 100644 --- a/rs/src/parse/expressions/functions.rs +++ b/rs/src/parse/expressions/functions.rs @@ -12,8 +12,119 @@ use crate::parse::expressions; use crate::parse::extensions; use crate::parse::sorts; use crate::parse::types; +use crate::string_util; +use crate::string_util::Describe; use std::sync::Arc; +/// A function argument; either a value, a type, or an enum option. +#[derive(Clone, Debug, PartialEq)] +pub enum FunctionArgument { + /// Used for value arguments or normal expressions. + Value(expressions::Expression), + + /// Used for type arguments. + Type(Arc), + + /// Used for enum option arguments. + Enum(Option), +} + +impl Default for FunctionArgument { + fn default() -> Self { + FunctionArgument::Value(expressions::Expression::default()) + } +} + +impl From for FunctionArgument { + fn from(expr: expressions::Expression) -> Self { + FunctionArgument::Value(expr) + } +} + +impl Describe for FunctionArgument { + fn describe( + &self, + f: &mut std::fmt::Formatter<'_>, + limit: string_util::Limit, + ) -> std::fmt::Result { + match self { + FunctionArgument::Value(e) => e.describe(f, limit), + FunctionArgument::Type(e) => e.describe(f, limit), + FunctionArgument::Enum(Some(x)) => string_util::describe_identifier(f, x, limit), + FunctionArgument::Enum(None) => write!(f, "-"), + } + } +} + +impl std::fmt::Display for FunctionArgument { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.display().fmt(f) + } +} + +/// Parse an enum option argument type. +fn parse_enum_type( + x: &substrait::function_argument::r#enum::EnumKind, + _y: &mut context::Context, +) -> diagnostic::Result> { + match x { + substrait::function_argument::r#enum::EnumKind::Specified(x) => Ok(Some(x.clone())), + substrait::function_argument::r#enum::EnumKind::Unspecified(_) => Ok(None), + } +} + +/// Parse an enum option argument. +fn parse_enum( + x: &substrait::function_argument::Enum, + y: &mut context::Context, +) -> diagnostic::Result> { + Ok(proto_required_field!(x, y, enum_kind, parse_enum_type) + .1 + .flatten()) +} + +/// Parse a 0.3.0+ function argument type. +fn parse_function_argument_type( + x: &substrait::function_argument::ArgType, + y: &mut context::Context, +) -> diagnostic::Result { + match x { + substrait::function_argument::ArgType::Enum(x) => { + Ok(FunctionArgument::Enum(parse_enum(x, y)?)) + } + substrait::function_argument::ArgType::Type(x) => { + types::parse_type(x, y)?; + Ok(FunctionArgument::Type(y.data_type())) + } + substrait::function_argument::ArgType::Value(x) => Ok(FunctionArgument::Value( + expressions::parse_expression(x, y)?, + )), + } +} + +/// Parse a 0.3.0+ function argument. +fn parse_function_argument( + x: &substrait::FunctionArgument, + y: &mut context::Context, +) -> diagnostic::Result { + Ok( + proto_required_field!(x, y, arg_type, parse_function_argument_type) + .1 + .unwrap_or_default(), + ) +} + +/// Parse a pre-0.3.0 function argument expression. +fn parse_legacy_function_argument( + x: &substrait::Expression, + y: &mut context::Context, +) -> diagnostic::Result { + expressions::parse_legacy_function_argument(x, y).map(|x| match x { + expressions::ExpressionOrEnum::Value(x) => FunctionArgument::Value(x), + expressions::ExpressionOrEnum::Enum(x) => FunctionArgument::Enum(x), + }) +} + /// Matches a function call with its YAML definition, yielding its return type. /// Yields an unresolved type if resolution fails. pub fn check_function( @@ -41,7 +152,8 @@ pub fn check_function( fn parse_function( y: &mut context::Context, function: Option>>, - arguments: (Vec>, Vec>), + arguments: (Vec>, Vec>), + legacy_arguments: (Vec>, Vec>), return_type: Arc, ) -> (Arc, expressions::Expression) { // Determine the name of the function. @@ -50,6 +162,36 @@ fn parse_function( .map(|x| x.name.to_string()) .unwrap_or_else(|| String::from("?")); + // Reconcile v3.0.0+ vs older function argument syntax. + let arguments = if legacy_arguments.1.is_empty() { + arguments + } else if arguments.1.is_empty() { + diagnostic!( + y, + Warning, + Deprecation, + "the args field for specifying function arguments was deprecated Substrait 0.3.0 (#161)" + ); + legacy_arguments + } else { + if arguments != legacy_arguments { + diagnostic!( + y, + Error, + IllegalValue, + "mismatch between v0.3+ and legacy function argument specification" + ); + comment!( + y, + "If both the v0.3+ and legacy syntax is used to specify function \ + arguments, please make sure both map to the same arguments. If \ + the argument pack is not representable using the legacy syntax, \ + do not use it." + ); + } + arguments + }; + // Unpack the arguments into the function's enum options and regular // arguments. let mut opt_values = vec![]; @@ -61,7 +203,7 @@ fn parse_function( .into_iter() .zip(arguments.1.into_iter().map(|x| x.unwrap_or_default())) { - if let expressions::Expression::EnumVariant(x) = &expr { + if let FunctionArgument::Enum(x) = &expr { if opt_exprs.is_empty() && !arg_exprs.is_empty() { diagnostic!( y, @@ -122,13 +264,16 @@ pub fn parse_scalar_function( extensions::simple::parse_function_reference ) .1; - let arguments = proto_repeated_field!(x, y, args, expressions::parse_function_argument); + #[allow(deprecated)] + let legacy_arguments = proto_repeated_field!(x, y, args, parse_legacy_function_argument); + let arguments = proto_repeated_field!(x, y, arguments, parse_function_argument); let return_type = proto_required_field!(x, y, output_type, types::parse_type) .0 .data_type(); // Check function information. - let (return_type, expression) = parse_function(y, function, arguments, return_type); + let (return_type, expression) = + parse_function(y, function, arguments, legacy_arguments, return_type); // Describe node. y.set_data_type(return_type); @@ -168,13 +313,16 @@ pub fn parse_window_function( extensions::simple::parse_function_reference ) .1; - let arguments = proto_repeated_field!(x, y, args, expressions::parse_function_argument); + #[allow(deprecated)] + let legacy_arguments = proto_repeated_field!(x, y, args, parse_legacy_function_argument); + let arguments = proto_repeated_field!(x, y, arguments, parse_function_argument); let return_type = proto_required_field!(x, y, output_type, types::parse_type) .0 .data_type(); // Check function information. - let (return_type, expression) = parse_function(y, function, arguments, return_type); + let (return_type, expression) = + parse_function(y, function, arguments, legacy_arguments, return_type); // Parse modifiers. proto_repeated_field!(x, y, partitions, expressions::parse_expression); @@ -216,13 +364,16 @@ pub fn parse_aggregate_function( extensions::simple::parse_function_reference ) .1; - let arguments = proto_repeated_field!(x, y, args, expressions::parse_function_argument); + #[allow(deprecated)] + let legacy_arguments = proto_repeated_field!(x, y, args, parse_legacy_function_argument); + let arguments = proto_repeated_field!(x, y, arguments, parse_function_argument); let return_type = proto_required_field!(x, y, output_type, types::parse_type) .0 .data_type(); // Check function information. - let (return_type, expression) = parse_function(y, function, arguments, return_type); + let (return_type, expression) = + parse_function(y, function, arguments, legacy_arguments, return_type); // Parse modifiers. proto_repeated_field!(x, y, sorts, sorts::parse_sort_field); diff --git a/rs/src/parse/expressions/literals.rs b/rs/src/parse/expressions/literals.rs index af502481..1ec9f33c 100644 --- a/rs/src/parse/expressions/literals.rs +++ b/rs/src/parse/expressions/literals.rs @@ -14,7 +14,7 @@ use crate::string_util::Describe; use std::sync::Arc; /// The value of a literal, not including type information. -#[derive(Clone)] +#[derive(Clone, Debug, PartialEq)] enum LiteralValue { /// May be used for any nullable type. Null, @@ -57,7 +57,7 @@ impl Default for LiteralValue { } /// A complete literal, including type information. -#[derive(Clone, Default)] +#[derive(Clone, Debug, Default, PartialEq)] pub struct Literal { /// The value of the literal. value: LiteralValue, diff --git a/rs/src/parse/expressions/misc.rs b/rs/src/parse/expressions/misc.rs index 0dcf9408..168296ad 100644 --- a/rs/src/parse/expressions/misc.rs +++ b/rs/src/parse/expressions/misc.rs @@ -13,7 +13,7 @@ use crate::string_util; pub fn parse_enum( x: &substrait::expression::Enum, y: &mut context::Context, -) -> diagnostic::Result { +) -> diagnostic::Result { // Parse variant. let variant = proto_required_field!(x, y, enum_kind, |x, y| { match x { @@ -41,7 +41,7 @@ pub fn parse_enum( describe!(y, Misc, "Default function option variant"); } - Ok(expressions::Expression::EnumVariant(variant)) + Ok(expressions::ExpressionOrEnum::Enum(variant)) } /// Parse a typecast expression. Returns a description of said expression. diff --git a/rs/src/parse/expressions/mod.rs b/rs/src/parse/expressions/mod.rs index f6c74070..66171b85 100644 --- a/rs/src/parse/expressions/mod.rs +++ b/rs/src/parse/expressions/mod.rs @@ -18,7 +18,7 @@ pub mod references; pub mod subqueries; /// Description of an expression. -#[derive(Clone)] +#[derive(Clone, Debug, PartialEq)] pub enum Expression { /// Used for unknown expression types. Unresolved, @@ -31,7 +31,7 @@ pub enum Expression { /// Used for function calls and conditionals (which, really, are just /// builtin function calls). - Function(String, Vec), + Function(String, Vec), /// Used for subqueries, or anything else where the "arguments" are too /// extensive to be reasonably described; the argument list is always @@ -43,11 +43,6 @@ pub enum Expression { /// Used for type casts. Cast(Arc, Box), - - /// Used for function option enum variants. Note that these aren't normal - /// expressions, as they have no associated type. See FIXME at the bottom - /// of this file. - EnumVariant(Option), } impl Default for Expression { @@ -103,8 +98,6 @@ impl Describe for Expression { expression.describe(f, expr_limit)?; write!(f, ")") } - Expression::EnumVariant(Some(x)) => string_util::describe_identifier(f, x, limit), - Expression::EnumVariant(None) => write!(f, "-"), } } } @@ -122,29 +115,82 @@ impl Expression { } } +/// Expressions may include enums to support the legacy function argument +/// specification method. +#[derive(Clone, Debug)] +pub enum ExpressionOrEnum { + /// Used for value arguments or normal expressions. + Value(Expression), + + /// Used for enum function arguments. + Enum(Option), +} + +impl Default for ExpressionOrEnum { + fn default() -> Self { + ExpressionOrEnum::Value(Expression::Unresolved) + } +} + +impl From for ExpressionOrEnum { + fn from(e: Expression) -> Self { + ExpressionOrEnum::Value(e) + } +} + +impl Describe for ExpressionOrEnum { + fn describe( + &self, + f: &mut std::fmt::Formatter<'_>, + limit: string_util::Limit, + ) -> std::fmt::Result { + match self { + ExpressionOrEnum::Value(e) => e.describe(f, limit), + ExpressionOrEnum::Enum(Some(x)) => string_util::describe_identifier(f, x, limit), + ExpressionOrEnum::Enum(None) => write!(f, "-"), + } + } +} + +impl std::fmt::Display for ExpressionOrEnum { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.display().fmt(f) + } +} + /// Parse an expression type. Returns a description of said expression. fn parse_expression_type( x: &substrait::expression::RexType, y: &mut context::Context, enum_allowed: bool, -) -> diagnostic::Result { - match x { +) -> diagnostic::Result { + Ok(match x { substrait::expression::RexType::Literal(x) => { - literals::parse_literal(x, y).map(Expression::from) + literals::parse_literal(x, y).map(Expression::from)?.into() } substrait::expression::RexType::Selection(x) => { - references::parse_field_reference(x.as_ref(), y).map(Expression::from) + references::parse_field_reference(x.as_ref(), y) + .map(Expression::from)? + .into() + } + substrait::expression::RexType::ScalarFunction(x) => { + functions::parse_scalar_function(x, y)?.into() + } + substrait::expression::RexType::WindowFunction(x) => { + functions::parse_window_function(x, y)?.into() + } + substrait::expression::RexType::IfThen(x) => { + conditionals::parse_if_then(x.as_ref(), y)?.into() } - substrait::expression::RexType::ScalarFunction(x) => functions::parse_scalar_function(x, y), - substrait::expression::RexType::WindowFunction(x) => functions::parse_window_function(x, y), - substrait::expression::RexType::IfThen(x) => conditionals::parse_if_then(x.as_ref(), y), substrait::expression::RexType::SwitchExpression(x) => { - conditionals::parse_switch(x.as_ref(), y) + conditionals::parse_switch(x.as_ref(), y)?.into() } substrait::expression::RexType::SingularOrList(x) => { - conditionals::parse_singular_or_list(x.as_ref(), y) + conditionals::parse_singular_or_list(x.as_ref(), y)?.into() + } + substrait::expression::RexType::MultiOrList(x) => { + conditionals::parse_multi_or_list(x, y)?.into() } - substrait::expression::RexType::MultiOrList(x) => conditionals::parse_multi_or_list(x, y), substrait::expression::RexType::Enum(x) => { if !enum_allowed { diagnostic!( @@ -154,11 +200,13 @@ fn parse_expression_type( "function option enum variants are not allowed here" ); } - misc::parse_enum(x, y) + misc::parse_enum(x, y)? } - substrait::expression::RexType::Cast(x) => misc::parse_cast(x.as_ref(), y), - substrait::expression::RexType::Subquery(x) => subqueries::parse_subquery(x.as_ref(), y), - } + substrait::expression::RexType::Cast(x) => misc::parse_cast(x.as_ref(), y)?.into(), + substrait::expression::RexType::Subquery(x) => { + subqueries::parse_subquery(x.as_ref(), y)?.into() + } + }) } /// Parse an expression. Returns a description of said expression. @@ -166,7 +214,7 @@ fn parse_expression_internal( x: &substrait::Expression, y: &mut context::Context, enum_allowed: bool, -) -> diagnostic::Result { +) -> diagnostic::Result { // Parse the expression. let (n, e) = proto_required_field!(x, y, rex_type, parse_expression_type, enum_allowed); let expression = e.unwrap_or_default(); @@ -185,7 +233,10 @@ pub fn parse_expression( x: &substrait::Expression, y: &mut context::Context, ) -> diagnostic::Result { - parse_expression_internal(x, y, false) + match parse_expression_internal(x, y, false)? { + ExpressionOrEnum::Value(x) => Ok(x), + ExpressionOrEnum::Enum(_) => Err(cause!(IllegalValue, "enums are not allowed here")), + } } /// Parse a predicate expression (a normal expression that yields a boolean). @@ -194,7 +245,7 @@ pub fn parse_predicate( x: &substrait::Expression, y: &mut context::Context, ) -> diagnostic::Result { - let expression = parse_expression_internal(x, y, false)?; + let expression = parse_expression(x, y)?; let data_type = y.data_type(); if !matches!( data_type.class(), @@ -211,13 +262,11 @@ pub fn parse_predicate( Ok(expression) } -/// Parse a function argument, which can be an expression or an enum option. -fn parse_function_argument( +/// Parse a legacy function argument, which can be an expression or an enum +/// option. +fn parse_legacy_function_argument( x: &substrait::Expression, y: &mut context::Context, -) -> diagnostic::Result { +) -> diagnostic::Result { parse_expression_internal(x, y, true) } - -// FIXME: above should really be solved with a oneof, or better yet, by -// separating the options passed to a function from its arguments. diff --git a/rs/src/parse/expressions/references/mod.rs b/rs/src/parse/expressions/references/mod.rs index 15ef94d9..6c1e5450 100644 --- a/rs/src/parse/expressions/references/mod.rs +++ b/rs/src/parse/expressions/references/mod.rs @@ -16,7 +16,7 @@ pub mod mask; pub mod scalar; /// Description of the root of a reference. -#[derive(Clone)] +#[derive(Clone, Debug, PartialEq)] enum Root { Unresolved, Expression(expressions::Expression), @@ -36,7 +36,7 @@ impl Default for Root { } /// Description of a reference path. -#[derive(Clone)] +#[derive(Clone, Debug, PartialEq)] pub struct ReferencePath { // *Reversed* list of segments. segments: Vec, @@ -94,7 +94,7 @@ impl std::fmt::Display for ReferencePath { } /// Description of a reference. -#[derive(Clone)] +#[derive(Clone, Debug, PartialEq)] pub struct Reference { root: Root, path: ReferencePath, diff --git a/rs/src/parse/relations/aggregate.rs b/rs/src/parse/relations/aggregate.rs index 15ed4d63..58b0c18e 100644 --- a/rs/src/parse/relations/aggregate.rs +++ b/rs/src/parse/relations/aggregate.rs @@ -68,8 +68,10 @@ fn parse_measure( "Applies aggregate function {expression:#} to all rows for \ which {filter:#} returns true." ); - let filtered_expression = - expressions::Expression::Function(String::from("filter"), vec![filter, expression]); + let filtered_expression = expressions::Expression::Function( + String::from("filter"), + vec![filter.into(), expression.into()], + ); describe!( y, Expression, diff --git a/substrait b/substrait index 8e206b95..dab71bf5 160000 --- a/substrait +++ b/substrait @@ -1 +1 @@ -Subproject commit 8e206b9594880886c513c8437663fac15e0dfe59 +Subproject commit dab71bf5542a1ce491eefb5627764f854c46eef2 diff --git a/tests/tests/tpc-h/tpc-h01.yaml b/tests/tests/tpc-h/tpc-h01.yaml index de6a273f..7238883e 100644 --- a/tests/tests/tpc-h/tpc-h01.yaml +++ b/tests/tests/tpc-h/tpc-h01.yaml @@ -23,6 +23,7 @@ name: TPC-H01 diags: - { code: 0001, max: i } # Suppress "not yet implemented" warnings +- { code: 0006, max: i } # Suppress deprecation warnings - { code: 3002, max: i } # Suppress function name resolution errors (function parsing isn't implemented yet) - { code: 6003, max: i } # Suppress function definition check warnings (function parsing isn't implemented yet) plan: diff --git a/tests/tests/tpc-h/tpc-h02.yaml b/tests/tests/tpc-h/tpc-h02.yaml index f46139cc..670b213d 100644 --- a/tests/tests/tpc-h/tpc-h02.yaml +++ b/tests/tests/tpc-h/tpc-h02.yaml @@ -49,6 +49,7 @@ name: TPC-H02 diags: - { code: 0001, max: i } # Suppress "not yet implemented" warnings +- { code: 0006, max: i } # Suppress deprecation warnings - { code: 3002, max: i } # Suppress function name resolution errors (function parsing isn't implemented yet) - { code: 6003, max: i } # Suppress function definition check warnings (function parsing isn't implemented yet) plan: diff --git a/tests/tests/tpc-h/tpc-h03.yaml b/tests/tests/tpc-h/tpc-h03.yaml index d1a39140..5f39abd1 100644 --- a/tests/tests/tpc-h/tpc-h03.yaml +++ b/tests/tests/tpc-h/tpc-h03.yaml @@ -28,6 +28,7 @@ name: TPC-H03 diags: - { code: 0001, max: i } # Suppress "not yet implemented" warnings +- { code: 0006, max: i } # Suppress deprecation warnings - { code: 3002, max: i } # Suppress function name resolution errors (function parsing isn't implemented yet) - { code: 6003, max: i } # Suppress function definition check warnings (function parsing isn't implemented yet) plan: diff --git a/tests/tests/tpc-h/tpc-h04.yaml b/tests/tests/tpc-h/tpc-h04.yaml index c99a0941..279ce6f5 100644 --- a/tests/tests/tpc-h/tpc-h04.yaml +++ b/tests/tests/tpc-h/tpc-h04.yaml @@ -25,6 +25,7 @@ name: TPC-H04 diags: - { code: 0001, max: i } # Suppress "not yet implemented" warnings +- { code: 0006, max: i } # Suppress deprecation warnings - { code: 3002, max: i } # Suppress function name resolution errors (function parsing isn't implemented yet) - { code: 6003, max: i } # Suppress function definition check warnings (function parsing isn't implemented yet) plan: diff --git a/tests/tests/tpc-h/tpc-h05.yaml b/tests/tests/tpc-h/tpc-h05.yaml index 5e39be72..2d06d39a 100644 --- a/tests/tests/tpc-h/tpc-h05.yaml +++ b/tests/tests/tpc-h/tpc-h05.yaml @@ -29,6 +29,7 @@ name: TPC-H05 diags: - { code: 0001, max: i } # Suppress "not yet implemented" warnings +- { code: 0006, max: i } # Suppress deprecation warnings - { code: 3002, max: i } # Suppress function name resolution errors (function parsing isn't implemented yet) - { code: 6003, max: i } # Suppress function definition check warnings (function parsing isn't implemented yet) plan: diff --git a/tests/tests/tpc-h/tpc-h06.yaml b/tests/tests/tpc-h/tpc-h06.yaml index ede9e0ca..5f3b75a6 100644 --- a/tests/tests/tpc-h/tpc-h06.yaml +++ b/tests/tests/tpc-h/tpc-h06.yaml @@ -12,6 +12,7 @@ name: TPC-H06 diags: - { code: 0001, max: i } # Suppress "not yet implemented" warnings +- { code: 0006, max: i } # Suppress deprecation warnings - { code: 3002, max: i } # Suppress function name resolution errors (function parsing isn't implemented yet) - { code: 6003, max: i } # Suppress function definition check warnings (function parsing isn't implemented yet) plan: diff --git a/tests/tests/tpc-h/tpc-h07.yaml b/tests/tests/tpc-h/tpc-h07.yaml index 7f585a93..1c99a8ee 100644 --- a/tests/tests/tpc-h/tpc-h07.yaml +++ b/tests/tests/tpc-h/tpc-h07.yaml @@ -41,6 +41,7 @@ name: TPC-H07 diags: - { code: 0001, max: i } # Suppress "not yet implemented" warnings +- { code: 0006, max: i } # Suppress deprecation warnings - { code: 3002, max: i } # Suppress function name resolution errors (function parsing isn't implemented yet) - { code: 6003, max: i } # Suppress function definition check warnings (function parsing isn't implemented yet) plan: diff --git a/tests/tests/tpc-h/tpc-h08.yaml b/tests/tests/tpc-h/tpc-h08.yaml index e9bff2f1..f550bfcf 100644 --- a/tests/tests/tpc-h/tpc-h08.yaml +++ b/tests/tests/tpc-h/tpc-h08.yaml @@ -39,6 +39,7 @@ name: TPC-H08 diags: - { code: 0001, max: i } # Suppress "not yet implemented" warnings +- { code: 0006, max: i } # Suppress deprecation warnings - { code: 3002, max: i } # Suppress function name resolution errors (function parsing isn't implemented yet) - { code: 6003, max: i } # Suppress function definition check warnings (function parsing isn't implemented yet) plan: diff --git a/tests/tests/tpc-h/tpc-h09.yaml b/tests/tests/tpc-h/tpc-h09.yaml index 1c6ae21b..3d02cd6e 100644 --- a/tests/tests/tpc-h/tpc-h09.yaml +++ b/tests/tests/tpc-h/tpc-h09.yaml @@ -34,6 +34,7 @@ name: TPC-H09 diags: - { code: 0001, max: i } # Suppress "not yet implemented" warnings +- { code: 0006, max: i } # Suppress deprecation warnings - { code: 3002, max: i } # Suppress function name resolution errors (function parsing isn't implemented yet) - { code: 6003, max: i } # Suppress function definition check warnings (function parsing isn't implemented yet) plan: diff --git a/tests/tests/tpc-h/tpc-h10.yaml b/tests/tests/tpc-h/tpc-h10.yaml index 67e993c1..4231b33c 100644 --- a/tests/tests/tpc-h/tpc-h10.yaml +++ b/tests/tests/tpc-h/tpc-h10.yaml @@ -34,6 +34,7 @@ name: TPC-H10 diags: - { code: 0001, max: i } # Suppress "not yet implemented" warnings +- { code: 0006, max: i } # Suppress deprecation warnings - { code: 3002, max: i } # Suppress function name resolution errors (function parsing isn't implemented yet) - { code: 6003, max: i } # Suppress function definition check warnings (function parsing isn't implemented yet) plan: diff --git a/tests/tests/tpc-h/tpc-h14.yaml b/tests/tests/tpc-h/tpc-h14.yaml index 38d1afeb..f829f860 100644 --- a/tests/tests/tpc-h/tpc-h14.yaml +++ b/tests/tests/tpc-h/tpc-h14.yaml @@ -15,6 +15,7 @@ name: TPC-H14 diags: - { code: 0001, max: i } # Suppress "not yet implemented" warnings +- { code: 0006, max: i } # Suppress deprecation warnings - { code: 3002, max: i } # Suppress function name resolution errors (function parsing isn't implemented yet) - { code: 6003, max: i } # Suppress function definition check warnings (function parsing isn't implemented yet) plan: diff --git a/tests/tests/tpc-h/tpc-h19.yaml b/tests/tests/tpc-h/tpc-h19.yaml index c3d4396c..cd00087d 100644 --- a/tests/tests/tpc-h/tpc-h19.yaml +++ b/tests/tests/tpc-h/tpc-h19.yaml @@ -37,6 +37,7 @@ name: TPC-H19 diags: - { code: 0001, max: i } # Suppress "not yet implemented" warnings +- { code: 0006, max: i } # Suppress deprecation warnings - { code: 3002, max: i } # Suppress function name resolution errors (function parsing isn't implemented yet) - { code: 6003, max: i } # Suppress function definition check warnings (function parsing isn't implemented yet) plan: