diff --git a/Cargo.toml b/Cargo.toml index dbd6743..ad8b858 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,4 +15,4 @@ version = "0.2.3" [workspace.dependencies] substrait-expr-funcgen = { path = "./substrait-expr-funcgen", version = "0.2.1" } substrait-expr-macros = { path = "./substrait-expr-macros", version = "0.2.1" } -substrait = { version = "0.50.4" } +substrait = { version = "0.62.0" } diff --git a/substrait-expr-funcgen/src/lib.rs b/substrait-expr-funcgen/src/lib.rs index 20c6dd8..17069da 100644 --- a/substrait-expr-funcgen/src/lib.rs +++ b/substrait-expr-funcgen/src/lib.rs @@ -54,8 +54,8 @@ fn generate_type(fn_name: &str, type_name: &str) -> Option { fn generate_arg_type(fn_name: &str, typ: &Type) -> Option { let type_name = match typ { - Type::Variant0(type_str) => type_str.as_str(), - Type::Variant1(_) => "", + Type::String(type_str) => type_str.as_str(), + Type::Object(_) => "", }; if type_name.is_empty() { return None; @@ -70,8 +70,8 @@ fn generate_arg_type(fn_name: &str, typ: &Type) -> Option { fn generate_arg_return(fn_name: &str, typ: &Type) -> Option { let type_name = match typ { - Type::Variant0(type_str) => type_str.as_str(), - Type::Variant1(_) => "", + Type::String(type_str) => type_str.as_str(), + Type::Object(_) => "", }; if type_name.is_empty() { return None; @@ -196,9 +196,9 @@ fn generate_ext_impls(function: &ScalarFunction) -> Result>(); - let prototype = quote!(fn #fn_name_token(&self, #(#arg_name_tokens: Expression),*) -> FunctionBuilder;); + let prototype = quote!(fn #fn_name_token(&self, #(#arg_name_tokens: Expression),*) -> FunctionBuilder<'_>;); let imp = quote!( - fn #fn_name_token(&self, #(#arg_name_tokens: Expression),*) -> FunctionBuilder { + fn #fn_name_token(&self, #(#arg_name_tokens: Expression),*) -> FunctionBuilder<'_> { self.new_builder(&#func_name_caps, vec![#(#arg_name_tokens),*]) } ); @@ -224,7 +224,7 @@ fn generate_function_blocks( let prototypes_impls = extensions .scalar_functions .iter() - .map(|func| generate_ext_impls(func)) + .map(generate_ext_impls) .flat_map(|impls| match impls { Ok(impls) => impls.into_iter().map(Ok).collect(), Err(err) => vec![Err(err)], @@ -262,7 +262,7 @@ pub fn generate_functions_for_yaml(uri: &str, filepath: &str) -> Result Result<()> { let yaml_modules = entries .iter() - .map(|entry| generate_functions_for_yaml(entry.0, &entry.1)) + .map(|entry| generate_functions_for_yaml(entry.0, entry.1)) .collect::>>()?; let crate_name_token: TokenStream = options.get_crate_name().parse()?; diff --git a/substrait-expr-macros/src/lib.rs b/substrait-expr-macros/src/lib.rs index 05124fd..6fa3d07 100644 --- a/substrait-expr-macros/src/lib.rs +++ b/substrait-expr-macros/src/lib.rs @@ -40,7 +40,7 @@ fn rust_to_names_fields(schema: &NestedType) -> proc_macro2::TokenStream { let parsed_fields = schema .fields .iter() - .map(|field| rust_field_to_names_field(field)) + .map(rust_field_to_names_field) .collect::>(); quote! {vec![#(#parsed_fields),*]} } diff --git a/substrait-expr/Cargo.toml b/substrait-expr/Cargo.toml index 105ceae..8cdd265 100644 --- a/substrait-expr/Cargo.toml +++ b/substrait-expr/Cargo.toml @@ -21,7 +21,7 @@ include = [ substrait.workspace = true substrait-expr-macros.workspace = true once_cell = "1.19.0" -prost = "0.13.3" +prost = "0.14.1" thiserror = "2.0.3" [build-dependencies] diff --git a/substrait-expr/src/builder.rs b/substrait-expr/src/builder.rs index bcc4166..0c787ab 100644 --- a/substrait-expr/src/builder.rs +++ b/substrait-expr/src/builder.rs @@ -205,21 +205,13 @@ pub mod functions; pub mod schema; pub mod types; +#[derive(Default)] pub struct BuilderParams { pub allow_late_name_lookup: bool, pub allow_loose_types: bool, pub allow_unknown_types: bool, } -impl Default for BuilderParams { - fn default() -> Self { - Self { - allow_late_name_lookup: false, - allow_loose_types: false, - allow_unknown_types: false, - } - } -} impl BuilderParams { pub fn new_loose() -> Self { @@ -269,7 +261,7 @@ pub trait IntoExprOutputNames { fn into_names(self) -> Vec; } -impl<'a> IntoExprOutputNames for &'a str { +impl IntoExprOutputNames for &str { fn into_names(self) -> Vec { vec![self.to_string()] } @@ -296,11 +288,11 @@ impl ExpressionsBuilder { } } - pub fn fields(&self) -> RefBuilder { + pub fn fields(&self) -> RefBuilder<'_> { RefBuilder::new(&self.schema, &self.params, self.functions()) } - pub fn functions(&self) -> FunctionsBuilder { + pub fn functions(&self) -> FunctionsBuilder<'_> { FunctionsBuilder::new(&self.schema) } @@ -319,7 +311,7 @@ impl ExpressionsBuilder { } pub fn build(self) -> ExtendedExpression { - let (extension_uris, extensions) = self.schema.extensions_registry().to_substrait(); + let (extension_uris, extension_urns, extensions) = self.schema.extensions_registry().to_substrait(); let referred_expr = self .expressions .into_inner() @@ -329,9 +321,11 @@ impl ExpressionsBuilder { expr_type: Some(ExprType::Expression(named_expr.expr)), }) .collect::>(); + #[expect(deprecated)] ExtendedExpression { version: Some(substrait::version::version_with_producer("substrait-expr")), extension_uris, + extension_urns, extensions, advanced_extensions: None, expected_type_urls: Vec::new(), diff --git a/substrait-expr/src/builder/functions.rs b/substrait-expr/src/builder/functions.rs index b8f6d9d..6858511 100644 --- a/substrait-expr/src/builder/functions.rs +++ b/substrait-expr/src/builder/functions.rs @@ -185,9 +185,9 @@ impl FunctionDefinition { /// See [`lookup_field_by_name`](crate::builder::functions::FunctionsBuilder::lookup_field_by_name) /// /// This is very likely to change when Substrait formally adopts a late lookup feature -pub const LOOKUP_BY_NAME_FUNC_URI: &'static str = "https://substrait.io/functions"; +pub const LOOKUP_BY_NAME_FUNC_URI: &str = "https://substrait.io/functions"; /// The name of the special function we use to indicate a late lookup -pub const LOOKUP_BY_NAME_FUNC_NAME: &'static str = "lookup_by_name"; +pub const LOOKUP_BY_NAME_FUNC_NAME: &str = "lookup_by_name"; /// A builder that can create scalar function expressions pub struct FunctionsBuilder<'a> { @@ -210,10 +210,10 @@ impl<'a> FunctionsBuilder<'a> { &self, func: &'static FunctionDefinition, args: Vec, - ) -> FunctionBuilder { + ) -> FunctionBuilder<'_> { let func_reference = self.schema.extensions_registry().register_function(func); FunctionBuilder { - func: func, + func, func_reference, args, options: BTreeMap::new(), @@ -312,7 +312,7 @@ impl<'a> FunctionBuilder<'a> { // TODO: This is a hack. We need to find which input argument to base the return type on // by matching the template names (e.g. if it is foo(T1,T2) => T2 then this would // do the wrong thing) - FunctionReturn::Templated(_) => self.args.first().unwrap().output_type(&self.schema)?, + FunctionReturn::Templated(_) => self.args.first().unwrap().output_type(self.schema)?, }; Ok(Expression { diff --git a/substrait-expr/src/builder/schema.rs b/substrait-expr/src/builder/schema.rs index aa512af..b72d1ed 100644 --- a/substrait-expr/src/builder/schema.rs +++ b/substrait-expr/src/builder/schema.rs @@ -90,6 +90,12 @@ pub struct TypesOnlySchemaBuilder { registry: ExtensionsRegistry, } +impl Default for TypesOnlySchemaBuilder { + fn default() -> Self { + Self::new() + } +} + impl TypesOnlySchemaBuilder { /// Create a new builder pub fn new() -> Self { @@ -153,7 +159,7 @@ impl TypesOnlySchemaBuilder { } /// Create a type builder to create user defined types - pub fn types(&self) -> TypeBuilder { + pub fn types(&self) -> TypeBuilder<'_> { TypeBuilder { registry: &self.registry, } @@ -166,6 +172,12 @@ pub struct NamesOnlySchemaNodeBuilder { registry: ExtensionsRegistry, } +impl Default for NamesOnlySchemaNodeBuilder { + fn default() -> Self { + Self::new() + } +} + impl NamesOnlySchemaNodeBuilder { /// Create a new builder pub fn new() -> Self { diff --git a/substrait-expr/src/helpers/literals.rs b/substrait-expr/src/helpers/literals.rs index a211497..0fc0dff 100644 --- a/substrait-expr/src/helpers/literals.rs +++ b/substrait-expr/src/helpers/literals.rs @@ -116,7 +116,7 @@ impl LiteralInference for i32 { impl LiteralInference for i64 { fn to_substrait(self) -> LiteralType { - LiteralType::I64(self as i64) + LiteralType::I64(self) } fn try_from_substrait(lit: &LiteralType) -> Result { match lit { @@ -261,7 +261,7 @@ pub mod literals { } else { Ok(make_literal( LiteralType::VarChar(VarChar { - value: value.into(), + value, length, }), false, diff --git a/substrait-expr/src/helpers/registry.rs b/substrait-expr/src/helpers/registry.rs index d3d4256..f591ed7 100644 --- a/substrait-expr/src/helpers/registry.rs +++ b/substrait-expr/src/helpers/registry.rs @@ -2,7 +2,7 @@ use std::{collections::BTreeMap, sync::RwLock}; use substrait::proto::extensions::{ simple_extension_declaration::{ExtensionFunction, ExtensionType, MappingType}, - SimpleExtensionDeclaration, SimpleExtensionUri, + SimpleExtensionDeclaration, SimpleExtensionUri, SimpleExtensionUrn, }; use crate::builder::functions::FunctionDefinition; @@ -56,14 +56,22 @@ impl UriLookup { }) } - pub fn to_substrait(self) -> Vec { - self.uris - .into_iter() + pub fn to_substrait(self) -> (Vec, Vec) { + let uris = self.uris + .iter() .map(|entry| SimpleExtensionUri { - extension_uri_anchor: entry.1, - uri: entry.0, + extension_uri_anchor: *entry.1, + uri: entry.0.clone(), + }) + .collect::>(); + let urns = self.uris + .into_iter() + .map(|entry| SimpleExtensionUrn { + extension_urn_anchor: entry.1, + urn: entry.0, }) - .collect::>() + .collect::>(); + (uris, urns) } } @@ -101,7 +109,7 @@ impl RegistryInternal { let anchor = self.counter; self.counter += 1; let type_record = TypeRecord { - uri: uri, + uri, name: name.to_string(), anchor, }; @@ -121,7 +129,7 @@ impl RegistryInternal { let function_record = FunctionRecord { uri: uri.to_string(), name: name.to_string(), - anchor: anchor, + anchor, }; self.functions_inverse .insert(anchor, function_record.clone()); @@ -213,9 +221,11 @@ impl ExtensionsRegistry { ) { for record in internal.types.values() { let uri_ref = uris.register(record.uri.clone()); + #[expect(deprecated)] let declaration = SimpleExtensionDeclaration { mapping_type: Some(MappingType::ExtensionType(ExtensionType { extension_uri_reference: uri_ref, + extension_urn_reference: uri_ref, type_anchor: record.anchor, name: record.name.clone(), })), @@ -232,9 +242,11 @@ impl ExtensionsRegistry { ) { for record in internal.functions.values() { let uri_ref = uris.register(record.uri.clone()); + #[expect(deprecated)] let declaration = SimpleExtensionDeclaration { mapping_type: Some(MappingType::ExtensionFunction(ExtensionFunction { extension_uri_reference: uri_ref, + extension_urn_reference: uri_ref, function_anchor: record.anchor, name: record.name.clone(), })), @@ -246,7 +258,7 @@ impl ExtensionsRegistry { /// Creates a substrait representation of the extensions registry /// /// This is typically placed in a top-level message such as ExtendedExpression or Plan - pub fn to_substrait(&self) -> (Vec, Vec) { + pub fn to_substrait(&self) -> (Vec, Vec, Vec) { let mut uris = UriLookup::new(); let mut extensions: Vec = Vec::new(); let internal = self.internal.read().unwrap(); @@ -254,8 +266,8 @@ impl ExtensionsRegistry { self.add_types(&internal, &mut uris, &mut extensions); self.add_functions(&internal, &mut uris, &mut extensions); - let uris = uris.to_substrait(); + let (uris, urns) = uris.to_substrait(); - (uris, extensions) + (uris, urns, extensions) } } diff --git a/substrait-expr/src/helpers/schema.rs b/substrait-expr/src/helpers/schema.rs index 83a9196..2899e63 100644 --- a/substrait-expr/src/helpers/schema.rs +++ b/substrait-expr/src/helpers/schema.rs @@ -276,7 +276,7 @@ impl SchemaInfo { /// Returns an iterator through the names of the fields, in DFS order /// /// Returns an error if the schema does not know the names of its fields - pub fn names_dfs<'a>(&'a self) -> Result + 'a>> { + pub fn names_dfs<'a>(&'a self) -> Result + 'a>> { match self { SchemaInfo::Empty(_) => Err(SubstraitExprError::invalid_input( "Attempt to access field names when the schema is not name-aware", diff --git a/substrait-expr/src/helpers/types.rs b/substrait-expr/src/helpers/types.rs index b3257f7..b3a3c57 100644 --- a/substrait-expr/src/helpers/types.rs +++ b/substrait-expr/src/helpers/types.rs @@ -255,8 +255,8 @@ pub fn struct_(nullable: bool, children: Vec) -> Type { } } /// The URI of the unknown type -pub const UNKNOWN_TYPE_URI: &'static str = "https://substrait.io/types"; +pub const UNKNOWN_TYPE_URI: &str = "https://substrait.io/types"; /// The name of the unknown type -pub const UNKNOWN_TYPE_NAME: &'static str = "unknown"; +pub const UNKNOWN_TYPE_NAME: &str = "unknown"; /// A friendly name that indicates there is no type variation being used pub const NO_VARIATION: u32 = 0; diff --git a/substrait-expr/src/util.rs b/substrait-expr/src/util.rs index 7d6b9c5..bf82e25 100644 --- a/substrait-expr/src/util.rs +++ b/substrait-expr/src/util.rs @@ -1,24 +1,5 @@ use crate::error::{Result, SubstraitExprError}; -/// Helper trait for extracting a property that should always be present -/// from a protobuf message and returning an error if it is not -pub(crate) trait HasRequiredProperties { - fn into_required(self, prop_name: &str) -> Result; -} - -impl HasRequiredProperties for Option { - // TODO: Is there any better way to do this that doesn't require specifying prop_name? - // Maybe a macro of some kind? - fn into_required(self, prop_name: &str) -> Result { - self.ok_or_else(|| { - SubstraitExprError::InvalidSubstrait(format!( - "The required property {} is missing", - prop_name - )) - }) - } -} - /// Helper trait for extracting a property that should always be present /// from a protobuf message and returning an error if it is not pub(crate) trait HasRequiredPropertiesRef { diff --git a/substrait-expr/substrait b/substrait-expr/substrait index 5c8fa04..4c35318 160000 --- a/substrait-expr/substrait +++ b/substrait-expr/substrait @@ -1 +1 @@ -Subproject commit 5c8fa047993835b2bba60b196af0855316e5efdb +Subproject commit 4c35318727c36d6e49779c06daf9f4ced722fe43