Skip to content
This repository was archived by the owner on Nov 25, 2025. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ version = "0.2.3"
[workspace.dependencies]
substrait-expr-funcgen = { path = "./substrait-expr-funcgen", version = "0.2.1" }
substrait-expr-macros = { path = "./substrait-expr-macros", version = "0.2.1" }
substrait = { version = "0.50.4" }
substrait = { version = "0.62.0" }
18 changes: 9 additions & 9 deletions substrait-expr-funcgen/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ fn generate_type(fn_name: &str, type_name: &str) -> Option<TokenStream> {

fn generate_arg_type(fn_name: &str, typ: &Type) -> Option<TokenStream> {
let type_name = match typ {
Type::Variant0(type_str) => type_str.as_str(),
Type::Variant1(_) => "",
Type::String(type_str) => type_str.as_str(),
Type::Object(_) => "",
};
if type_name.is_empty() {
return None;
Expand All @@ -70,8 +70,8 @@ fn generate_arg_type(fn_name: &str, typ: &Type) -> Option<TokenStream> {

fn generate_arg_return(fn_name: &str, typ: &Type) -> Option<TokenStream> {
let type_name = match typ {
Type::Variant0(type_str) => type_str.as_str(),
Type::Variant1(_) => "",
Type::String(type_str) => type_str.as_str(),
Type::Object(_) => "",
};
if type_name.is_empty() {
return None;
Expand Down Expand Up @@ -196,9 +196,9 @@ fn generate_ext_impls(function: &ScalarFunction) -> Result<Vec<(TokenStream, Tok
.unwrap()
})
.collect::<Vec<_>>();
let prototype = quote!(fn #fn_name_token(&self, #(#arg_name_tokens: Expression),*) -> FunctionBuilder;);
let prototype = quote!(fn #fn_name_token(&self, #(#arg_name_tokens: Expression),*) -> FunctionBuilder<'_>;);
let imp = quote!(
fn #fn_name_token(&self, #(#arg_name_tokens: Expression),*) -> FunctionBuilder {
fn #fn_name_token(&self, #(#arg_name_tokens: Expression),*) -> FunctionBuilder<'_> {
self.new_builder(&#func_name_caps, vec![#(#arg_name_tokens),*])
}
);
Expand All @@ -224,7 +224,7 @@ fn generate_function_blocks(
let prototypes_impls = extensions
.scalar_functions
.iter()
.map(|func| generate_ext_impls(func))
.map(generate_ext_impls)
.flat_map(|impls| match impls {
Ok(impls) => impls.into_iter().map(Ok).collect(),
Err(err) => vec![Err(err)],
Expand Down Expand Up @@ -262,7 +262,7 @@ pub fn generate_functions_for_yaml(uri: &str, filepath: &str) -> Result<TokenStr
.to_str()
.unwrap();

let func_blocks = generate_function_blocks(uri, &mod_name, extensions)?;
let func_blocks = generate_function_blocks(uri, mod_name, extensions)?;

let mod_name_token: TokenStream = mod_name.parse()?;

Expand Down Expand Up @@ -299,7 +299,7 @@ impl Options {
pub fn generate_functions(entries: &[(&str, &str)], options: Options) -> Result<()> {
let yaml_modules = entries
.iter()
.map(|entry| generate_functions_for_yaml(entry.0, &entry.1))
.map(|entry| generate_functions_for_yaml(entry.0, entry.1))
.collect::<Result<Vec<_>>>()?;
let crate_name_token: TokenStream = options.get_crate_name().parse()?;

Expand Down
2 changes: 1 addition & 1 deletion substrait-expr-macros/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ fn rust_to_names_fields(schema: &NestedType) -> proc_macro2::TokenStream {
let parsed_fields = schema
.fields
.iter()
.map(|field| rust_field_to_names_field(field))
.map(rust_field_to_names_field)
.collect::<Vec<_>>();
quote! {vec![#(#parsed_fields),*]}
}
Expand Down
2 changes: 1 addition & 1 deletion substrait-expr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ include = [
substrait.workspace = true
substrait-expr-macros.workspace = true
once_cell = "1.19.0"
prost = "0.13.3"
prost = "0.14.1"
thiserror = "2.0.3"

[build-dependencies]
Expand Down
20 changes: 7 additions & 13 deletions substrait-expr/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -205,21 +205,13 @@ pub mod functions;
pub mod schema;
pub mod types;

#[derive(Default)]
pub struct BuilderParams {
pub allow_late_name_lookup: bool,
pub allow_loose_types: bool,
pub allow_unknown_types: bool,
}

impl Default for BuilderParams {
fn default() -> Self {
Self {
allow_late_name_lookup: false,
allow_loose_types: false,
allow_unknown_types: false,
}
}
}

impl BuilderParams {
pub fn new_loose() -> Self {
Expand Down Expand Up @@ -269,7 +261,7 @@ pub trait IntoExprOutputNames {
fn into_names(self) -> Vec<String>;
}

impl<'a> IntoExprOutputNames for &'a str {
impl IntoExprOutputNames for &str {
fn into_names(self) -> Vec<String> {
vec![self.to_string()]
}
Expand All @@ -296,11 +288,11 @@ impl ExpressionsBuilder {
}
}

pub fn fields(&self) -> RefBuilder {
pub fn fields(&self) -> RefBuilder<'_> {
RefBuilder::new(&self.schema, &self.params, self.functions())
}

pub fn functions(&self) -> FunctionsBuilder {
pub fn functions(&self) -> FunctionsBuilder<'_> {
FunctionsBuilder::new(&self.schema)
}

Expand All @@ -319,7 +311,7 @@ impl ExpressionsBuilder {
}

pub fn build(self) -> ExtendedExpression {
let (extension_uris, extensions) = self.schema.extensions_registry().to_substrait();
let (extension_uris, extension_urns, extensions) = self.schema.extensions_registry().to_substrait();
let referred_expr = self
.expressions
.into_inner()
Expand All @@ -329,9 +321,11 @@ impl ExpressionsBuilder {
expr_type: Some(ExprType::Expression(named_expr.expr)),
})
.collect::<Vec<_>>();
#[expect(deprecated)]
ExtendedExpression {
version: Some(substrait::version::version_with_producer("substrait-expr")),
extension_uris,
extension_urns,
extensions,
advanced_extensions: None,
expected_type_urls: Vec::new(),
Expand Down
10 changes: 5 additions & 5 deletions substrait-expr/src/builder/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,9 @@ impl FunctionDefinition {
/// See [`lookup_field_by_name`](crate::builder::functions::FunctionsBuilder::lookup_field_by_name)
///
/// This is very likely to change when Substrait formally adopts a late lookup feature
pub const LOOKUP_BY_NAME_FUNC_URI: &'static str = "https://substrait.io/functions";
pub const LOOKUP_BY_NAME_FUNC_URI: &str = "https://substrait.io/functions";
/// The name of the special function we use to indicate a late lookup
pub const LOOKUP_BY_NAME_FUNC_NAME: &'static str = "lookup_by_name";
pub const LOOKUP_BY_NAME_FUNC_NAME: &str = "lookup_by_name";

/// A builder that can create scalar function expressions
pub struct FunctionsBuilder<'a> {
Expand All @@ -210,10 +210,10 @@ impl<'a> FunctionsBuilder<'a> {
&self,
func: &'static FunctionDefinition,
args: Vec<Expression>,
) -> FunctionBuilder {
) -> FunctionBuilder<'_> {
let func_reference = self.schema.extensions_registry().register_function(func);
FunctionBuilder {
func: func,
func,
func_reference,
args,
options: BTreeMap::new(),
Expand Down Expand Up @@ -312,7 +312,7 @@ impl<'a> FunctionBuilder<'a> {
// TODO: This is a hack. We need to find which input argument to base the return type on
// by matching the template names (e.g. if it is foo<T1,T2>(T1,T2) => T2 then this would
// do the wrong thing)
FunctionReturn::Templated(_) => self.args.first().unwrap().output_type(&self.schema)?,
FunctionReturn::Templated(_) => self.args.first().unwrap().output_type(self.schema)?,
};

Ok(Expression {
Expand Down
14 changes: 13 additions & 1 deletion substrait-expr/src/builder/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,12 @@ pub struct TypesOnlySchemaBuilder {
registry: ExtensionsRegistry,
}

impl Default for TypesOnlySchemaBuilder {
fn default() -> Self {
Self::new()
}
}

impl TypesOnlySchemaBuilder {
/// Create a new builder
pub fn new() -> Self {
Expand Down Expand Up @@ -153,7 +159,7 @@ impl TypesOnlySchemaBuilder {
}

/// Create a type builder to create user defined types
pub fn types(&self) -> TypeBuilder {
pub fn types(&self) -> TypeBuilder<'_> {
TypeBuilder {
registry: &self.registry,
}
Expand All @@ -166,6 +172,12 @@ pub struct NamesOnlySchemaNodeBuilder {
registry: ExtensionsRegistry,
}

impl Default for NamesOnlySchemaNodeBuilder {
fn default() -> Self {
Self::new()
}
}

impl NamesOnlySchemaNodeBuilder {
/// Create a new builder
pub fn new() -> Self {
Expand Down
4 changes: 2 additions & 2 deletions substrait-expr/src/helpers/literals.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ impl LiteralInference for i32 {

impl LiteralInference for i64 {
fn to_substrait(self) -> LiteralType {
LiteralType::I64(self as i64)
LiteralType::I64(self)
}
fn try_from_substrait(lit: &LiteralType) -> Result<Self> {
match lit {
Expand Down Expand Up @@ -261,7 +261,7 @@ pub mod literals {
} else {
Ok(make_literal(
LiteralType::VarChar(VarChar {
value: value.into(),
value,
length,
}),
false,
Expand Down
36 changes: 24 additions & 12 deletions substrait-expr/src/helpers/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::{collections::BTreeMap, sync::RwLock};

use substrait::proto::extensions::{
simple_extension_declaration::{ExtensionFunction, ExtensionType, MappingType},
SimpleExtensionDeclaration, SimpleExtensionUri,
SimpleExtensionDeclaration, SimpleExtensionUri, SimpleExtensionUrn,
};

use crate::builder::functions::FunctionDefinition;
Expand Down Expand Up @@ -56,14 +56,22 @@ impl UriLookup {
})
}

pub fn to_substrait(self) -> Vec<SimpleExtensionUri> {
self.uris
.into_iter()
pub fn to_substrait(self) -> (Vec<SimpleExtensionUri>, Vec<SimpleExtensionUrn>) {
let uris = self.uris
.iter()
.map(|entry| SimpleExtensionUri {
extension_uri_anchor: entry.1,
uri: entry.0,
extension_uri_anchor: *entry.1,
uri: entry.0.clone(),
})
.collect::<Vec<_>>();
let urns = self.uris
.into_iter()
.map(|entry| SimpleExtensionUrn {
extension_urn_anchor: entry.1,
urn: entry.0,
})
.collect::<Vec<_>>()
.collect::<Vec<_>>();
Comment thread
timsaucer marked this conversation as resolved.
(uris, urns)
}
}

Expand Down Expand Up @@ -101,7 +109,7 @@ impl RegistryInternal {
let anchor = self.counter;
self.counter += 1;
let type_record = TypeRecord {
uri: uri,
uri,
name: name.to_string(),
anchor,
};
Expand All @@ -121,7 +129,7 @@ impl RegistryInternal {
let function_record = FunctionRecord {
uri: uri.to_string(),
name: name.to_string(),
anchor: anchor,
anchor,
};
self.functions_inverse
.insert(anchor, function_record.clone());
Expand Down Expand Up @@ -213,9 +221,11 @@ impl ExtensionsRegistry {
) {
for record in internal.types.values() {
let uri_ref = uris.register(record.uri.clone());
#[expect(deprecated)]
let declaration = SimpleExtensionDeclaration {
mapping_type: Some(MappingType::ExtensionType(ExtensionType {
extension_uri_reference: uri_ref,
extension_urn_reference: uri_ref,
type_anchor: record.anchor,
name: record.name.clone(),
})),
Expand All @@ -232,9 +242,11 @@ impl ExtensionsRegistry {
) {
for record in internal.functions.values() {
let uri_ref = uris.register(record.uri.clone());
#[expect(deprecated)]
let declaration = SimpleExtensionDeclaration {
mapping_type: Some(MappingType::ExtensionFunction(ExtensionFunction {
extension_uri_reference: uri_ref,
extension_urn_reference: uri_ref,
function_anchor: record.anchor,
name: record.name.clone(),
})),
Expand All @@ -246,16 +258,16 @@ impl ExtensionsRegistry {
/// Creates a substrait representation of the extensions registry
///
/// This is typically placed in a top-level message such as ExtendedExpression or Plan
pub fn to_substrait(&self) -> (Vec<SimpleExtensionUri>, Vec<SimpleExtensionDeclaration>) {
pub fn to_substrait(&self) -> (Vec<SimpleExtensionUri>, Vec<SimpleExtensionUrn>, Vec<SimpleExtensionDeclaration>) {
let mut uris = UriLookup::new();
let mut extensions: Vec<SimpleExtensionDeclaration> = Vec::new();
let internal = self.internal.read().unwrap();

self.add_types(&internal, &mut uris, &mut extensions);
self.add_functions(&internal, &mut uris, &mut extensions);

let uris = uris.to_substrait();
let (uris, urns) = uris.to_substrait();

(uris, extensions)
(uris, urns, extensions)
}
}
2 changes: 1 addition & 1 deletion substrait-expr/src/helpers/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ impl SchemaInfo {
/// Returns an iterator through the names of the fields, in DFS order
///
/// Returns an error if the schema does not know the names of its fields
pub fn names_dfs<'a>(&'a self) -> Result<Box<dyn Iterator<Item = &str> + 'a>> {
pub fn names_dfs<'a>(&'a self) -> Result<Box<dyn Iterator<Item = &'a str> + 'a>> {
match self {
SchemaInfo::Empty(_) => Err(SubstraitExprError::invalid_input(
"Attempt to access field names when the schema is not name-aware",
Expand Down
4 changes: 2 additions & 2 deletions substrait-expr/src/helpers/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,8 +255,8 @@ pub fn struct_(nullable: bool, children: Vec<Type>) -> Type {
}
}
/// The URI of the unknown type
pub const UNKNOWN_TYPE_URI: &'static str = "https://substrait.io/types";
pub const UNKNOWN_TYPE_URI: &str = "https://substrait.io/types";
/// The name of the unknown type
pub const UNKNOWN_TYPE_NAME: &'static str = "unknown";
pub const UNKNOWN_TYPE_NAME: &str = "unknown";
/// A friendly name that indicates there is no type variation being used
pub const NO_VARIATION: u32 = 0;
19 changes: 0 additions & 19 deletions substrait-expr/src/util.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,5 @@
use crate::error::{Result, SubstraitExprError};

/// Helper trait for extracting a property that should always be present
/// from a protobuf message and returning an error if it is not
pub(crate) trait HasRequiredProperties<T> {
fn into_required(self, prop_name: &str) -> Result<T>;
}

impl<T> HasRequiredProperties<T> for Option<T> {
// TODO: Is there any better way to do this that doesn't require specifying prop_name?
// Maybe a macro of some kind?
fn into_required(self, prop_name: &str) -> Result<T> {
self.ok_or_else(|| {
SubstraitExprError::InvalidSubstrait(format!(
"The required property {} is missing",
prop_name
))
})
}
}

/// Helper trait for extracting a property that should always be present
/// from a protobuf message and returning an error if it is not
pub(crate) trait HasRequiredPropertiesRef<T> {
Expand Down
2 changes: 1 addition & 1 deletion substrait-expr/substrait
Submodule substrait updated 225 files
Loading