-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add support for Substrait List/EmptyList literals #10615
Changes from 3 commits
48565f7
c71e577
799d672
047d643
5e185a6
cdc525c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -61,6 +61,7 @@ use substrait::proto::{ | |
}; | ||
use substrait::proto::{FunctionArgument, SortField}; | ||
|
||
use datafusion::arrow::array::GenericListArray; | ||
use datafusion::common::plan_err; | ||
use datafusion::logical_expr::expr::{InList, InSubquery, Sort}; | ||
use std::collections::HashMap; | ||
|
@@ -1138,7 +1139,7 @@ fn from_substrait_type(dt: &substrait::proto::Type) -> Result<DataType> { | |
from_substrait_type(list.r#type.as_ref().ok_or_else(|| { | ||
substrait_datafusion_err!("List type must have inner type") | ||
})?)?; | ||
let field = Arc::new(Field::new("list_item", inner_type, true)); | ||
let field = Arc::new(Field::new_list_field(inner_type, true)); | ||
match list.type_variation_reference { | ||
DEFAULT_CONTAINER_TYPE_REF => Ok(DataType::List(field)), | ||
LARGE_CONTAINER_TYPE_REF => Ok(DataType::LargeList(field)), | ||
|
@@ -1278,6 +1279,40 @@ pub(crate) fn from_substrait_literal(lit: &Literal) -> Result<ScalarValue> { | |
s, | ||
) | ||
} | ||
Some(LiteralType::List(l)) => { | ||
let elements = l | ||
.values | ||
.iter() | ||
.map(|el| from_substrait_literal(el)) | ||
.collect::<Result<Vec<_>>>()?; | ||
let element_type = elements[0].data_type(); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should we check if elements are empty and report an error? The literal input might come from systems other than DataFusion, and they might not be properly implemented. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yup, done: cdc525c |
||
match lit.type_variation_reference { | ||
DEFAULT_CONTAINER_TYPE_REF => ScalarValue::List(ScalarValue::new_list( | ||
elements.as_slice(), | ||
&element_type, | ||
)), | ||
LARGE_CONTAINER_TYPE_REF => ScalarValue::LargeList( | ||
ScalarValue::new_large_list(elements.as_slice(), &element_type), | ||
), | ||
others => { | ||
return substrait_err!("Unknown type variation reference {others}"); | ||
} | ||
} | ||
} | ||
Some(LiteralType::EmptyList(l)) => { | ||
let element_type = from_substrait_type(l.r#type.clone().unwrap().as_ref())?; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we can remove There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hm, without the type specified we don't know what it should be - I guess we could default to NullType (which I think is what DataFusion does if you just do a "SELECT [] FROM ..", do you think that'd make sense? I feel like Substrait probably intends this field to always exist, though I'm not sure, but e.g. in the Java library they have it as required: https://github.com/substrait-io/substrait-java/blob/79decd20e85d6a1a5623890042ebcf1415cf784a/core/src/main/java/io/substrait/expression/Expression.java#L451 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we can return an error like "invalid parameter", but it may not be necessary to do so. Let's keep it as it is for now until someone requests this behavior. |
||
match lit.type_variation_reference { | ||
DEFAULT_CONTAINER_TYPE_REF => { | ||
ScalarValue::List(ScalarValue::new_list(&[], &element_type)) | ||
} | ||
LARGE_CONTAINER_TYPE_REF => ScalarValue::LargeList( | ||
ScalarValue::new_large_list(&[], &element_type), | ||
), | ||
others => { | ||
return substrait_err!("Unknown type variation reference {others}"); | ||
} | ||
} | ||
} | ||
Some(LiteralType::Null(ntype)) => from_substrait_null(ntype)?, | ||
_ => return not_impl_err!("Unsupported literal_type: {:?}", lit.literal_type), | ||
}; | ||
|
@@ -1361,7 +1396,24 @@ fn from_substrait_null(null_type: &Type) -> Result<ScalarValue> { | |
d.precision as u8, | ||
d.scale as i8, | ||
)), | ||
_ => not_impl_err!("Unsupported Substrait type: {kind:?}"), | ||
r#type::Kind::List(l) => { | ||
let field = Field::new_list_field( | ||
from_substrait_type(l.r#type.clone().unwrap().as_ref())?, | ||
true, | ||
); | ||
match l.type_variation_reference { | ||
DEFAULT_CONTAINER_TYPE_REF => Ok(ScalarValue::List(Arc::new( | ||
GenericListArray::new_null(field.into(), 1), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this the correct way of creating null lists, or is there something better? The list-of-lists structure ScalarValue::List uses is a bit confusing to me. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think it is correct. |
||
))), | ||
LARGE_CONTAINER_TYPE_REF => Ok(ScalarValue::LargeList(Arc::new( | ||
GenericListArray::new_null(field.into(), 1), | ||
))), | ||
v => not_impl_err!( | ||
"Unsupported Substrait type variation {v} of type {kind:?}" | ||
), | ||
} | ||
} | ||
_ => not_impl_err!("Unsupported Substrait type for null: {kind:?}"), | ||
} | ||
} else { | ||
not_impl_err!("Null type without kind is not supported") | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a breaking change in the sense that the new field name is just "item", to align with the Arrow default.