From 4ecb5b9f8ee176ecf9497f028639613e86bf2bee Mon Sep 17 00:00:00 2001 From: Kumar Ujjawal Date: Fri, 9 Jan 2026 11:45:59 +0530 Subject: [PATCH 1/2] fix: sqllogictest failing in Substrait round-trip mode --- .../src/logical_plan/consumer/types.rs | 18 +++++++++------ .../src/logical_plan/producer/types.rs | 23 ++++++++++++++++--- datafusion/substrait/src/variation_const.rs | 7 ++++++ 3 files changed, 38 insertions(+), 10 deletions(-) diff --git a/datafusion/substrait/src/logical_plan/consumer/types.rs b/datafusion/substrait/src/logical_plan/consumer/types.rs index eb2cc967ca236..dd44321456f57 100644 --- a/datafusion/substrait/src/logical_plan/consumer/types.rs +++ b/datafusion/substrait/src/logical_plan/consumer/types.rs @@ -24,13 +24,13 @@ use crate::variation_const::{ DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_INTERVAL_DAY_TYPE_VARIATION_REF, DEFAULT_MAP_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF, DICTIONARY_MAP_TYPE_VARIATION_REF, DURATION_INTERVAL_DAY_TYPE_VARIATION_REF, - INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_NAME, - INTERVAL_MONTH_DAY_NANO_TYPE_REF, INTERVAL_YEAR_MONTH_TYPE_REF, - LARGE_CONTAINER_TYPE_VARIATION_REF, TIME_32_TYPE_VARIATION_REF, - TIME_64_TYPE_VARIATION_REF, TIMESTAMP_MICRO_TYPE_VARIATION_REF, - TIMESTAMP_MILLI_TYPE_VARIATION_REF, TIMESTAMP_NANO_TYPE_VARIATION_REF, - TIMESTAMP_SECOND_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF, - VIEW_CONTAINER_TYPE_VARIATION_REF, + FIXED_SIZE_LIST_TYPE_VARIATION_REF, INTERVAL_DAY_TIME_TYPE_REF, + INTERVAL_MONTH_DAY_NANO_TYPE_NAME, INTERVAL_MONTH_DAY_NANO_TYPE_REF, + INTERVAL_YEAR_MONTH_TYPE_REF, LARGE_CONTAINER_TYPE_VARIATION_REF, + TIME_32_TYPE_VARIATION_REF, TIME_64_TYPE_VARIATION_REF, + TIMESTAMP_MICRO_TYPE_VARIATION_REF, TIMESTAMP_MILLI_TYPE_VARIATION_REF, + TIMESTAMP_NANO_TYPE_VARIATION_REF, TIMESTAMP_SECOND_TYPE_VARIATION_REF, + UNSIGNED_INTEGER_TYPE_VARIATION_REF, VIEW_CONTAINER_TYPE_VARIATION_REF, }; use crate::variation_const::{FLOAT_16_TYPE_NAME, NULL_TYPE_NAME}; use datafusion::arrow::datatypes::{ @@ -167,6 +167,10 @@ pub fn from_substrait_type( match list.type_variation_reference { DEFAULT_CONTAINER_TYPE_VARIATION_REF => Ok(DataType::List(field)), LARGE_CONTAINER_TYPE_VARIATION_REF => Ok(DataType::LargeList(field)), + v if v >= FIXED_SIZE_LIST_TYPE_VARIATION_REF => { + let size = (v - FIXED_SIZE_LIST_TYPE_VARIATION_REF) as i32; + Ok(DataType::FixedSizeList(field, size)) + } v => not_impl_err!( "Unsupported Substrait type variation {v} of type {s_kind:?}" )?, diff --git a/datafusion/substrait/src/logical_plan/producer/types.rs b/datafusion/substrait/src/logical_plan/producer/types.rs index 3727596119bc3..ba4e30f15195c 100644 --- a/datafusion/substrait/src/logical_plan/producer/types.rs +++ b/datafusion/substrait/src/logical_plan/producer/types.rs @@ -23,9 +23,10 @@ use crate::variation_const::{ DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_INTERVAL_DAY_TYPE_VARIATION_REF, DEFAULT_MAP_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF, DICTIONARY_MAP_TYPE_VARIATION_REF, DURATION_INTERVAL_DAY_TYPE_VARIATION_REF, - FLOAT_16_TYPE_NAME, LARGE_CONTAINER_TYPE_VARIATION_REF, NULL_TYPE_NAME, - TIME_32_TYPE_VARIATION_REF, TIME_64_TYPE_VARIATION_REF, - UNSIGNED_INTEGER_TYPE_VARIATION_REF, VIEW_CONTAINER_TYPE_VARIATION_REF, + FIXED_SIZE_LIST_TYPE_VARIATION_REF, FLOAT_16_TYPE_NAME, + LARGE_CONTAINER_TYPE_VARIATION_REF, NULL_TYPE_NAME, TIME_32_TYPE_VARIATION_REF, + TIME_64_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF, + VIEW_CONTAINER_TYPE_VARIATION_REF, }; use datafusion::arrow::datatypes::{DataType, IntervalUnit}; use datafusion::common::{DFSchemaRef, not_impl_err, plan_err}; @@ -286,6 +287,18 @@ pub(crate) fn to_substrait_type( }))), }) } + DataType::FixedSizeList(inner, size) => { + let inner_type = + to_substrait_type(producer, inner.data_type(), inner.is_nullable())?; + Ok(substrait::proto::Type { + kind: Some(r#type::Kind::List(Box::new(r#type::List { + r#type: Some(Box::new(inner_type)), + type_variation_reference: FIXED_SIZE_LIST_TYPE_VARIATION_REF + + (*size as u32), + nullability, + }))), + }) + } DataType::Map(inner, _) => match inner.data_type() { DataType::Struct(key_and_value) if key_and_value.len() == 2 => { let key_type = to_substrait_type( @@ -439,6 +452,10 @@ mod tests { round_trip_type(DataType::LargeList( Field::new_list_field(DataType::Int32, true).into(), ))?; + round_trip_type(DataType::FixedSizeList( + Field::new_list_field(DataType::Int64, true).into(), + 10, + ))?; round_trip_type(DataType::Map( Field::new_struct( diff --git a/datafusion/substrait/src/variation_const.rs b/datafusion/substrait/src/variation_const.rs index b1a922899e976..035485401940e 100644 --- a/datafusion/substrait/src/variation_const.rs +++ b/datafusion/substrait/src/variation_const.rs @@ -55,6 +55,13 @@ pub const TIME_64_TYPE_VARIATION_REF: u32 = 1; pub const DEFAULT_CONTAINER_TYPE_VARIATION_REF: u32 = 0; pub const LARGE_CONTAINER_TYPE_VARIATION_REF: u32 = 1; pub const VIEW_CONTAINER_TYPE_VARIATION_REF: u32 = 2; +/// Used for the arrow type [`DataType::FixedSizeList`]. +/// +/// The size of the fixed list is encoded by adding it to this base value. +/// For example, `FixedSizeList(10)` would use variation reference `3 + 10 = 13`. +/// +/// [`DataType::FixedSizeList`]: datafusion::arrow::datatypes::DataType::FixedSizeList +pub const FIXED_SIZE_LIST_TYPE_VARIATION_REF: u32 = 3; pub const DEFAULT_MAP_TYPE_VARIATION_REF: u32 = 0; pub const DICTIONARY_MAP_TYPE_VARIATION_REF: u32 = 1; pub const DECIMAL_128_TYPE_VARIATION_REF: u32 = 0; From 2bc3207ef4261940c3e4ea1dd95106a7a4ddabe7 Mon Sep 17 00:00:00 2001 From: Kumar Ujjawal Date: Fri, 9 Jan 2026 23:26:51 +0530 Subject: [PATCH 2/2] use type_parameters to encode the inner type and size --- .../src/logical_plan/consumer/types.rs | 52 ++++++++++++++----- .../src/logical_plan/producer/types.rs | 30 +++++++---- datafusion/substrait/src/variation_const.rs | 12 ++--- 3 files changed, 66 insertions(+), 28 deletions(-) diff --git a/datafusion/substrait/src/logical_plan/consumer/types.rs b/datafusion/substrait/src/logical_plan/consumer/types.rs index dd44321456f57..77c86bbbf7344 100644 --- a/datafusion/substrait/src/logical_plan/consumer/types.rs +++ b/datafusion/substrait/src/logical_plan/consumer/types.rs @@ -24,15 +24,17 @@ use crate::variation_const::{ DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_INTERVAL_DAY_TYPE_VARIATION_REF, DEFAULT_MAP_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF, DICTIONARY_MAP_TYPE_VARIATION_REF, DURATION_INTERVAL_DAY_TYPE_VARIATION_REF, - FIXED_SIZE_LIST_TYPE_VARIATION_REF, INTERVAL_DAY_TIME_TYPE_REF, - INTERVAL_MONTH_DAY_NANO_TYPE_NAME, INTERVAL_MONTH_DAY_NANO_TYPE_REF, - INTERVAL_YEAR_MONTH_TYPE_REF, LARGE_CONTAINER_TYPE_VARIATION_REF, - TIME_32_TYPE_VARIATION_REF, TIME_64_TYPE_VARIATION_REF, - TIMESTAMP_MICRO_TYPE_VARIATION_REF, TIMESTAMP_MILLI_TYPE_VARIATION_REF, - TIMESTAMP_NANO_TYPE_VARIATION_REF, TIMESTAMP_SECOND_TYPE_VARIATION_REF, - UNSIGNED_INTEGER_TYPE_VARIATION_REF, VIEW_CONTAINER_TYPE_VARIATION_REF, + INTERVAL_DAY_TIME_TYPE_REF, INTERVAL_MONTH_DAY_NANO_TYPE_NAME, + INTERVAL_MONTH_DAY_NANO_TYPE_REF, INTERVAL_YEAR_MONTH_TYPE_REF, + LARGE_CONTAINER_TYPE_VARIATION_REF, TIME_32_TYPE_VARIATION_REF, + TIME_64_TYPE_VARIATION_REF, TIMESTAMP_MICRO_TYPE_VARIATION_REF, + TIMESTAMP_MILLI_TYPE_VARIATION_REF, TIMESTAMP_NANO_TYPE_VARIATION_REF, + TIMESTAMP_SECOND_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF, + VIEW_CONTAINER_TYPE_VARIATION_REF, +}; +use crate::variation_const::{ + FIXED_SIZE_LIST_TYPE_NAME, FLOAT_16_TYPE_NAME, NULL_TYPE_NAME, }; -use crate::variation_const::{FLOAT_16_TYPE_NAME, NULL_TYPE_NAME}; use datafusion::arrow::datatypes::{ DataType, Field, Fields, IntervalUnit, Schema, TimeUnit, }; @@ -167,10 +169,6 @@ pub fn from_substrait_type( match list.type_variation_reference { DEFAULT_CONTAINER_TYPE_VARIATION_REF => Ok(DataType::List(field)), LARGE_CONTAINER_TYPE_VARIATION_REF => Ok(DataType::LargeList(field)), - v if v >= FIXED_SIZE_LIST_TYPE_VARIATION_REF => { - let size = (v - FIXED_SIZE_LIST_TYPE_VARIATION_REF) as i32; - Ok(DataType::FixedSizeList(field, size)) - } v => not_impl_err!( "Unsupported Substrait type variation {v} of type {s_kind:?}" )?, @@ -260,6 +258,36 @@ pub fn from_substrait_type( } FLOAT_16_TYPE_NAME => Ok(DataType::Float16), NULL_TYPE_NAME => Ok(DataType::Null), + FIXED_SIZE_LIST_TYPE_NAME => { + if u.type_parameters.len() != 2 { + return substrait_err!( + "FixedSizeList requires 2 type parameters, got {}", + u.type_parameters.len() + ); + } + let inner_type = match &u.type_parameters[0].parameter { + Some(r#type::parameter::Parameter::DataType(t)) => { + from_substrait_type(consumer, t, dfs_names, name_idx)? + } + _ => { + return substrait_err!( + "Invalid inner type for FixedSizeList" + ); + } + }; + let size = match &u.type_parameters[1].parameter { + Some(r#type::parameter::Parameter::Integer(i)) => { + *i as i32 + } + _ => { + return substrait_err!( + "Invalid size for FixedSizeList" + ); + } + }; + let field = Arc::new(Field::new_list_field(inner_type, true)); + Ok(DataType::FixedSizeList(field, size)) + } _ => not_impl_err!( "Unsupported Substrait user defined type with ref {} and variation {}", u.type_reference, diff --git a/datafusion/substrait/src/logical_plan/producer/types.rs b/datafusion/substrait/src/logical_plan/producer/types.rs index ba4e30f15195c..8a7fc2fcc2921 100644 --- a/datafusion/substrait/src/logical_plan/producer/types.rs +++ b/datafusion/substrait/src/logical_plan/producer/types.rs @@ -23,10 +23,9 @@ use crate::variation_const::{ DEFAULT_CONTAINER_TYPE_VARIATION_REF, DEFAULT_INTERVAL_DAY_TYPE_VARIATION_REF, DEFAULT_MAP_TYPE_VARIATION_REF, DEFAULT_TYPE_VARIATION_REF, DICTIONARY_MAP_TYPE_VARIATION_REF, DURATION_INTERVAL_DAY_TYPE_VARIATION_REF, - FIXED_SIZE_LIST_TYPE_VARIATION_REF, FLOAT_16_TYPE_NAME, - LARGE_CONTAINER_TYPE_VARIATION_REF, NULL_TYPE_NAME, TIME_32_TYPE_VARIATION_REF, - TIME_64_TYPE_VARIATION_REF, UNSIGNED_INTEGER_TYPE_VARIATION_REF, - VIEW_CONTAINER_TYPE_VARIATION_REF, + FIXED_SIZE_LIST_TYPE_NAME, FLOAT_16_TYPE_NAME, LARGE_CONTAINER_TYPE_VARIATION_REF, + NULL_TYPE_NAME, TIME_32_TYPE_VARIATION_REF, TIME_64_TYPE_VARIATION_REF, + UNSIGNED_INTEGER_TYPE_VARIATION_REF, VIEW_CONTAINER_TYPE_VARIATION_REF, }; use datafusion::arrow::datatypes::{DataType, IntervalUnit}; use datafusion::common::{DFSchemaRef, not_impl_err, plan_err}; @@ -290,13 +289,26 @@ pub(crate) fn to_substrait_type( DataType::FixedSizeList(inner, size) => { let inner_type = to_substrait_type(producer, inner.data_type(), inner.is_nullable())?; + let type_anchor = + producer.register_type(FIXED_SIZE_LIST_TYPE_NAME.to_string()); Ok(substrait::proto::Type { - kind: Some(r#type::Kind::List(Box::new(r#type::List { - r#type: Some(Box::new(inner_type)), - type_variation_reference: FIXED_SIZE_LIST_TYPE_VARIATION_REF - + (*size as u32), + kind: Some(r#type::Kind::UserDefined(r#type::UserDefined { + type_reference: type_anchor, + type_variation_reference: DEFAULT_TYPE_VARIATION_REF, nullability, - }))), + type_parameters: vec![ + r#type::Parameter { + parameter: Some(r#type::parameter::Parameter::DataType( + inner_type, + )), + }, + r#type::Parameter { + parameter: Some(r#type::parameter::Parameter::Integer( + *size as i64, + )), + }, + ], + })), }) } DataType::Map(inner, _) => match inner.data_type() { diff --git a/datafusion/substrait/src/variation_const.rs b/datafusion/substrait/src/variation_const.rs index 035485401940e..b253ec7310859 100644 --- a/datafusion/substrait/src/variation_const.rs +++ b/datafusion/substrait/src/variation_const.rs @@ -55,13 +55,6 @@ pub const TIME_64_TYPE_VARIATION_REF: u32 = 1; pub const DEFAULT_CONTAINER_TYPE_VARIATION_REF: u32 = 0; pub const LARGE_CONTAINER_TYPE_VARIATION_REF: u32 = 1; pub const VIEW_CONTAINER_TYPE_VARIATION_REF: u32 = 2; -/// Used for the arrow type [`DataType::FixedSizeList`]. -/// -/// The size of the fixed list is encoded by adding it to this base value. -/// For example, `FixedSizeList(10)` would use variation reference `3 + 10 = 13`. -/// -/// [`DataType::FixedSizeList`]: datafusion::arrow::datatypes::DataType::FixedSizeList -pub const FIXED_SIZE_LIST_TYPE_VARIATION_REF: u32 = 3; pub const DEFAULT_MAP_TYPE_VARIATION_REF: u32 = 0; pub const DICTIONARY_MAP_TYPE_VARIATION_REF: u32 = 1; pub const DECIMAL_128_TYPE_VARIATION_REF: u32 = 0; @@ -137,3 +130,8 @@ pub const FLOAT_16_TYPE_NAME: &str = "fp16"; /// /// [`DataType::Null`]: datafusion::arrow::datatypes::DataType::Null pub const NULL_TYPE_NAME: &str = "null"; + +/// For [`DataType::FixedSizeList`] +/// +/// [`DataType::FixedSizeList`]: datafusion::arrow::datatypes::DataType::FixedSizeList +pub const FIXED_SIZE_LIST_TYPE_NAME: &str = "fixed_size_list";