-
Notifications
You must be signed in to change notification settings - Fork 2.2k
Fix Redundant ScalarValue Boxed Collection #2523
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 4 commits
95ea4db
d750e43
5661512
77b3557
1d883ac
d64f224
66c8c2f
f957fc3
b9ea1a5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -74,8 +74,7 @@ pub enum ScalarValue { | |
| /// large binary | ||
| LargeBinary(Option<Vec<u8>>), | ||
| /// list of nested ScalarValue (boxed to reduce size_of(ScalarValue)) | ||
| #[allow(clippy::box_collection)] | ||
| List(Option<Box<Vec<ScalarValue>>>, Box<DataType>), | ||
| List(Option<Vec<ScalarValue>>, DataType), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure about unboxing DataType here |
||
| /// Date stored as a signed 32bit int | ||
| Date32(Option<i32>), | ||
| /// Date stored as a signed 64bit int | ||
|
|
@@ -95,8 +94,7 @@ pub enum ScalarValue { | |
| /// Interval with MonthDayNano unit | ||
| IntervalMonthDayNano(Option<i128>), | ||
| /// struct of nested ScalarValue (boxed to reduce size_of(ScalarValue)) | ||
|
comphead marked this conversation as resolved.
Outdated
|
||
| #[allow(clippy::box_collection)] | ||
| Struct(Option<Box<Vec<ScalarValue>>>, Box<Vec<Field>>), | ||
| Struct(Option<Vec<ScalarValue>>, Vec<Field>), | ||
| } | ||
|
|
||
| // manual implementation of `PartialEq` that uses OrderedFloat to | ||
|
|
@@ -400,7 +398,7 @@ macro_rules! build_list { | |
| ) | ||
| } | ||
| Some(values) => { | ||
| build_values_list!($VALUE_BUILDER_TY, $SCALAR_TY, values.as_ref(), $SIZE) | ||
| build_values_list!($VALUE_BUILDER_TY, $SCALAR_TY, values, $SIZE) | ||
| } | ||
| } | ||
| }}; | ||
|
|
@@ -420,37 +418,34 @@ macro_rules! build_timestamp_list { | |
| $SIZE, | ||
| ) | ||
| } | ||
| Some(values) => { | ||
| let values = values.as_ref(); | ||
| match $TIME_UNIT { | ||
| TimeUnit::Second => { | ||
| build_values_list_tz!( | ||
| TimestampSecondBuilder, | ||
| TimestampSecond, | ||
| values, | ||
| $SIZE | ||
| ) | ||
| } | ||
| TimeUnit::Microsecond => build_values_list_tz!( | ||
| TimestampMillisecondBuilder, | ||
| TimestampMillisecond, | ||
| values, | ||
| $SIZE | ||
| ), | ||
| TimeUnit::Millisecond => build_values_list_tz!( | ||
| TimestampMicrosecondBuilder, | ||
| TimestampMicrosecond, | ||
| Some(values) => match $TIME_UNIT { | ||
| TimeUnit::Second => { | ||
| build_values_list_tz!( | ||
| TimestampSecondBuilder, | ||
| TimestampSecond, | ||
| values, | ||
| $SIZE | ||
| ), | ||
| TimeUnit::Nanosecond => build_values_list_tz!( | ||
| TimestampNanosecondBuilder, | ||
| TimestampNanosecond, | ||
| values, | ||
| $SIZE | ||
| ), | ||
| ) | ||
| } | ||
| } | ||
| TimeUnit::Microsecond => build_values_list_tz!( | ||
| TimestampMillisecondBuilder, | ||
| TimestampMillisecond, | ||
| values, | ||
| $SIZE | ||
| ), | ||
| TimeUnit::Millisecond => build_values_list_tz!( | ||
| TimestampMicrosecondBuilder, | ||
| TimestampMicrosecond, | ||
| values, | ||
| $SIZE | ||
| ), | ||
| TimeUnit::Nanosecond => build_values_list_tz!( | ||
| TimestampNanosecondBuilder, | ||
| TimestampNanosecond, | ||
| values, | ||
| $SIZE | ||
| ), | ||
| }, | ||
| } | ||
| }}; | ||
| } | ||
|
|
@@ -587,11 +582,9 @@ impl ScalarValue { | |
| ScalarValue::LargeUtf8(_) => DataType::LargeUtf8, | ||
| ScalarValue::Binary(_) => DataType::Binary, | ||
| ScalarValue::LargeBinary(_) => DataType::LargeBinary, | ||
| ScalarValue::List(_, data_type) => DataType::List(Box::new(Field::new( | ||
| "item", | ||
| data_type.as_ref().clone(), | ||
| true, | ||
| ))), | ||
| ScalarValue::List(_, data_type) => { | ||
| DataType::List(Box::new(Field::new("item", data_type.clone(), true))) | ||
| } | ||
| ScalarValue::Date32(_) => DataType::Date32, | ||
| ScalarValue::Date64(_) => DataType::Date64, | ||
| ScalarValue::IntervalYearMonth(_) => { | ||
|
|
@@ -601,7 +594,7 @@ impl ScalarValue { | |
| ScalarValue::IntervalMonthDayNano(_) => { | ||
| DataType::Interval(IntervalUnit::MonthDayNano) | ||
| } | ||
| ScalarValue::Struct(_, fields) => DataType::Struct(fields.as_ref().clone()), | ||
| ScalarValue::Struct(_, fields) => DataType::Struct(fields.clone()), | ||
| ScalarValue::Null => DataType::Null, | ||
| } | ||
| } | ||
|
|
@@ -804,7 +797,6 @@ impl ScalarValue { | |
| for scalar in scalars.into_iter() { | ||
| match scalar { | ||
| ScalarValue::List(Some(xs), _) => { | ||
| let xs = *xs; | ||
| for s in xs { | ||
| match s { | ||
| ScalarValue::$SCALAR_TY(Some(val)) => { | ||
|
|
@@ -934,16 +926,20 @@ impl ScalarValue { | |
| match values { | ||
| Some(values) => { | ||
| // Push value for each field | ||
| for c in 0..columns.len() { | ||
| let column = columns.get_mut(c).unwrap(); | ||
| column.push(values[c].clone()); | ||
| for (i, v) in | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 this is a nicer implementation
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. clippy complained on
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. All the better that removing a layer of
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A perhaps more idiomatic way to write this would be something like
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, thats more concise. Fixed in 2 places. |
||
| values.iter().enumerate().take(columns.len()) | ||
| { | ||
| let column = columns.get_mut(i).unwrap(); | ||
| column.push(v.clone()); | ||
| } | ||
| } | ||
| None => { | ||
| // Push NULL of the appropriate type for each field | ||
| for c in 0..columns.len() { | ||
| let dtype = fields[c].data_type(); | ||
| let column = columns.get_mut(c).unwrap(); | ||
| for (i, f) in | ||
| fields.iter().enumerate().take(columns.len()) | ||
| { | ||
| let dtype = f.data_type(); | ||
| let column = columns.get_mut(i).unwrap(); | ||
| column.push(ScalarValue::try_from(dtype)?); | ||
| } | ||
| } | ||
|
|
@@ -1022,7 +1018,7 @@ impl ScalarValue { | |
| if let ScalarValue::List(values, _) = scalar { | ||
| match values { | ||
| Some(values) => { | ||
| let element_array = ScalarValue::iter_to_array(*values)?; | ||
| let element_array = ScalarValue::iter_to_array(values)?; | ||
|
|
||
| // Add new offset index | ||
| flat_len += element_array.len() as i32; | ||
|
|
@@ -1182,7 +1178,7 @@ impl ScalarValue { | |
| .collect::<LargeBinaryArray>(), | ||
| ), | ||
| }, | ||
| ScalarValue::List(values, data_type) => Arc::new(match data_type.as_ref() { | ||
| ScalarValue::List(values, data_type) => Arc::new(match data_type { | ||
| DataType::Boolean => build_list!(BooleanBuilder, Boolean, values, size), | ||
| DataType::Int8 => build_list!(Int8Builder, Int8, values, size), | ||
| DataType::Int16 => build_list!(Int16Builder, Int16, values, size), | ||
|
|
@@ -1205,7 +1201,7 @@ impl ScalarValue { | |
| repeat(self.clone()).take(size), | ||
| &DataType::List(Box::new(Field::new( | ||
| "item", | ||
| data_type.as_ref().clone(), | ||
| data_type.clone(), | ||
| true, | ||
| ))), | ||
| ) | ||
|
|
@@ -1327,8 +1323,7 @@ impl ScalarValue { | |
| Some(scalar_vec) | ||
| } | ||
| }; | ||
| let value = value.map(Box::new); | ||
| let data_type = Box::new(nested_type.data_type().clone()); | ||
| let data_type = nested_type.data_type().clone(); | ||
| ScalarValue::List(value, data_type) | ||
| } | ||
| DataType::Date32 => { | ||
|
|
@@ -1413,7 +1408,7 @@ impl ScalarValue { | |
| let col_scalar = ScalarValue::try_from_array(col_array, index)?; | ||
| field_values.push(col_scalar); | ||
| } | ||
| Self::Struct(Some(Box::new(field_values)), Box::new(fields.clone())) | ||
| Self::Struct(Some(field_values), fields.clone()) | ||
| } | ||
| DataType::FixedSizeList(nested_type, _len) => { | ||
| let list_array = | ||
|
|
@@ -1428,8 +1423,7 @@ impl ScalarValue { | |
| Some(scalar_vec) | ||
| } | ||
| }; | ||
| let value = value.map(Box::new); | ||
| let data_type = Box::new(nested_type.data_type().clone()); | ||
| let data_type = nested_type.data_type().clone(); | ||
| ScalarValue::List(value, data_type) | ||
| } | ||
| other => { | ||
|
|
@@ -1635,7 +1629,7 @@ impl From<Vec<(&str, ScalarValue)>> for ScalarValue { | |
| }) | ||
| .unzip(); | ||
|
|
||
| Self::Struct(Some(Box::new(scalars)), Box::new(fields)) | ||
| Self::Struct(Some(scalars), fields) | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -1763,11 +1757,9 @@ impl TryFrom<&DataType> for ScalarValue { | |
| value_type.as_ref().try_into()? | ||
| } | ||
| DataType::List(ref nested_type) => { | ||
| ScalarValue::List(None, Box::new(nested_type.data_type().clone())) | ||
| } | ||
| DataType::Struct(fields) => { | ||
| ScalarValue::Struct(None, Box::new(fields.clone())) | ||
| ScalarValue::List(None, nested_type.data_type().clone()) | ||
| } | ||
| DataType::Struct(fields) => ScalarValue::Struct(None, fields.clone()), | ||
| DataType::Null => ScalarValue::Null, | ||
| _ => { | ||
| return Err(DataFusionError::NotImplemented(format!( | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.