Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 51 additions & 59 deletions datafusion/common/src/scalar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,7 @@ pub enum ScalarValue {
/// large binary
LargeBinary(Option<Vec<u8>>),
/// list of nested ScalarValue (boxed to reduce size_of(ScalarValue))
Comment thread
comphead marked this conversation as resolved.
Outdated
#[allow(clippy::box_collection)]
List(Option<Box<Vec<ScalarValue>>>, Box<DataType>),
List(Option<Vec<ScalarValue>>, DataType),

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure about unboxing DataType here

/// Date stored as a signed 32bit int
Date32(Option<i32>),
/// Date stored as a signed 64bit int
Expand All @@ -95,8 +94,7 @@ pub enum ScalarValue {
/// Interval with MonthDayNano unit
IntervalMonthDayNano(Option<i128>),
/// struct of nested ScalarValue (boxed to reduce size_of(ScalarValue))
Comment thread
comphead marked this conversation as resolved.
Outdated
#[allow(clippy::box_collection)]
Struct(Option<Box<Vec<ScalarValue>>>, Box<Vec<Field>>),
Struct(Option<Vec<ScalarValue>>, Vec<Field>),
}

// manual implementation of `PartialEq` that uses OrderedFloat to
Expand Down Expand Up @@ -400,7 +398,7 @@ macro_rules! build_list {
)
}
Some(values) => {
build_values_list!($VALUE_BUILDER_TY, $SCALAR_TY, values.as_ref(), $SIZE)
build_values_list!($VALUE_BUILDER_TY, $SCALAR_TY, values, $SIZE)
}
}
}};
Expand All @@ -420,37 +418,34 @@ macro_rules! build_timestamp_list {
$SIZE,
)
}
Some(values) => {
let values = values.as_ref();
match $TIME_UNIT {
TimeUnit::Second => {
build_values_list_tz!(
TimestampSecondBuilder,
TimestampSecond,
values,
$SIZE
)
}
TimeUnit::Microsecond => build_values_list_tz!(
TimestampMillisecondBuilder,
TimestampMillisecond,
values,
$SIZE
),
TimeUnit::Millisecond => build_values_list_tz!(
TimestampMicrosecondBuilder,
TimestampMicrosecond,
Some(values) => match $TIME_UNIT {
TimeUnit::Second => {
build_values_list_tz!(
TimestampSecondBuilder,
TimestampSecond,
values,
$SIZE
),
TimeUnit::Nanosecond => build_values_list_tz!(
TimestampNanosecondBuilder,
TimestampNanosecond,
values,
$SIZE
),
)
}
}
TimeUnit::Microsecond => build_values_list_tz!(
TimestampMillisecondBuilder,
TimestampMillisecond,
values,
$SIZE
),
TimeUnit::Millisecond => build_values_list_tz!(
TimestampMicrosecondBuilder,
TimestampMicrosecond,
values,
$SIZE
),
TimeUnit::Nanosecond => build_values_list_tz!(
TimestampNanosecondBuilder,
TimestampNanosecond,
values,
$SIZE
),
},
}
}};
}
Expand Down Expand Up @@ -587,11 +582,9 @@ impl ScalarValue {
ScalarValue::LargeUtf8(_) => DataType::LargeUtf8,
ScalarValue::Binary(_) => DataType::Binary,
ScalarValue::LargeBinary(_) => DataType::LargeBinary,
ScalarValue::List(_, data_type) => DataType::List(Box::new(Field::new(
"item",
data_type.as_ref().clone(),
true,
))),
ScalarValue::List(_, data_type) => {
DataType::List(Box::new(Field::new("item", data_type.clone(), true)))
}
ScalarValue::Date32(_) => DataType::Date32,
ScalarValue::Date64(_) => DataType::Date64,
ScalarValue::IntervalYearMonth(_) => {
Expand All @@ -601,7 +594,7 @@ impl ScalarValue {
ScalarValue::IntervalMonthDayNano(_) => {
DataType::Interval(IntervalUnit::MonthDayNano)
}
ScalarValue::Struct(_, fields) => DataType::Struct(fields.as_ref().clone()),
ScalarValue::Struct(_, fields) => DataType::Struct(fields.clone()),
ScalarValue::Null => DataType::Null,
}
}
Expand Down Expand Up @@ -804,7 +797,6 @@ impl ScalarValue {
for scalar in scalars.into_iter() {
match scalar {
ScalarValue::List(Some(xs), _) => {
let xs = *xs;
for s in xs {
match s {
ScalarValue::$SCALAR_TY(Some(val)) => {
Expand Down Expand Up @@ -934,16 +926,20 @@ impl ScalarValue {
match values {
Some(values) => {
// Push value for each field
for c in 0..columns.len() {
let column = columns.get_mut(c).unwrap();
column.push(values[c].clone());
for (i, v) in

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 this is a nicer implementation

@comphead comphead May 16, 2022

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

clippy complained onfor c in 0..columns.len() :)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All the better that removing a layer of Boxing lets clippy fix things more easily 👍

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A perhaps more idiomatic way to write this would be something like

for (column, value) in columns.iter_mut().zip(values)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, thats more concise. Fixed in 2 places.

values.iter().enumerate().take(columns.len())
{
let column = columns.get_mut(i).unwrap();
column.push(v.clone());
}
}
None => {
// Push NULL of the appropriate type for each field
for c in 0..columns.len() {
let dtype = fields[c].data_type();
let column = columns.get_mut(c).unwrap();
for (i, f) in
fields.iter().enumerate().take(columns.len())
{
let dtype = f.data_type();
let column = columns.get_mut(i).unwrap();
column.push(ScalarValue::try_from(dtype)?);
}
}
Expand Down Expand Up @@ -1022,7 +1018,7 @@ impl ScalarValue {
if let ScalarValue::List(values, _) = scalar {
match values {
Some(values) => {
let element_array = ScalarValue::iter_to_array(*values)?;
let element_array = ScalarValue::iter_to_array(values)?;

// Add new offset index
flat_len += element_array.len() as i32;
Expand Down Expand Up @@ -1182,7 +1178,7 @@ impl ScalarValue {
.collect::<LargeBinaryArray>(),
),
},
ScalarValue::List(values, data_type) => Arc::new(match data_type.as_ref() {
ScalarValue::List(values, data_type) => Arc::new(match data_type {
DataType::Boolean => build_list!(BooleanBuilder, Boolean, values, size),
DataType::Int8 => build_list!(Int8Builder, Int8, values, size),
DataType::Int16 => build_list!(Int16Builder, Int16, values, size),
Expand All @@ -1205,7 +1201,7 @@ impl ScalarValue {
repeat(self.clone()).take(size),
&DataType::List(Box::new(Field::new(
"item",
data_type.as_ref().clone(),
data_type.clone(),
true,
))),
)
Expand Down Expand Up @@ -1327,8 +1323,7 @@ impl ScalarValue {
Some(scalar_vec)
}
};
let value = value.map(Box::new);
let data_type = Box::new(nested_type.data_type().clone());
let data_type = nested_type.data_type().clone();
ScalarValue::List(value, data_type)
}
DataType::Date32 => {
Expand Down Expand Up @@ -1413,7 +1408,7 @@ impl ScalarValue {
let col_scalar = ScalarValue::try_from_array(col_array, index)?;
field_values.push(col_scalar);
}
Self::Struct(Some(Box::new(field_values)), Box::new(fields.clone()))
Self::Struct(Some(field_values), fields.clone())
}
DataType::FixedSizeList(nested_type, _len) => {
let list_array =
Expand All @@ -1428,8 +1423,7 @@ impl ScalarValue {
Some(scalar_vec)
}
};
let value = value.map(Box::new);
let data_type = Box::new(nested_type.data_type().clone());
let data_type = nested_type.data_type().clone();
ScalarValue::List(value, data_type)
}
other => {
Expand Down Expand Up @@ -1635,7 +1629,7 @@ impl From<Vec<(&str, ScalarValue)>> for ScalarValue {
})
.unzip();

Self::Struct(Some(Box::new(scalars)), Box::new(fields))
Self::Struct(Some(scalars), fields)
}
}

Expand Down Expand Up @@ -1763,11 +1757,9 @@ impl TryFrom<&DataType> for ScalarValue {
value_type.as_ref().try_into()?
}
DataType::List(ref nested_type) => {
ScalarValue::List(None, Box::new(nested_type.data_type().clone()))
}
DataType::Struct(fields) => {
ScalarValue::Struct(None, Box::new(fields.clone()))
ScalarValue::List(None, nested_type.data_type().clone())
}
DataType::Struct(fields) => ScalarValue::Struct(None, fields.clone()),
DataType::Null => ScalarValue::Null,
_ => {
return Err(DataFusionError::NotImplemented(format!(
Expand Down
Loading