Skip to content

Commit

Permalink
fix: make ScalarValue::Dictionary with NULL values produce NULL arrays (#11908)
Browse files Browse the repository at this point in the history

Update the way ScalarValue::Dictionary values are turned into arrays
such that:

    scalar_value.is_null() == scalar_value.to_array()?.is_null(0)

Previously the dictionary would be created with a valid key entry
pointing to a NULL value. The Arrow columnar format specification
(https://arrow.apache.org/docs/format/Columnar.html#dictionary-encoded-layout)
suggests that this does not constitute a NULL entry.
  • Loading branch information
mhilton committed Aug 10, 2024
1 parent 79fa6f9 commit 2730423
Showing 1 changed file with 18 additions and 3 deletions.
21 changes: 18 additions & 3 deletions datafusion/common/src/scalar/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -801,9 +801,13 @@ fn dict_from_scalar<K: ArrowDictionaryKeyType>(
let values_array = value.to_array_of_size(1)?;

// Create a key array with `size` elements, each of 0
let key_array: PrimitiveArray<K> = std::iter::repeat(Some(K::default_value()))
.take(size)
.collect();
let key_array: PrimitiveArray<K> = std::iter::repeat(if value.is_null() {
None
} else {
Some(K::default_value())
})
.take(size)
.collect();

// create a new DictionaryArray
//
Expand Down Expand Up @@ -6674,4 +6678,15 @@ mod tests {
);
assert!(dense_scalar.is_null());
}

/// A `ScalarValue::Dictionary` wrapping a NULL value must report itself as
/// NULL, and the array produced from it must have a NULL entry at index 0.
#[test]
fn null_dictionary_scalar_produces_null_dictionary_array() {
    // Int32 keys, with `ScalarValue::Null` as the wrapped value.
    let key_type = Box::new(DataType::Int32);
    let null_value = Box::new(ScalarValue::Null);
    let scalar = ScalarValue::Dictionary(key_type, null_value);
    assert!(scalar.is_null());

    // The array form must agree with the scalar's own nullness.
    let array = scalar.to_array().unwrap();
    assert!(array.is_null(0));
}
}

0 comments on commit 2730423

Please sign in to comment.