-
Notifications
You must be signed in to change notification settings - Fork 520
Open
Labels
bug — Something isn't working
Description
Environment
Delta-rs version: 0.27.0
Binding: I am using the Rust library.
Bug
What happened:
Assume I have a column called map
The schema is,
"map": StructField { name: "map", data_type: Map(MapType { type_name: "map", key_type: Primitive(String), value_type: Primitive(String), value_contains_null: false }), nullable: false, metadata: {} }
`delta-rs` does not raise any error if I try to ingest `{"a": null}`. It ingests successfully.
Then, at query time, I get an error like:
ArrowError(InvalidArgumentError("Found unmasked nulls for non-nullable StructArray field \"value\""), None)
What you expected to happen:
`delta-rs` should block writing maps with `null` values if the schema does not allow it.
How to reproduce it:
There are some helper functions used below, which I have skipped, but the code should be self-explanatory.
/// Reproduction: appends a map column containing null values to a table whose map type
/// was created with its null-values flag set to `false`.
///
/// Every step is unwrapped, so the test panics at the first operation that returns an
/// error. Per the accompanying report, both appends are accepted and the failure only
/// surfaces in the final query.
#[tokio::test]
async fn null_to_non_null_column_map() {
    // Declared table schema: non-nullable `id` plus a non-nullable string -> string map.
    let map_type = MapType::new(DeltaDataType::STRING, DeltaDataType::STRING, false);
    let schema_without_null_column = vec![
        StructField::new("id", DeltaDataType::INTEGER, false),
        StructField::new("map", DeltaDataType::Map(Box::new(map_type)), false),
    ];

    // First batch: every map value is populated.
    let filled_entry = Some(HashMap::from([("a".to_string(), "b".to_string())]));
    let filled_ids = Arc::new(Int32Array::from(vec![1, 2])) as ArrayRef;
    let filled_maps =
        create_string_map_array(&[&filled_entry, &filled_entry], false).unwrap() as ArrayRef;
    let non_null_batch = RecordBatch::try_from_iter_with_nullable(vec![
        ("id", filled_ids, false),
        ("map", filled_maps, false),
    ])
    .unwrap();

    // Second batch: every map entry maps "a" to a null value.
    let empty_entry = Some(HashMap::from([("a".to_string(), None)]));
    let optional_ids = Arc::new(Int32Array::from(vec![Some(1), Some(1)])) as ArrayRef;
    let empty_maps =
        create_string_map_array_optional_value(&[&empty_entry, &empty_entry]).unwrap() as ArrayRef;
    let null_batch = RecordBatch::try_from_iter_with_nullable(vec![
        ("id", optional_ids, true),
        ("map", empty_maps, false),
    ])
    .unwrap();

    // Create the in-memory table with the declared schema.
    let mut table = DeltaOps::try_from_uri("memory:///")
        .await
        .unwrap()
        .create()
        .with_table_name("my_table")
        .with_save_mode(SaveMode::ErrorIfExists)
        .with_columns(schema_without_null_column)
        .await
        .unwrap();

    // Append the fully populated batch.
    table = DeltaOps::from(table.clone())
        .write(vec![non_null_batch.clone()])
        .with_schema_mode(SchemaMode::Merge)
        .with_save_mode(SaveMode::Append)
        .await
        .unwrap();
    println!(
        "schema: {:?}\ndata: {:?}",
        table.get_schema().unwrap(),
        non_null_batch
    );

    // https://github.com/delta-io/delta-rs/blob/ef9e077402013bedf44fccf872f7d7d4fd9157ab/crates/core/src/delta_datafusion/mod.rs#L1425
    // Nullability of nested columns is not checked by delta, so this append of
    // null map values is unwrapped as a success as well.
    table = DeltaOps::from(table)
        .write(vec![null_batch.clone()])
        .with_schema_mode(SchemaMode::Merge)
        .with_save_mode(SaveMode::Append)
        .await
        .unwrap();
    println!(
        "schema: {:?}\ndata: {:?}",
        table.get_schema().unwrap(),
        null_batch
    );

    // Read everything back through DataFusion.
    let ctx = SessionContext::new();
    ctx.register_table("simple_table", Arc::new(table.clone()))
        .unwrap();
    ctx.sql("SELECT * FROM simple_table;")
        .await
        .unwrap()
        .show()
        .await
        .unwrap();
}
More details:
Metadata
Metadata
Assignees
Labels
bug — Something isn't working