Skip to content

Commit fa0874b

Browse files
test: add regression test for unnesting dictionary encoded columns (#14395)
* chore: add regression test for unnest dict encoded cols * chore: use dataframe api for testing * chore: rm unused dep
1 parent ca9eef1 commit fa0874b

File tree

1 file changed

+67
-0
lines changed
  • datafusion/core/tests/dataframe

1 file changed

+67
-0
lines changed

datafusion/core/tests/dataframe/mod.rs

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ use datafusion_functions_aggregate::count::count_udaf;
3939
use datafusion_functions_aggregate::expr_fn::{
4040
array_agg, avg, count, count_distinct, max, median, min, sum,
4141
};
42+
use datafusion_functions_nested::make_array::make_array_udf;
4243
use datafusion_functions_window::expr_fn::{first_value, row_number};
4344
use object_store::local::LocalFileSystem;
4445
use sqlparser::ast::NullTreatment;
@@ -3358,6 +3359,72 @@ async fn unnest_columns() -> Result<()> {
33583359
Ok(())
33593360
}
33603361

3362+
#[tokio::test]
3363+
async fn unnest_dict_encoded_columns() -> Result<()> {
3364+
let strings = vec!["x", "y", "z"];
3365+
let keys = Int32Array::from_iter(0..strings.len() as i32);
3366+
3367+
let utf8_values = StringArray::from(strings.clone());
3368+
let utf8_dict = DictionaryArray::new(keys.clone(), Arc::new(utf8_values));
3369+
3370+
let make_array_udf_expr1 = make_array_udf().call(vec![col("column1")]);
3371+
let batch =
3372+
RecordBatch::try_from_iter(vec![("column1", Arc::new(utf8_dict) as ArrayRef)])?;
3373+
3374+
let ctx = SessionContext::new();
3375+
ctx.register_batch("test", batch)?;
3376+
let df = ctx
3377+
.table("test")
3378+
.await?
3379+
.select(vec![
3380+
make_array_udf_expr1.alias("make_array_expr"),
3381+
col("column1"),
3382+
])?
3383+
.unnest_columns(&["make_array_expr"])?;
3384+
3385+
let results = df.collect().await.unwrap();
3386+
let expected = [
3387+
"+-----------------+---------+",
3388+
"| make_array_expr | column1 |",
3389+
"+-----------------+---------+",
3390+
"| x | x |",
3391+
"| y | y |",
3392+
"| z | z |",
3393+
"+-----------------+---------+",
3394+
];
3395+
assert_batches_eq!(expected, &results);
3396+
3397+
// make_array(dict_encoded_string,literal string)
3398+
let make_array_udf_expr2 = make_array_udf().call(vec![
3399+
col("column1"),
3400+
lit(ScalarValue::new_utf8("fixed_string")),
3401+
]);
3402+
let df = ctx
3403+
.table("test")
3404+
.await?
3405+
.select(vec![
3406+
make_array_udf_expr2.alias("make_array_expr"),
3407+
col("column1"),
3408+
])?
3409+
.unnest_columns(&["make_array_expr"])?;
3410+
3411+
let results = df.collect().await.unwrap();
3412+
let expected = [
3413+
"+-----------------+---------+",
3414+
"| make_array_expr | column1 |",
3415+
"+-----------------+---------+",
3416+
"| x | x |",
3417+
"| fixed_string | x |",
3418+
"| y | y |",
3419+
"| fixed_string | y |",
3420+
"| z | z |",
3421+
"| fixed_string | z |",
3422+
"+-----------------+---------+",
3423+
];
3424+
assert_batches_eq!(expected, &results);
3425+
Ok(())
3426+
}
3427+
33613428
#[tokio::test]
33623429
async fn unnest_column_nulls() -> Result<()> {
33633430
let df = table_with_lists_and_nulls().await?;

0 commit comments

Comments
 (0)