-
Notifications
You must be signed in to change notification settings - Fork 4k
ARROW-10162: [Rust] Add pretty print support for DictionaryArray #8331
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,9 +19,13 @@ | |
|
|
||
| use crate::array; | ||
| use crate::array::{Array, PrimitiveArrayOps}; | ||
| use crate::datatypes::{DataType, TimeUnit}; | ||
| use crate::datatypes::{ | ||
| ArrowNativeType, ArrowPrimitiveType, DataType, Int16Type, Int32Type, Int64Type, | ||
| Int8Type, TimeUnit, UInt16Type, UInt32Type, UInt64Type, UInt8Type, | ||
| }; | ||
| use crate::record_batch::RecordBatch; | ||
|
|
||
| use array::DictionaryArray; | ||
| use prettytable::format; | ||
| use prettytable::{Cell, Row, Table}; | ||
|
|
||
|
|
@@ -60,7 +64,7 @@ fn create_table(results: &[RecordBatch]) -> Result<Table> { | |
| let mut cells = Vec::new(); | ||
| for col in 0..batch.num_columns() { | ||
| let column = batch.column(col); | ||
| cells.push(Cell::new(&array_value_to_string(column.clone(), row)?)); | ||
| cells.push(Cell::new(&array_value_to_string(&column, row)?)); | ||
| } | ||
| table.add_row(Row::new(cells)); | ||
| } | ||
|
|
@@ -83,8 +87,8 @@ macro_rules! make_string { | |
| }}; | ||
| } | ||
|
|
||
| /// Get the value at the given row in an array as a string | ||
| fn array_value_to_string(column: array::ArrayRef, row: usize) -> Result<String> { | ||
| /// Get the value at the given row in an array as a String | ||
| pub fn array_value_to_string(column: &array::ArrayRef, row: usize) -> Result<String> { | ||
|
||
| match column.data_type() { | ||
| DataType::Utf8 => make_string!(array::StringArray, column, row), | ||
| DataType::Boolean => make_string!(array::BooleanArray, column, row), | ||
|
|
@@ -124,15 +128,55 @@ fn array_value_to_string(column: array::ArrayRef, row: usize) -> Result<String> | |
| DataType::Time64(unit) if *unit == TimeUnit::Nanosecond => { | ||
| make_string!(array::Time64NanosecondArray, column, row) | ||
| } | ||
| DataType::Dictionary(index_type, _value_type) => match **index_type { | ||
| DataType::Int8 => dict_array_value_to_string::<Int8Type>(column, row), | ||
| DataType::Int16 => dict_array_value_to_string::<Int16Type>(column, row), | ||
| DataType::Int32 => dict_array_value_to_string::<Int32Type>(column, row), | ||
| DataType::Int64 => dict_array_value_to_string::<Int64Type>(column, row), | ||
| DataType::UInt8 => dict_array_value_to_string::<UInt8Type>(column, row), | ||
| DataType::UInt16 => dict_array_value_to_string::<UInt16Type>(column, row), | ||
| DataType::UInt32 => dict_array_value_to_string::<UInt32Type>(column, row), | ||
| DataType::UInt64 => dict_array_value_to_string::<UInt64Type>(column, row), | ||
| _ => Err(ArrowError::InvalidArgumentError(format!( | ||
| "Pretty printing not supported for {:?} due to index type", | ||
| column.data_type() | ||
| ))), | ||
| }, | ||
| _ => Err(ArrowError::InvalidArgumentError(format!( | ||
| "Unsupported {:?} type for repl.", | ||
| "Pretty printing not implemented for {:?} type", | ||
| column.data_type() | ||
| ))), | ||
| } | ||
| } | ||
|
|
||
| /// Converts the value of the dictionary array at `row` to a String. | ||
| /// | ||
| /// `K` is the key (index) type of the dictionary. A null key is rendered as an | ||
| /// empty string; otherwise the key is used as an index into the dictionary's | ||
| /// values array and that value is formatted with `array_value_to_string`. | ||
| /// | ||
| /// Returns an `InvalidArgumentError` if `column` is not a `DictionaryArray<K>` | ||
| /// or if the key at `row` cannot be converted to `usize`. | ||
| fn dict_array_value_to_string<K: ArrowPrimitiveType>( | ||
| column: &array::ArrayRef, | ||
| row: usize, | ||
| ) -> Result<String> { | ||
| // Report a mismatched array as an error rather than panicking on `unwrap`. | ||
| let dict_array = column | ||
| .as_any() | ||
| .downcast_ref::<DictionaryArray<K>>() | ||
| .ok_or_else(|| { | ||
| ArrowError::InvalidArgumentError(format!( | ||
| "Pretty printing not supported for {:?}: not a dictionary array with the expected key type", | ||
| column.data_type() | ||
| )) | ||
| })?; | ||
|
|
||
| let keys_array = dict_array.keys_array(); | ||
|
|
||
| // A null key has no dictionary entry to look up; print it as an empty cell. | ||
| if keys_array.is_null(row) { | ||
| return Ok(String::new()); | ||
| } | ||
|
|
||
| let dict_index = keys_array.value(row).to_usize().ok_or_else(|| { | ||
| ArrowError::InvalidArgumentError(format!( | ||
| "Can not convert value {:?} at index {:?} to usize for pretty printing.", | ||
| keys_array.value(row), | ||
| row | ||
| )) | ||
| })?; | ||
|
|
||
| // Format the referenced entry of the values array. | ||
| array_value_to_string(&dict_array.values(), dict_index) | ||
| } | ||
|
|
||
| #[cfg(test)] | ||
| mod tests { | ||
| use array::{PrimitiveBuilder, StringBuilder, StringDictionaryBuilder}; | ||
|
|
||
| use super::*; | ||
| use crate::datatypes::{Field, Schema}; | ||
| use std::sync::Arc; | ||
|
|
@@ -183,4 +227,41 @@ mod tests { | |
|
|
||
| Ok(()) | ||
| } | ||
|
|
||
| #[test] | ||
| fn test_pretty_format_dictionary() -> Result<()> { | ||
| // Build a dictionary-encoded string column: two values with a null between them. | ||
| let mut dict_builder = StringDictionaryBuilder::new( | ||
| PrimitiveBuilder::<Int32Type>::new(10), | ||
| StringBuilder::new(10), | ||
| ); | ||
| dict_builder.append("one")?; | ||
| dict_builder.append_null()?; | ||
| dict_builder.append("three")?; | ||
| let column = Arc::new(dict_builder.finish()); | ||
|
|
||
| // Schema with a single field `d1` of type Dictionary(Int32, Utf8). | ||
| let dict_type = | ||
| DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)); | ||
| let schema = Arc::new(Schema::new(vec![Field::new("d1", dict_type, true)])); | ||
|
|
||
| let batch = RecordBatch::try_new(Arc::clone(&schema), vec![column])?; | ||
| let rendered = pretty_format_batches(&[batch])?; | ||
|
|
||
| // The null entry is expected to show up as an empty cell. | ||
| let expected = vec![ | ||
| "+-------+", | ||
| "| d1 |", | ||
| "+-------+", | ||
| "| one |", | ||
| "| |", | ||
| "| three |", | ||
| "+-------+", | ||
| ]; | ||
| let actual: Vec<&str> = rendered.lines().collect(); | ||
|
|
||
| assert_eq!(expected, actual, "Actual result:\n{}", rendered); | ||
|
|
||
| Ok(()) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is no reason / need to clone the column when printing each value