Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 86 additions & 5 deletions rust/arrow/src/util/pretty.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,13 @@

use crate::array;
use crate::array::{Array, PrimitiveArrayOps};
use crate::datatypes::{DataType, TimeUnit};
use crate::datatypes::{
ArrowNativeType, ArrowPrimitiveType, DataType, Int16Type, Int32Type, Int64Type,
Int8Type, TimeUnit, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
};
use crate::record_batch::RecordBatch;

use array::DictionaryArray;
use prettytable::format;
use prettytable::{Cell, Row, Table};

Expand Down Expand Up @@ -60,7 +64,7 @@ fn create_table(results: &[RecordBatch]) -> Result<Table> {
let mut cells = Vec::new();
for col in 0..batch.num_columns() {
let column = batch.column(col);
cells.push(Cell::new(&array_value_to_string(column.clone(), row)?));
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no reason / need to clone to column when printing each value

cells.push(Cell::new(&array_value_to_string(&column, row)?));
}
table.add_row(Row::new(cells));
}
Expand All @@ -83,8 +87,8 @@ macro_rules! make_string {
}};
}

/// Get the value at the given row in an array as a string
fn array_value_to_string(column: array::ArrayRef, row: usize) -> Result<String> {
/// Get the value at the given row in an array as a String
pub fn array_value_to_string(column: &array::ArrayRef, row: usize) -> Result<String> {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I made this pub so I could call it from datafusion/src/test/sql.rs which currently has a copy/pasted version of pretty printing in array_str: https://github.com/apache/arrow/blob/master/rust/datafusion/tests/sql.rs#L646

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See #8333

match column.data_type() {
DataType::Utf8 => make_string!(array::StringArray, column, row),
DataType::Boolean => make_string!(array::BooleanArray, column, row),
Expand Down Expand Up @@ -124,15 +128,55 @@ fn array_value_to_string(column: array::ArrayRef, row: usize) -> Result<String>
DataType::Time64(unit) if *unit == TimeUnit::Nanosecond => {
make_string!(array::Time64NanosecondArray, column, row)
}
DataType::Dictionary(index_type, _value_type) => match **index_type {
DataType::Int8 => dict_array_value_to_string::<Int8Type>(column, row),
DataType::Int16 => dict_array_value_to_string::<Int16Type>(column, row),
DataType::Int32 => dict_array_value_to_string::<Int32Type>(column, row),
DataType::Int64 => dict_array_value_to_string::<Int64Type>(column, row),
DataType::UInt8 => dict_array_value_to_string::<UInt8Type>(column, row),
DataType::UInt16 => dict_array_value_to_string::<UInt16Type>(column, row),
DataType::UInt32 => dict_array_value_to_string::<UInt32Type>(column, row),
DataType::UInt64 => dict_array_value_to_string::<UInt64Type>(column, row),
_ => Err(ArrowError::InvalidArgumentError(format!(
"Pretty printing not supported for {:?} due to index type",
column.data_type()
))),
},
_ => Err(ArrowError::InvalidArgumentError(format!(
"Unsupported {:?} type for repl.",
"Pretty printing not implemented for {:?} type",
column.data_type()
))),
}
}

/// Converts the value of the dictionary array at `row` to a String
fn dict_array_value_to_string<K: ArrowPrimitiveType>(
colum: &array::ArrayRef,
row: usize,
) -> Result<String> {
let dict_array = colum.as_any().downcast_ref::<DictionaryArray<K>>().unwrap();

let keys_array = dict_array.keys_array();

if keys_array.is_null(row) {
return Ok(String::from(""));
}

let dict_index = keys_array.value(row).to_usize().ok_or_else(|| {
ArrowError::InvalidArgumentError(format!(
"Can not convert value {:?} at index {:?} to usize for repl.",
keys_array.value(row),
row
))
})?;

array_value_to_string(&dict_array.values(), dict_index)
}

#[cfg(test)]
mod tests {
use array::{PrimitiveBuilder, StringBuilder, StringDictionaryBuilder};

use super::*;
use crate::datatypes::{Field, Schema};
use std::sync::Arc;
Expand Down Expand Up @@ -183,4 +227,41 @@ mod tests {

Ok(())
}

#[test]
fn test_pretty_format_dictionary() -> Result<()> {
// define a schema.
let field_type =
DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8));
let schema = Arc::new(Schema::new(vec![Field::new("d1", field_type, true)]));

let keys_builder = PrimitiveBuilder::<Int32Type>::new(10);
let values_builder = StringBuilder::new(10);
let mut builder = StringDictionaryBuilder::new(keys_builder, values_builder);

builder.append("one")?;
builder.append_null()?;
builder.append("three")?;
let array = Arc::new(builder.finish());

let batch = RecordBatch::try_new(schema.clone(), vec![array])?;

let table = pretty_format_batches(&[batch])?;

let expected = vec![
"+-------+",
"| d1 |",
"+-------+",
"| one |",
"| |",
"| three |",
"+-------+",
];

let actual: Vec<&str> = table.lines().collect();

assert_eq!(expected, actual, "Actual result:\n{}", table);

Ok(())
}
}