Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 62 additions & 11 deletions rust/arrow/src/util/display.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,57 @@ macro_rules! make_string {
}};
}

macro_rules! make_string_date {
($array_type:ty, $column: ident, $row: ident) => {{
let array = $column.as_any().downcast_ref::<$array_type>().unwrap();

let s = if array.is_null($row) {
"".to_string()
Copy link
Contributor

@Dandandan Dandandan Jan 19, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't nulls be better displayed as "NULL" and add a test for it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think there are tradeoffs between using empty string or some sentinel ("NULL") for display; I don't have a super strong preference though I do think the behavior should be consistent across types

This PR follows the same convention as the rest of this module and uses "" for NULL values: https://github.com/apache/arrow/pull/9263/files#diff-821be3a8a9239cdd12ab2b3c979e174f7b936aa6bfad4b9bc76489b9923cf747R38

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The NULL behavior is included in the tests below (and the empty string is visible)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense to me. I think the "problem" would be when displaying strings, as the empty string is displayed the same way as null, but I agree it is better to keep it consistent.

} else {
array
.value_as_date($row)
.map(|d| d.to_string())
.unwrap_or_else(|| "ERROR CONVERTING DATE".to_string())
};

Ok(s)
}};
}

macro_rules! make_string_time {
($array_type:ty, $column: ident, $row: ident) => {{
let array = $column.as_any().downcast_ref::<$array_type>().unwrap();

let s = if array.is_null($row) {
"".to_string()
} else {
array
.value_as_time($row)
.map(|d| d.to_string())
.unwrap_or_else(|| "ERROR CONVERTING DATE".to_string())
};

Ok(s)
}};
}

macro_rules! make_string_datetime {
($array_type:ty, $column: ident, $row: ident) => {{
let array = $column.as_any().downcast_ref::<$array_type>().unwrap();

let s = if array.is_null($row) {
"".to_string()
} else {
array
.value_as_datetime($row)
.map(|d| d.to_string())
.unwrap_or_else(|| "ERROR CONVERTING DATE".to_string())
};

Ok(s)
}};
}

// It's not possible to do array.value($row).to_string() for &[u8], let's format it as hex
macro_rules! make_string_hex {
($array_type:ty, $column: ident, $row: ident) => {{
Expand Down Expand Up @@ -104,30 +155,30 @@ pub fn array_value_to_string(column: &array::ArrayRef, row: usize) -> Result<Str
DataType::Float32 => make_string!(array::Float32Array, column, row),
DataType::Float64 => make_string!(array::Float64Array, column, row),
DataType::Timestamp(unit, _) if *unit == TimeUnit::Second => {
make_string!(array::TimestampSecondArray, column, row)
make_string_datetime!(array::TimestampSecondArray, column, row)
}
DataType::Timestamp(unit, _) if *unit == TimeUnit::Millisecond => {
make_string!(array::TimestampMillisecondArray, column, row)
make_string_datetime!(array::TimestampMillisecondArray, column, row)
}
DataType::Timestamp(unit, _) if *unit == TimeUnit::Microsecond => {
make_string!(array::TimestampMicrosecondArray, column, row)
make_string_datetime!(array::TimestampMicrosecondArray, column, row)
}
DataType::Timestamp(unit, _) if *unit == TimeUnit::Nanosecond => {
make_string!(array::TimestampNanosecondArray, column, row)
make_string_datetime!(array::TimestampNanosecondArray, column, row)
}
DataType::Date32(_) => make_string!(array::Date32Array, column, row),
DataType::Date64(_) => make_string!(array::Date64Array, column, row),
DataType::Date32(_) => make_string_date!(array::Date32Array, column, row),
DataType::Date64(_) => make_string_date!(array::Date64Array, column, row),
DataType::Time32(unit) if *unit == TimeUnit::Second => {
make_string!(array::Time32SecondArray, column, row)
make_string_time!(array::Time32SecondArray, column, row)
}
DataType::Time32(unit) if *unit == TimeUnit::Millisecond => {
make_string!(array::Time32MillisecondArray, column, row)
make_string_time!(array::Time32MillisecondArray, column, row)
}
DataType::Time32(unit) if *unit == TimeUnit::Microsecond => {
make_string!(array::Time64MicrosecondArray, column, row)
DataType::Time64(unit) if *unit == TimeUnit::Microsecond => {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There was actually a bug here -- there is no such type as using Time32 for storing Microseconds -- it is actually Time64 (as can be seen in the Time64MicrosecondArray type below it)

make_string_time!(array::Time64MicrosecondArray, column, row)
}
DataType::Time64(unit) if *unit == TimeUnit::Nanosecond => {
make_string!(array::Time64NanosecondArray, column, row)
make_string_time!(array::Time64NanosecondArray, column, row)
}
DataType::List(_) => make_string_from_list!(column, row),
DataType::Dictionary(index_type, _value_type) => match **index_type {
Expand Down
167 changes: 165 additions & 2 deletions rust/arrow/src/util/pretty.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,17 @@ fn create_table(results: &[RecordBatch]) -> Result<Table> {

#[cfg(test)]
mod tests {
use crate::array::{self, PrimitiveBuilder, StringBuilder, StringDictionaryBuilder};
use crate::{
array::{
self, Array, Date32Array, Date64Array, PrimitiveBuilder, StringBuilder,
StringDictionaryBuilder, Time32MillisecondArray, Time32SecondArray,
Time64MicrosecondArray, Time64NanosecondArray, TimestampMicrosecondArray,
TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray,
},
datatypes::{DataType, Field, Int32Type, Schema},
};

use super::*;
use crate::datatypes::{DataType, Field, Int32Type, Schema};
use std::sync::Arc;

#[test]
Expand Down Expand Up @@ -160,4 +167,160 @@ mod tests {

Ok(())
}

/// Generate an array with type $ARRAYTYPE with a numeric value of
/// $VALUE, and compare $EXPECTED_RESULT to the output of
/// formatting that array with `pretty_format_batches`
macro_rules! check_datetime {
($ARRAYTYPE:ident, $VALUE:expr, $EXPECTED_RESULT:expr) => {
let mut builder = $ARRAYTYPE::builder(10);
builder.append_value($VALUE).unwrap();
builder.append_null().unwrap();
let array = builder.finish();

let schema = Arc::new(Schema::new(vec![Field::new(
"f",
array.data_type().clone(),
true,
)]));
let batch = RecordBatch::try_new(schema, vec![Arc::new(array)]).unwrap();

let table = pretty_format_batches(&[batch]).expect("formatting batches");

let expected = $EXPECTED_RESULT;
let actual: Vec<&str> = table.lines().collect();

assert_eq!(expected, actual, "Actual result:\n\n{:#?}\n\n", actual);
};
}

#[test]
fn test_pretty_format_timestamp_second() {
let expected = vec![
"+---------------------+",
"| f |",
"+---------------------+",
"| 1970-05-09 14:25:11 |",
"| |",
"+---------------------+",
];
check_datetime!(TimestampSecondArray, 11111111, expected);
}

#[test]
fn test_pretty_format_timestamp_millisecond() {
let expected = vec![
"+-------------------------+",
"| f |",
"+-------------------------+",
"| 1970-01-01 03:05:11.111 |",
"| |",
"+-------------------------+",
];
check_datetime!(TimestampMillisecondArray, 11111111, expected);
}

#[test]
fn test_pretty_format_timestamp_microsecond() {
let expected = vec![
"+----------------------------+",
"| f |",
"+----------------------------+",
"| 1970-01-01 00:00:11.111111 |",
"| |",
"+----------------------------+",
];
check_datetime!(TimestampMicrosecondArray, 11111111, expected);
}

#[test]
fn test_pretty_format_timestamp_nanosecond() {
let expected = vec![
"+-------------------------------+",
"| f |",
"+-------------------------------+",
"| 1970-01-01 00:00:00.011111111 |",
"| |",
"+-------------------------------+",
];
check_datetime!(TimestampNanosecondArray, 11111111, expected);
}

#[test]
fn test_pretty_format_date_32() {
let expected = vec![
"+------------+",
"| f |",
"+------------+",
"| 1973-05-19 |",
"| |",
"+------------+",
];
check_datetime!(Date32Array, 1234, expected);
}

#[test]
fn test_pretty_format_date_64() {
let expected = vec![
"+------------+",
"| f |",
"+------------+",
"| 2005-03-18 |",
"| |",
"+------------+",
];
check_datetime!(Date64Array, 1111111100000, expected);
}

#[test]
fn test_pretty_format_time_32_second() {
let expected = vec![
"+----------+",
"| f |",
"+----------+",
"| 00:18:31 |",
"| |",
"+----------+",
];
check_datetime!(Time32SecondArray, 1111, expected);
}

#[test]
fn test_pretty_format_time_32_millisecond() {
let expected = vec![
"+--------------+",
"| f |",
"+--------------+",
"| 03:05:11.111 |",
"| |",
"+--------------+",
];
check_datetime!(Time32MillisecondArray, 11111111, expected);
}

#[test]
fn test_pretty_format_time_64_microsecond() {
let expected = vec![
"+-----------------+",
"| f |",
"+-----------------+",
"| 00:00:11.111111 |",
"| |",
"+-----------------+",
];
check_datetime!(Time64MicrosecondArray, 11111111, expected);
}

#[test]
fn test_pretty_format_time_64_nanosecond() {
let expected = vec![
"+--------------------+",
"| f |",
"+--------------------+",
"| 00:00:00.011111111 |",
"| |",
"+--------------------+",
];
check_datetime!(Time64NanosecondArray, 11111111, expected);
}
}