Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

re_format: barebone support for custom formatting #1776

Merged
merged 4 commits into from
Apr 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 8 additions & 1 deletion crates/re_arrow_store/src/arrow_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,14 @@ impl ArrayExt for dyn Array {
///
/// Nested types are expanded and cleaned recursively
fn clean_for_polars(&self) -> Box<dyn Array> {
match self.data_type() {
let datatype = self.data_type();
let datatype = if let DataType::Extension(_, inner, _) = datatype {
(**inner).clone()
} else {
datatype.clone()
};

match &datatype {
DataType::List(field) => {
// Recursively clean the contents
let typed_arr = self.as_any().downcast_ref::<ListArray<i32>>().unwrap();
Expand Down
4 changes: 3 additions & 1 deletion crates/re_format/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ version.workspace = true
[package.metadata.docs.rs]
all-features = true


[dependencies]
arrow2.workspace = true
arrow2_convert.workspace = true
comfy-table.workspace = true
parking_lot.workspace = true
re_tuid.workspace = true
93 changes: 85 additions & 8 deletions crates/re_format/src/arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,91 @@
use std::fmt::Formatter;

use arrow2::{
array::{get_display, Array},
array::{get_display, Array, ListArray, StructArray},
datatypes::{DataType, IntervalUnit, TimeUnit},
};
use arrow2_convert::deserialize::TryIntoCollection;
use comfy_table::{presets, Cell, Table};

use re_tuid::Tuid;

// ---

// TODO(#1775): Registering custom formatters should be done from other crates:
// A) Because `re_format` cannot depend on other crates (cyclic deps)
// B) Because how to deserialize and inspect some type is a private implementation detail of that
// type, re_format shouldn't know how to deserialize a TUID...

/// Boxed per-row formatting callback: given a writer of type `F` and a row index, it writes
/// that row's textual representation and reports the outcome as a [`std::fmt::Result`].
///
/// The `'a` bound lets the callback borrow data (e.g. the array being displayed) for `'a`.
type CustomFormatter<'a, F> = Box<dyn Fn(&mut F, usize) -> std::fmt::Result + 'a>;

/// Returns a per-row formatter for `array`, using a hand-written display for the extension
/// types known to this crate and falling back to arrow2's [`get_display`] for everything else.
///
/// * `_column_name`: currently unused.
/// * `array`: the column to format; the returned formatter borrows it for `'a`.
/// * `null`: placeholder written for null rows.
///
/// The returned closure takes a writer and a row index into `array`.
pub fn get_custom_display<'a, F: std::fmt::Write + 'a>(
    _column_name: &'a str,
    array: &'a dyn Array,
    null: &'static str,
) -> CustomFormatter<'a, F> {
    // NOTE: If the top-level array is a list, it's probably not the type we're looking for: we're
    // interested in the type of the array that's underneath.
    // The inner type is sniffed from the first list entry only; if the list array is empty or
    // its first entry is null, no datatype is found and the default display is used.
    let datatype = (|| match array.data_type().to_logical_type() {
        DataType::List(_) => array
            .as_any()
            .downcast_ref::<ListArray<i32>>()?
            .iter()
            .next()?
            .map(|array| array.data_type().clone()),
        _ => Some(array.data_type().clone()),
    })();

    if let Some(DataType::Extension(name, _, _)) = datatype {
        match name.as_str() {
            // TODO(#1775): This should be registered dynamically.
            // NOTE: Can't call `Tuid::name()`, `Component` lives in `re_log_types`.
            "rerun.tuid" => Box::new(move |w, index| {
                // Null rows must render as the caller-provided placeholder, exactly like the
                // default `get_display` path does, rather than being reported as errors.
                // The bounds check keeps out-of-range indices on the `<ERR>` path below.
                if index < array.len() && array.is_null(index) {
                    return w.write_str(null);
                }

                match parse_tuid(array, index) {
                    Some(tuid) => w.write_fmt(format_args!("{tuid}")),
                    None => w.write_str("<ERR>"),
                }
            }),
            _ => get_display(array, null),
        }
    } else {
        get_display(array, null)
    }
}

// TODO(#1775): This should be defined and registered by the `re_tuid` crate.
/// Extracts the [`Tuid`] stored at row `index` of `array`.
///
/// Two layouts are handled:
/// * a list array (legacy MsgId lists): the list at `index` is taken and its first value used,
///   since all values in such a list are identical;
/// * anything else (new control columns): `array` itself is used, indexed directly by `index`.
///
/// Returns `None` if a downcast fails, if deserialization fails, or if the resolved row does
/// not exist.
fn parse_tuid(array: &dyn Array, index: usize) -> Option<Tuid> {
    let is_list = matches!(array.data_type().to_logical_type(), DataType::List(_));

    let (values, row) = if is_list {
        // Legacy MsgId lists: just grab the first value, they're all identical
        let lists = array.as_any().downcast_ref::<ListArray<i32>>()?;
        (lists.value(index), 0)
    } else {
        // New control columns: it's not a list to begin with!
        (array.to_boxed(), index)
    };

    let structs = values.as_any().downcast_ref::<StructArray>()?;

    // The whole struct array is deserialized; only the requested row is returned.
    let decoded: Vec<Tuid> = TryIntoCollection::try_into_collection(structs.to_boxed()).ok()?;
    decoded.get(row).copied()
}

// ---

//TODO(john) move this and the Display impl upstream into arrow2
/// Newtype wrapper around arrow2's [`TimeUnit`], used to attach a `Display` implementation
/// to it.
#[repr(transparent)]
pub struct DisplayTimeUnit(TimeUnit);

impl std::fmt::Display for DisplayTimeUnit {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
let s = match self.0 {
arrow2::datatypes::TimeUnit::Second => "s",
arrow2::datatypes::TimeUnit::Millisecond => "ms",
arrow2::datatypes::TimeUnit::Microsecond => "us",
arrow2::datatypes::TimeUnit::Nanosecond => "ns",
TimeUnit::Second => "s",
TimeUnit::Millisecond => "ms",
TimeUnit::Microsecond => "us",
TimeUnit::Nanosecond => "ns",
};
f.write_str(s)
}
Expand Down Expand Up @@ -133,24 +202,32 @@ where
let mut table = Table::new();
table.load_preset(presets::UTF8_FULL);

let names = names
.into_iter()
.map(|name| name.as_ref().to_owned())
.collect::<Vec<_>>();
let arrays = columns.into_iter().collect::<Vec<_>>();

let (displayers, lengths): (Vec<_>, Vec<_>) = arrays
.iter()
.map(|array| (get_display(array.as_ref(), "-"), array.as_ref().len()))
.zip(names.iter())
.map(|(array, name)| {
let formatter = get_custom_display(name, array.as_ref(), "-");
(formatter, array.as_ref().len())
})
.unzip();

if displayers.is_empty() {
return table;
}

let header = names
.into_iter()
.iter()
.zip(arrays.iter().map(|array| array.as_ref().data_type()))
.map(|(name, data_type)| {
Cell::new(format!(
"{}\n---\n{}",
name.as_ref(),
name,
DisplayDataType(data_type.clone())
))
});
Expand Down
4 changes: 2 additions & 2 deletions crates/re_log_types/src/component_types/msg_id.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ use crate::{Component, ComponentName};
/// # use arrow2::datatypes::{DataType, Field};
/// assert_eq!(
/// MsgId::data_type(),
/// DataType::Struct(vec![
/// DataType::Extension("rerun.tuid".into(), Box::new(DataType::Struct(vec![
/// Field::new("time_ns", DataType::UInt64, false),
/// Field::new("inc", DataType::UInt64, false),
/// ])
/// ])), None),
/// );
/// ```
#[derive(
Expand Down
49 changes: 34 additions & 15 deletions crates/re_log_types/src/data_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,15 +150,15 @@ impl std::ops::IndexMut<usize> for DataCellColumn {
///
/// The table above translates to the following, where each column is contiguous in memory:
/// ```text
/// ┌──────────────────────────────────────────────────────────┬────────────────────┬─────────────────────┬─────────────┬──────────────────────────────────┬─────────────────┐
/// │ rerun.row_id ┆ rerun.timepoint ┆ rerun.entity_path ┆ rerun.num_instances ┆ rerun.label ┆ rerun.point2d ┆ rerun.colorrgba │
/// ╞══════════════════════════════════════════════════════════╪════════════════════╪═════════════════════╪═════════════╪══════════════════════════════════╪═════════════════╡
/// │ {167967218, 54449486} ┆ [{frame_nr, 1, 1}, {clock, 1, 1}] ┆ a ┆ 2 ┆ [] ┆ [{x: 10, y: 10}, {x: 20, y: 20}] ┆ [2155905279] │
/// ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
/// │ {167967218, 54449486} ┆ [{frame_nr, 1, 1}, {clock, 1, 2}] ┆ b ┆ 0 ┆ - ┆ - ┆ [] │
/// ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
/// │ {167967218, 54449486} ┆ [{frame_nr, 1, 2}, {clock, 1, 1}] ┆ c ┆ 1 ┆ [hey] ┆ - ┆ [4294967295] │
/// └──────────────────────────────────────────────────────────┴────────────────────┴─────────────────────┴─────────────┴──────────────────────────────────┴─────────────────┘
/// ┌───────────────────────────────────────────────────────────────────────────┬───────────────────┬─────────────────────┬─────────────┬──────────────────────────────────┬─────────────────┐
/// │ frame_nr ┆ log_time ┆ rerun.row_id ┆ rerun.entity_path ┆ rerun.num_instances ┆ rerun.label ┆ rerun.point2d ┆ rerun.colorrgba │
/// ╞═══════════════════════════════════════════════════════════════════════════╪═══════════════════╪═════════════════════╪═════════════╪══════════════════════════════════╪═════════════════╡
/// │ 1 ┆ 2023-04-05 09:36:47.188796402 ┆ 1753004ACBF5D6E651F2983C3DAF260C ┆ a ┆ 2 ┆ [] ┆ [{x: 10, y: 10}, {x: 20, y: 20}] ┆ [2155905279] │
/// ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
/// │ 1 ┆ 2023-04-05 09:36:47.188852222 ┆ 1753004ACBF5D6E651F2983C3DAF260C ┆ b ┆ 0 ┆ - ┆ - ┆ [] │
/// ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
/// │ 2 ┆ 2023-04-05 09:36:47.188855872 ┆ 1753004ACBF5D6E651F2983C3DAF260C ┆ c ┆ 1 ┆ [hey] ┆ - ┆ [4294967295] │
/// └───────────────────────────────────────────────────────────────────────────┴───────────────────┴─────────────────────┴─────────────┴──────────────────────────────────┴─────────────────┘
/// ```
///
/// ## Example
Expand Down Expand Up @@ -533,8 +533,6 @@ impl DataTable {
[(METADATA_KIND.to_owned(), METADATA_KIND_CONTROL.to_owned())].into(),
);

// TODO(cmc): why do we have to do this manually on the way out, but it's done
// automatically on our behalf on the way in...?
if let DataType::Extension(name, _, _) = data.data_type() {
field
.metadata
Expand Down Expand Up @@ -627,15 +625,20 @@ impl DataTable {
.map(|cell| cell.as_arrow_ref())
.collect_vec();

let ext_name = cell_refs.first().and_then(|cell| match cell.data_type() {
DataType::Extension(name, _, _) => Some(name),
_ => None,
});

// NOTE: Avoid paying for the cost of the concatenation machinery if there's a single
// row in the column.
let data = if cell_refs.len() == 1 {
data_to_lists(column, cell_refs[0].to_boxed())
data_to_lists(column, cell_refs[0].to_boxed(), ext_name.cloned())
} else {
// NOTE: This is a column of cells, it shouldn't ever fail to concatenate since
// they share the same underlying type.
let data = arrow2::compute::concatenate::concatenate(cell_refs.as_slice())?;
data_to_lists(column, data)
data_to_lists(column, data, ext_name.cloned())
};

let field = Field::new(name, data.data_type().clone(), false)
Expand All @@ -648,10 +651,26 @@ impl DataTable {
///
/// * Before: `[C, C, C, C, C, C, C, ...]`
/// * After: `ListArray[ [[C, C], [C, C, C], None, [C], [C], ...] ]`
fn data_to_lists(column: &[Option<DataCell>], data: Box<dyn Array>) -> Box<dyn Array> {
fn data_to_lists(
column: &[Option<DataCell>],
data: Box<dyn Array>,
ext_name: Option<String>,
) -> Box<dyn Array> {
let datatype = data.data_type().clone();

let datatype = ListArray::<i32>::default_datatype(datatype);
let field = {
let mut field = Field::new("item", datatype, true);

if let Some(name) = ext_name {
field
.metadata
.extend([("ARROW:extension:name".to_owned(), name)]);
}

field
};

let datatype = DataType::List(Box::new(field));
let offsets = Offsets::try_from_lengths(column.iter().map(|cell| {
cell.as_ref()
.map_or(0, |cell| cell.num_instances() as usize)
Expand Down
28 changes: 24 additions & 4 deletions crates/re_tuid/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,10 @@
#![doc = document_features::document_features!()]
//!

use arrow2_convert::{ArrowDeserialize, ArrowField, ArrowSerialize};
use arrow2::datatypes::DataType;
use arrow2_convert::{ArrowDeserialize, ArrowSerialize};

#[derive(
Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, ArrowField, ArrowSerialize, ArrowDeserialize,
)]
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, ArrowSerialize, ArrowDeserialize)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub struct Tuid {
/// Approximate nanoseconds since epoch.
Expand All @@ -21,6 +20,27 @@ pub struct Tuid {
inc: u64,
}

arrow2_convert::arrow_enable_vec_for_type!(Tuid);

// TODO(#1774): shouldn't have to write this manually
impl arrow2_convert::field::ArrowField for Tuid {
    type Type = Self;

    /// Arrow datatype of a [`Tuid`]: a struct of two `u64` fields (`time_ns`, `inc`) wrapped in
    /// the `rerun.tuid` extension type.
    fn data_type() -> arrow2::datatypes::DataType {
        let fields = vec![
            <u64 as arrow2_convert::field::ArrowField>::field("time_ns"),
            <u64 as arrow2_convert::field::ArrowField>::field("inc"),
        ];
        let storage = arrow2::datatypes::DataType::Struct(fields);

        DataType::Extension("rerun.tuid".into(), Box::new(storage), None)
    }
}

impl std::fmt::Display for Tuid {
    /// Writes the id's `u128` value as uppercase hexadecimal, zero-padded to 32 digits.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let value = self.as_u128();
        f.write_fmt(format_args!("{value:032X}"))
    }
}

impl std::fmt::Debug for Tuid {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{:032X}", self.as_u128())
Expand Down