Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Codegen'd Rust/Arrow (de)ser 3: misc fixes & improvements #2487

Merged
merged 14 commits into from
Jun 30, 2023
Merged
2 changes: 1 addition & 1 deletion crates/re_log_types/src/data_row.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ pub type DataRowResult<T> = ::std::result::Result<T, DataRowError>;

// ---

type DataCellVec = SmallVec<[DataCell; 4]>;
pub type DataCellVec = SmallVec<[DataCell; 4]>;

/// A row's worth of [`DataCell`]s: a collection of independent [`DataCell`]s with different
/// underlying datatypes and pointing to different parts of the heap.
Expand Down
2 changes: 1 addition & 1 deletion crates/re_log_types/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ use std::sync::Arc;
pub use self::arrow_msg::ArrowMsg;
pub use self::component::{Component, DeserializableComponent, SerializableComponent};
pub use self::data_cell::{DataCell, DataCellError, DataCellInner, DataCellResult};
pub use self::data_row::{DataRow, DataRowError, DataRowResult, RowId};
pub use self::data_row::{DataCellVec, DataRow, DataRowError, DataRowResult, RowId};
pub use self::data_table::{
DataCellColumn, DataCellOptVec, DataTable, DataTableError, DataTableResult, EntityPathVec,
ErasedTimeVec, NumInstancesVec, RowIdVec, TableId, TimePointVec, COLUMN_ENTITY_PATH,
Expand Down
1 change: 1 addition & 0 deletions crates/re_types/definitions/rerun/archetypes/points2d.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ namespace rerun.archetypes;
// TODO(#2372): archetype IDL definitions must refer to objects of kind component
// TODO(#2373): `attr.rerun.component_required` implies `required`
// TODO(#2427): distinguish optional vs. recommended in language backends
// TODO(#2521): always derive debug & clone for rust backend

/// A 2D point cloud with positions and optional colors, radii, labels, etc.
table Points2D (
Expand Down
2 changes: 1 addition & 1 deletion crates/re_types/source_hash.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# This is a sha256 hash for all direct and indirect dependencies of this crate's build script.
# It can be safely removed at any time to force the build script to run again.
# Check out build.rs to see how it's computed.
0960d9b4f6df9136f7857a7b7280a4803f3eba7a085c98aa1ce7c95dcd88539e
c10dc39333002ce5c62d9e88a7feb4fca76098528fe643012a53665a9934581e
6 changes: 5 additions & 1 deletion crates/re_types/src/components/class_id.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ impl crate::Component for ClassId {
#[allow(clippy::wildcard_imports)]
fn to_arrow_datatype() -> arrow2::datatypes::DataType {
use ::arrow2::datatypes::*;
DataType::UInt16
DataType::Extension(
"rerun.components.ClassId".to_owned(),
Box::new(DataType::UInt16),
None,
)
}
}
6 changes: 5 additions & 1 deletion crates/re_types/src/components/color.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ impl crate::Component for Color {
#[allow(clippy::wildcard_imports)]
fn to_arrow_datatype() -> arrow2::datatypes::DataType {
use ::arrow2::datatypes::*;
DataType::UInt32
DataType::Extension(
"rerun.components.Color".to_owned(),
Box::new(DataType::UInt32),
None,
)
}
}
6 changes: 5 additions & 1 deletion crates/re_types/src/components/draw_order.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ impl crate::Component for DrawOrder {
#[allow(clippy::wildcard_imports)]
fn to_arrow_datatype() -> arrow2::datatypes::DataType {
use ::arrow2::datatypes::*;
DataType::Float32
DataType::Extension(
"rerun.components.DrawOrder".to_owned(),
Box::new(DataType::Float32),
None,
)
}
}
6 changes: 5 additions & 1 deletion crates/re_types/src/components/instance_key.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ impl crate::Component for InstanceKey {
#[allow(clippy::wildcard_imports)]
fn to_arrow_datatype() -> arrow2::datatypes::DataType {
use ::arrow2::datatypes::*;
DataType::UInt64
DataType::Extension(
"rerun.components.InstanceKey".to_owned(),
Box::new(DataType::UInt64),
None,
)
}
}
6 changes: 5 additions & 1 deletion crates/re_types/src/components/keypoint_id.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ impl crate::Component for KeypointId {
#[allow(clippy::wildcard_imports)]
fn to_arrow_datatype() -> arrow2::datatypes::DataType {
use ::arrow2::datatypes::*;
DataType::UInt16
DataType::Extension(
"rerun.components.KeypointId".to_owned(),
Box::new(DataType::UInt16),
None,
)
}
}
6 changes: 5 additions & 1 deletion crates/re_types/src/components/label.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ impl crate::Component for Label {
#[allow(clippy::wildcard_imports)]
fn to_arrow_datatype() -> arrow2::datatypes::DataType {
use ::arrow2::datatypes::*;
DataType::Utf8
DataType::Extension(
"rerun.components.Label".to_owned(),
Box::new(DataType::Utf8),
None,
)
}
}
4 changes: 2 additions & 2 deletions crates/re_types/src/components/point2d.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@ impl crate::Component for Point2D {
Field {
name: "x".to_owned(),
data_type: DataType::Float32,
is_nullable: true,
is_nullable: false,
metadata: [].into(),
},
Field {
name: "y".to_owned(),
data_type: DataType::Float32,
is_nullable: true,
is_nullable: false,
metadata: [].into(),
},
])),
Expand Down
6 changes: 5 additions & 1 deletion crates/re_types/src/components/radius.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ impl crate::Component for Radius {
#[allow(clippy::wildcard_imports)]
fn to_arrow_datatype() -> arrow2::datatypes::DataType {
use ::arrow2::datatypes::*;
DataType::Float32
DataType::Extension(
"rerun.components.Radius".to_owned(),
Box::new(DataType::Float32),
None,
)
}
}
20 changes: 12 additions & 8 deletions crates/re_types/src/datatypes/vec2d.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,18 @@ impl crate::Datatype for Vec2D {
#[allow(clippy::wildcard_imports)]
fn to_arrow_datatype() -> arrow2::datatypes::DataType {
use ::arrow2::datatypes::*;
DataType::FixedSizeList(
Box::new(Field {
name: "item".to_owned(),
data_type: DataType::Float32,
is_nullable: false,
metadata: [].into(),
}),
2usize,
DataType::Extension(
"rerun.datatypes.Vec2D".to_owned(),
Box::new(DataType::FixedSizeList(
Box::new(Field {
name: "item".to_owned(),
data_type: DataType::Float32,
is_nullable: false,
metadata: [].into(),
}),
2usize,
)),
None,
)
}
}
63 changes: 42 additions & 21 deletions crates/re_types_builder/src/arrow_registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ use anyhow::Context as _;
use arrow2::datatypes::{DataType, Field, UnionMode};
use std::collections::{BTreeMap, HashMap};

use crate::{ElementType, Object, Type, ATTR_ARROW_SPARSE_UNION, ATTR_ARROW_TRANSPARENT};
use crate::{
ElementType, Object, ObjectField, Type, ATTR_ARROW_SPARSE_UNION, ATTR_ARROW_TRANSPARENT,
};

// --- Registry ---

Expand All @@ -18,7 +20,7 @@ pub struct ArrowRegistry {
impl ArrowRegistry {
/// Computes the Arrow datatype for the specified object and stores it in the registry, to be
/// resolved later on.
pub fn register(&mut self, obj: &Object) {
pub fn register(&mut self, obj: &mut Object) {
let (fqname, datatype) = (obj.fqname.clone(), self.arrow_datatype_from_object(obj));
self.registry.insert(fqname, datatype);
}
Expand Down Expand Up @@ -48,7 +50,7 @@ impl ArrowRegistry {

// ---

fn arrow_datatype_from_object(&self, obj: &Object) -> LazyDatatype {
fn arrow_datatype_from_object(&mut self, obj: &mut Object) -> LazyDatatype {
let is_struct = obj.is_struct();
let is_transparent = obj.try_get_attr::<String>(ATTR_ARROW_TRANSPARENT).is_some();
let num_fields = obj.fields.len();
Expand All @@ -59,18 +61,25 @@ impl ArrowRegistry {
obj.fqname,
);

if is_transparent {
self.arrow_datatype_from_type(&obj.fields[0].typ)
let datatype = if is_transparent {
LazyDatatype::Extension(
obj.fqname.clone(),
Box::new(
self.arrow_datatype_from_type(obj.fields[0].typ.clone(), &mut obj.fields[0]),
),
None,
)
} else if is_struct {
LazyDatatype::Extension(
obj.fqname.clone(),
Box::new(LazyDatatype::Struct(
obj.fields
.iter()
.map(|field| LazyField {
name: field.name.clone(),
datatype: self.arrow_datatype_from_type(&field.typ),
is_nullable: field.required,
.iter_mut()
.map(|obj_field| LazyField {
name: obj_field.name.clone(),
datatype: self
.arrow_datatype_from_type(obj_field.typ.clone(), obj_field),
is_nullable: obj_field.is_nullable,
metadata: Default::default(),
})
.collect(),
Expand All @@ -85,10 +94,10 @@ impl ArrowRegistry {
obj.fqname.clone(),
Box::new(LazyDatatype::Union(
obj.fields
.iter()
.iter_mut()
.map(|field| LazyField {
name: field.name.clone(),
datatype: self.arrow_datatype_from_type(&field.typ),
datatype: self.arrow_datatype_from_type(field.typ.clone(), field),
is_nullable: false,
metadata: Default::default(),
})
Expand All @@ -102,11 +111,18 @@ impl ArrowRegistry {
)),
None,
)
};

// NOTE: Arrow-transparent objects by definition don't have a datatype of their own.
if !is_transparent {
obj.datatype = datatype.clone().into();
}

datatype
}

fn arrow_datatype_from_type(&self, typ: &Type) -> LazyDatatype {
match typ {
fn arrow_datatype_from_type(&mut self, typ: Type, field: &mut ObjectField) -> LazyDatatype {
let datatype = match typ {
Type::UInt8 => LazyDatatype::UInt8,
Type::UInt16 => LazyDatatype::UInt16,
Type::UInt32 => LazyDatatype::UInt32,
Expand All @@ -124,22 +140,27 @@ impl ArrowRegistry {
Box::new(LazyField {
name: "item".into(),
datatype: self.arrow_datatype_from_element_type(elem_type),
is_nullable: false,
is_nullable: field.is_nullable,
metadata: Default::default(),
}),
*length,
length,
),
Type::Vector { elem_type } => LazyDatatype::List(Box::new(LazyField {
name: "item".into(),
datatype: self.arrow_datatype_from_element_type(elem_type),
is_nullable: false,
is_nullable: field.is_nullable,
metadata: Default::default(),
})),
Type::Object(fqname) => LazyDatatype::Unresolved(fqname.clone()),
}
Type::Object(fqname) => LazyDatatype::Unresolved(fqname),
};

field.datatype = datatype.clone().into();
self.registry.insert(field.fqname.clone(), datatype.clone());

datatype
}

fn arrow_datatype_from_element_type(&self, typ: &ElementType) -> LazyDatatype {
fn arrow_datatype_from_element_type(&self, typ: ElementType) -> LazyDatatype {
_ = self;
match typ {
ElementType::UInt8 => LazyDatatype::UInt8,
Expand All @@ -155,7 +176,7 @@ impl ArrowRegistry {
ElementType::Float32 => LazyDatatype::Float32,
ElementType::Float64 => LazyDatatype::Float64,
ElementType::String => LazyDatatype::Utf8,
ElementType::Object(fqname) => LazyDatatype::Unresolved(fqname.clone()),
ElementType::Object(fqname) => LazyDatatype::Unresolved(fqname),
}
}
}
Expand Down
Loading