Skip to content

Commit

Permalink
Use arrow::MakeBuilder to create arrow array builder that exactly m…
Browse files Browse the repository at this point in the history
…atch the desired datatype (#4280)

### What

* Prerequisite for #4273

Previously, not knowing about `arrow::MakeBuilder`, we created the
hierarchy of arrow array builders manually in a utility method dubbed
`new_arrow_array_builder`. While working on C FFI it turned out that
this way we end up with an arrow array whose data type doesn't have the
correct nullability settings - in other words, on creating the arrow
array builders we would have needed to pass the exact data types. This
is what I tried at first and it worked, but then I noticed that there's
already `arrow::MakeBuilder` which internally uses a bunch of rolled out
switch/case statements to create the hierarchy programmatically.

It turns out that this in combination with the work done on switching
from IPC to C FFI gives another significant speed-up. When tested in
isolation however (i.e. still on IPC), the gains seem to be rather
modest and within the (large!) measurement noise.

### Checklist
* [x] I have read and agree to [Contributor
Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and
the [Code of
Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md)
* [x] I've included a screenshot or gif (if applicable)
* [x] I have tested [demo.rerun.io](https://demo.rerun.io/pr/4280) (if
applicable)
* [x] The PR title and labels are set such as to maximize their
usefulness for the next release's CHANGELOG

- [PR Build Summary](https://build.rerun.io/pr/4280)
- [Docs
preview](https://rerun.io/preview/030bafa44afbd77f510d15db7c3d254386f1aae9/docs)
<!--DOCS-PREVIEW-->
- [Examples
preview](https://rerun.io/preview/030bafa44afbd77f510d15db7c3d254386f1aae9/examples)
<!--EXAMPLES-PREVIEW-->
- [Recent benchmark results](https://build.rerun.io/graphs/crates.html)
- [Wasm size tracking](https://build.rerun.io/graphs/sizes.html)
  • Loading branch information
Wumpf authored Nov 21, 2023
1 parent 83d7686 commit 16b4c3c
Show file tree
Hide file tree
Showing 204 changed files with 361 additions and 2,802 deletions.
108 changes: 2 additions & 106 deletions crates/re_types_builder/src/codegen/cpp/array_builder.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
use proc_macro2::{Ident, TokenStream};
use proc_macro2::Ident;
use quote::{format_ident, quote};

use crate::{Object, ObjectSpecifics, Objects, Type};

use super::{
forward_decl::{ForwardDecl, ForwardDecls},
includes::Includes,
quote_comment, quote_fqname_as_type_path, quote_integer, NEWLINE_TOKEN,
};
use super::forward_decl::{ForwardDecl, ForwardDecls};

pub fn arrow_array_builder_type(typ: &Type, objects: &Objects) -> Ident {
arrow_array_builder_type_and_declaration(typ, objects, &mut ForwardDecls::default())
Expand Down Expand Up @@ -107,103 +103,3 @@ pub fn arrow_array_builder_type_object(
class_ident
}
}

pub fn quote_arrow_array_builder_type_instantiation(
typ: &Type,
objects: &Objects,
cpp_includes: &mut Includes,
is_top_level_type: bool,
) -> TokenStream {
let builder_type = arrow_array_builder_type(typ, objects);

match typ {
Type::UInt8
| Type::UInt16
| Type::UInt32
| Type::UInt64
| Type::Int8
| Type::Int16
| Type::Int32
| Type::Int64
| Type::Bool
| Type::Float16
| Type::Float32
| Type::Float64
| Type::String => {
quote!(std::make_shared<arrow::#builder_type>(memory_pool))
}
Type::Vector { elem_type } => {
let element_builder = quote_arrow_array_builder_type_instantiation(
&elem_type.clone().into(),
objects,
cpp_includes,
false,
);
quote!(std::make_shared<arrow::#builder_type>(memory_pool, #element_builder))
}
Type::Array { elem_type, length } => {
let quoted_length = quote_integer(length);
let element_builder = quote_arrow_array_builder_type_instantiation(
&elem_type.clone().into(),
objects,
cpp_includes,
false,
);
quote!(std::make_shared<arrow::#builder_type>(memory_pool, #element_builder, #quoted_length))
}
Type::Object(fqname) => {
let object = &objects[fqname];

if !is_top_level_type {
// Propagating error here is hard since we're in a nested context.
// But also not that important since we *know* that this only fails for null pools and we already checked that now.
// For the unlikely broken case, Rerun result will give us a nullptr which will then
// fail the subsequent actions inside arrow, so the error will still propagate.
let quoted_fqname = quote_fqname_as_type_path(cpp_includes, fqname);
quote!(#quoted_fqname::new_arrow_array_builder(memory_pool).value)
} else if object.is_arrow_transparent() {
quote_arrow_array_builder_type_instantiation(
&object.fields[0].typ,
objects,
cpp_includes,
false,
)
} else {
let field_builders = object.fields.iter().map(|field| {
quote_arrow_array_builder_type_instantiation(
&field.typ,
objects,
cpp_includes,
false,
)
});

match object.specifics {
ObjectSpecifics::Struct => {
quote! {
std::make_shared<arrow::#builder_type>(
arrow_datatype(),
memory_pool,
std::vector<std::shared_ptr<arrow::ArrayBuilder>>({ #(#field_builders,)* })
)
}
}
ObjectSpecifics::Union { .. } => {
let children_comment = quote_comment("Children:");
quote! {
std::make_shared<arrow::#builder_type>(
memory_pool,
#NEWLINE_TOKEN
#children_comment
std::vector<std::shared_ptr<arrow::ArrayBuilder>>({
std::make_shared<arrow::NullBuilder>(memory_pool), #(#field_builders,)*
}),
arrow_datatype()
)
}
}
}
}
}
}
}
108 changes: 37 additions & 71 deletions crates/re_types_builder/src/codegen/cpp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,7 @@ use crate::{
ObjectSpecifics, Objects, Reporter, Type, ATTR_CPP_NO_FIELD_CTORS,
};

use self::array_builder::{
arrow_array_builder_type, arrow_array_builder_type_object,
quote_arrow_array_builder_type_instantiation,
};
use self::array_builder::{arrow_array_builder_type, arrow_array_builder_type_object};
use self::forward_decl::{ForwardDecl, ForwardDecls};
use self::includes::Includes;
use self::method::{Method, MethodDeclaration};
Expand Down Expand Up @@ -623,13 +620,6 @@ impl QuotedObject {
&mut cpp_includes,
&mut hpp_declarations,
));
methods.push(new_arrow_array_builder_method(
obj,
objects,
&mut hpp_includes,
&mut cpp_includes,
&mut hpp_declarations,
));
methods.push(fill_arrow_array_builder_method(
obj,
&type_ident,
Expand All @@ -641,6 +631,8 @@ impl QuotedObject {
if obj.kind == ObjectKind::Component {
methods.push(component_to_data_cell_method(
&type_ident,
obj,
objects,
&mut hpp_includes,
));
}
Expand Down Expand Up @@ -844,13 +836,6 @@ impl QuotedObject {
&mut cpp_includes,
&mut hpp_declarations,
));
methods.push(new_arrow_array_builder_method(
obj,
objects,
&mut hpp_includes,
&mut cpp_includes,
&mut hpp_declarations,
));
methods.push(fill_arrow_array_builder_method(
obj,
&pascal_case_ident,
Expand Down Expand Up @@ -1217,44 +1202,6 @@ fn arrow_data_type_method(
}
}

fn new_arrow_array_builder_method(
obj: &Object,
objects: &Objects,
hpp_includes: &mut Includes,
cpp_includes: &mut Includes,
hpp_declarations: &mut ForwardDecls,
) -> Method {
hpp_includes.insert_system("memory"); // std::shared_ptr
cpp_includes.insert_system("arrow/builder.h");
hpp_declarations.insert("arrow", ForwardDecl::Class(format_ident!("MemoryPool")));

let builder_instantiation = quote_arrow_array_builder_type_instantiation(
&Type::Object(obj.fqname.clone()),
objects,
cpp_includes,
true,
);
let arrow_builder_type = arrow_array_builder_type_object(obj, objects, hpp_declarations);

Method {
docs: "Creates a new array builder with an array of this type.".into(),
declaration: MethodDeclaration {
is_static: true,
return_type: quote! { Result<std::shared_ptr<arrow::#arrow_builder_type>> },
name_and_parameters: quote!(new_arrow_array_builder(arrow::MemoryPool * memory_pool)),
},
definition_body: quote! {
if (memory_pool == nullptr) {
return rerun::Error(ErrorCode::UnexpectedNullArgument, "Memory pool is null.");
}
#NEWLINE_TOKEN
#NEWLINE_TOKEN
return Result(#builder_instantiation);
},
inline: false,
}
}

fn fill_arrow_array_builder_method(
obj: &Object,
type_ident: &Ident,
Expand All @@ -1281,14 +1228,6 @@ fn fill_arrow_array_builder_method(
},
},
definition_body: quote! {
if (builder == nullptr) {
return rerun::Error(ErrorCode::UnexpectedNullArgument, "Passed array builder is null.");
}
if (elements == nullptr) {
return rerun::Error(ErrorCode::UnexpectedNullArgument, "Cannot serialize null pointer to arrow array.");
}
#NEWLINE_TOKEN
#NEWLINE_TOKEN
#fill_builder
#NEWLINE_TOKEN
#NEWLINE_TOKEN
Expand All @@ -1298,13 +1237,22 @@ fn fill_arrow_array_builder_method(
}
}

fn component_to_data_cell_method(type_ident: &Ident, hpp_includes: &mut Includes) -> Method {
fn component_to_data_cell_method(
type_ident: &Ident,
obj: &Object,
objects: &Objects,
hpp_includes: &mut Includes,
) -> Method {
hpp_includes.insert_system("memory"); // std::shared_ptr
hpp_includes.insert_rerun("data_cell.hpp");
hpp_includes.insert_rerun("result.hpp");

let todo_pool = quote_comment("TODO(andreas): Allow configuring the memory pool.");

// Only need this in the cpp file where we don't need to forward declare the arrow builder type.
let arrow_builder_type =
arrow_array_builder_type_object(obj, objects, &mut ForwardDecls::default());

Method {
docs: format!("Creates a Rerun DataCell from an array of {type_ident} components.").into(),
declaration: MethodDeclaration {
Expand All @@ -1320,12 +1268,11 @@ fn component_to_data_cell_method(type_ident: &Ident, hpp_includes: &mut Includes
arrow::MemoryPool* pool = arrow::default_memory_pool();
#NEWLINE_TOKEN
#NEWLINE_TOKEN
auto builder_result = #type_ident::new_arrow_array_builder(pool);
RR_RETURN_NOT_OK(builder_result.error);
auto builder = std::move(builder_result.value);

ARROW_ASSIGN_OR_RAISE(auto builder, arrow::MakeBuilder(arrow_datatype(), pool))
if (instances && num_instances > 0) {
RR_RETURN_NOT_OK(#type_ident::fill_arrow_array_builder(
builder.get(),
static_cast<arrow::#arrow_builder_type*>(builder.get()),
instances,
num_instances
));
Expand Down Expand Up @@ -1438,11 +1385,24 @@ fn quote_fill_arrow_array_builder(
builder: &Ident,
includes: &mut Includes,
) -> TokenStream {
let parameter_check = quote! {
if (builder == nullptr) {
return rerun::Error(ErrorCode::UnexpectedNullArgument, "Passed array builder is null.");
}
if (elements == nullptr) {
return rerun::Error(ErrorCode::UnexpectedNullArgument, "Cannot serialize null pointer to arrow array.");
}
#NEWLINE_TOKEN
#NEWLINE_TOKEN
};

if obj.is_arrow_transparent() {
let field = &obj.fields[0];
if let Type::Object(fqname) = &field.typ {
if field.is_nullable {
quote! {
(void)builder;
(void)elements;
(void)num_elements;
if (true) { // Works around unreachability compiler warning.
return rerun::Error(ErrorCode::NotImplemented, "TODO(andreas) Handle nullable extensions");
Expand All @@ -1459,7 +1419,12 @@ fn quote_fill_arrow_array_builder(
}
}
} else {
quote_append_field_to_builder(&obj.fields[0], builder, true, includes, objects)
let append_to_builder =
quote_append_field_to_builder(&obj.fields[0], builder, true, includes, objects);
quote! {
#parameter_check
#append_to_builder
}
}
} else {
match obj.specifics {
Expand All @@ -1480,6 +1445,7 @@ fn quote_fill_arrow_array_builder(
);

quote! {
#parameter_check
#(#fill_fields)*
#NEWLINE_TOKEN
ARROW_RETURN_NOT_OK(builder->AppendValues(static_cast<int64_t>(num_elements), nullptr));
Expand Down Expand Up @@ -1577,7 +1543,7 @@ fn quote_fill_arrow_array_builder(
});

quote! {
#NEWLINE_TOKEN
#parameter_check
ARROW_RETURN_NOT_OK(#builder->Reserve(static_cast<int64_t>(num_elements)));
for (size_t elem_idx = 0; elem_idx < num_elements; elem_idx += 1) {
const auto& union_instance = elements[elem_idx];
Expand Down
10 changes: 0 additions & 10 deletions rerun_cpp/src/rerun/blueprint/auto_space_views.cpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 0 additions & 6 deletions rerun_cpp/src/rerun/blueprint/auto_space_views.hpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 0 additions & 18 deletions rerun_cpp/src/rerun/blueprint/entity_properties_component.cpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 16b4c3c

Please sign in to comment.