Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use arrow::MakeBuilder to create arrow array builder that exactly match the desired datatype #4280

Merged
merged 4 commits into from
Nov 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 2 additions & 106 deletions crates/re_types_builder/src/codegen/cpp/array_builder.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
use proc_macro2::{Ident, TokenStream};
use proc_macro2::Ident;
use quote::{format_ident, quote};

use crate::{Object, ObjectSpecifics, Objects, Type};

use super::{
forward_decl::{ForwardDecl, ForwardDecls},
includes::Includes,
quote_comment, quote_fqname_as_type_path, quote_integer, NEWLINE_TOKEN,
};
use super::forward_decl::{ForwardDecl, ForwardDecls};

pub fn arrow_array_builder_type(typ: &Type, objects: &Objects) -> Ident {
arrow_array_builder_type_and_declaration(typ, objects, &mut ForwardDecls::default())
Expand Down Expand Up @@ -107,103 +103,3 @@ pub fn arrow_array_builder_type_object(
class_ident
}
}

pub fn quote_arrow_array_builder_type_instantiation(
typ: &Type,
objects: &Objects,
cpp_includes: &mut Includes,
is_top_level_type: bool,
) -> TokenStream {
let builder_type = arrow_array_builder_type(typ, objects);

match typ {
Type::UInt8
| Type::UInt16
| Type::UInt32
| Type::UInt64
| Type::Int8
| Type::Int16
| Type::Int32
| Type::Int64
| Type::Bool
| Type::Float16
| Type::Float32
| Type::Float64
| Type::String => {
quote!(std::make_shared<arrow::#builder_type>(memory_pool))
}
Type::Vector { elem_type } => {
let element_builder = quote_arrow_array_builder_type_instantiation(
&elem_type.clone().into(),
objects,
cpp_includes,
false,
);
quote!(std::make_shared<arrow::#builder_type>(memory_pool, #element_builder))
}
Type::Array { elem_type, length } => {
let quoted_length = quote_integer(length);
let element_builder = quote_arrow_array_builder_type_instantiation(
&elem_type.clone().into(),
objects,
cpp_includes,
false,
);
quote!(std::make_shared<arrow::#builder_type>(memory_pool, #element_builder, #quoted_length))
}
Type::Object(fqname) => {
let object = &objects[fqname];

if !is_top_level_type {
// Propagating error here is hard since we're in a nested context.
// But also not that important since we *know* that this only fails for null pools and we already checked that now.
// For the unlikely broken case, Rerun result will give us a nullptr which will then
// fail the subsequent actions inside arrow, so the error will still propagate.
let quoted_fqname = quote_fqname_as_type_path(cpp_includes, fqname);
quote!(#quoted_fqname::new_arrow_array_builder(memory_pool).value)
} else if object.is_arrow_transparent() {
quote_arrow_array_builder_type_instantiation(
&object.fields[0].typ,
objects,
cpp_includes,
false,
)
} else {
let field_builders = object.fields.iter().map(|field| {
quote_arrow_array_builder_type_instantiation(
&field.typ,
objects,
cpp_includes,
false,
)
});

match object.specifics {
ObjectSpecifics::Struct => {
quote! {
std::make_shared<arrow::#builder_type>(
arrow_datatype(),
memory_pool,
std::vector<std::shared_ptr<arrow::ArrayBuilder>>({ #(#field_builders,)* })
)
}
}
ObjectSpecifics::Union { .. } => {
let children_comment = quote_comment("Children:");
quote! {
std::make_shared<arrow::#builder_type>(
memory_pool,
#NEWLINE_TOKEN
#children_comment
std::vector<std::shared_ptr<arrow::ArrayBuilder>>({
std::make_shared<arrow::NullBuilder>(memory_pool), #(#field_builders,)*
}),
arrow_datatype()
)
}
}
}
}
}
}
}
108 changes: 37 additions & 71 deletions crates/re_types_builder/src/codegen/cpp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,7 @@ use crate::{
ObjectSpecifics, Objects, Reporter, Type, ATTR_CPP_NO_FIELD_CTORS,
};

use self::array_builder::{
arrow_array_builder_type, arrow_array_builder_type_object,
quote_arrow_array_builder_type_instantiation,
};
use self::array_builder::{arrow_array_builder_type, arrow_array_builder_type_object};
use self::forward_decl::{ForwardDecl, ForwardDecls};
use self::includes::Includes;
use self::method::{Method, MethodDeclaration};
Expand Down Expand Up @@ -623,13 +620,6 @@ impl QuotedObject {
&mut cpp_includes,
&mut hpp_declarations,
));
methods.push(new_arrow_array_builder_method(
obj,
objects,
&mut hpp_includes,
&mut cpp_includes,
&mut hpp_declarations,
));
methods.push(fill_arrow_array_builder_method(
obj,
&type_ident,
Expand All @@ -641,6 +631,8 @@ impl QuotedObject {
if obj.kind == ObjectKind::Component {
methods.push(component_to_data_cell_method(
&type_ident,
obj,
objects,
&mut hpp_includes,
));
}
Expand Down Expand Up @@ -844,13 +836,6 @@ impl QuotedObject {
&mut cpp_includes,
&mut hpp_declarations,
));
methods.push(new_arrow_array_builder_method(
obj,
objects,
&mut hpp_includes,
&mut cpp_includes,
&mut hpp_declarations,
));
methods.push(fill_arrow_array_builder_method(
obj,
&pascal_case_ident,
Expand Down Expand Up @@ -1217,44 +1202,6 @@ fn arrow_data_type_method(
}
}

fn new_arrow_array_builder_method(
obj: &Object,
objects: &Objects,
hpp_includes: &mut Includes,
cpp_includes: &mut Includes,
hpp_declarations: &mut ForwardDecls,
) -> Method {
hpp_includes.insert_system("memory"); // std::shared_ptr
cpp_includes.insert_system("arrow/builder.h");
hpp_declarations.insert("arrow", ForwardDecl::Class(format_ident!("MemoryPool")));

let builder_instantiation = quote_arrow_array_builder_type_instantiation(
&Type::Object(obj.fqname.clone()),
objects,
cpp_includes,
true,
);
let arrow_builder_type = arrow_array_builder_type_object(obj, objects, hpp_declarations);

Method {
docs: "Creates a new array builder with an array of this type.".into(),
declaration: MethodDeclaration {
is_static: true,
return_type: quote! { Result<std::shared_ptr<arrow::#arrow_builder_type>> },
name_and_parameters: quote!(new_arrow_array_builder(arrow::MemoryPool * memory_pool)),
},
definition_body: quote! {
if (memory_pool == nullptr) {
return rerun::Error(ErrorCode::UnexpectedNullArgument, "Memory pool is null.");
}
#NEWLINE_TOKEN
#NEWLINE_TOKEN
return Result(#builder_instantiation);
},
inline: false,
}
}

fn fill_arrow_array_builder_method(
obj: &Object,
type_ident: &Ident,
Expand All @@ -1281,14 +1228,6 @@ fn fill_arrow_array_builder_method(
},
},
definition_body: quote! {
if (builder == nullptr) {
return rerun::Error(ErrorCode::UnexpectedNullArgument, "Passed array builder is null.");
}
if (elements == nullptr) {
return rerun::Error(ErrorCode::UnexpectedNullArgument, "Cannot serialize null pointer to arrow array.");
}
#NEWLINE_TOKEN
#NEWLINE_TOKEN
#fill_builder
#NEWLINE_TOKEN
#NEWLINE_TOKEN
Expand All @@ -1298,13 +1237,22 @@ fn fill_arrow_array_builder_method(
}
}

fn component_to_data_cell_method(type_ident: &Ident, hpp_includes: &mut Includes) -> Method {
fn component_to_data_cell_method(
type_ident: &Ident,
obj: &Object,
objects: &Objects,
hpp_includes: &mut Includes,
) -> Method {
hpp_includes.insert_system("memory"); // std::shared_ptr
hpp_includes.insert_rerun("data_cell.hpp");
hpp_includes.insert_rerun("result.hpp");

let todo_pool = quote_comment("TODO(andreas): Allow configuring the memory pool.");

// Only need this in the cpp file where we don't need to forward declare the arrow builder type.
let arrow_builder_type =
arrow_array_builder_type_object(obj, objects, &mut ForwardDecls::default());

Method {
docs: format!("Creates a Rerun DataCell from an array of {type_ident} components.").into(),
declaration: MethodDeclaration {
Expand All @@ -1320,12 +1268,11 @@ fn component_to_data_cell_method(type_ident: &Ident, hpp_includes: &mut Includes
arrow::MemoryPool* pool = arrow::default_memory_pool();
#NEWLINE_TOKEN
#NEWLINE_TOKEN
auto builder_result = #type_ident::new_arrow_array_builder(pool);
RR_RETURN_NOT_OK(builder_result.error);
auto builder = std::move(builder_result.value);

ARROW_ASSIGN_OR_RAISE(auto builder, arrow::MakeBuilder(arrow_datatype(), pool))
if (instances && num_instances > 0) {
RR_RETURN_NOT_OK(#type_ident::fill_arrow_array_builder(
builder.get(),
static_cast<arrow::#arrow_builder_type*>(builder.get()),
instances,
num_instances
));
Expand Down Expand Up @@ -1438,11 +1385,24 @@ fn quote_fill_arrow_array_builder(
builder: &Ident,
includes: &mut Includes,
) -> TokenStream {
let parameter_check = quote! {
if (builder == nullptr) {
return rerun::Error(ErrorCode::UnexpectedNullArgument, "Passed array builder is null.");
}
if (elements == nullptr) {
return rerun::Error(ErrorCode::UnexpectedNullArgument, "Cannot serialize null pointer to arrow array.");
}
#NEWLINE_TOKEN
#NEWLINE_TOKEN
};

if obj.is_arrow_transparent() {
let field = &obj.fields[0];
if let Type::Object(fqname) = &field.typ {
if field.is_nullable {
quote! {
(void)builder;
(void)elements;
(void)num_elements;
if (true) { // Works around unreachability compiler warning.
return rerun::Error(ErrorCode::NotImplemented, "TODO(andreas) Handle nullable extensions");
Expand All @@ -1459,7 +1419,12 @@ fn quote_fill_arrow_array_builder(
}
}
} else {
quote_append_field_to_builder(&obj.fields[0], builder, true, includes, objects)
let append_to_builder =
quote_append_field_to_builder(&obj.fields[0], builder, true, includes, objects);
quote! {
#parameter_check
#append_to_builder
}
}
} else {
match obj.specifics {
Expand All @@ -1480,6 +1445,7 @@ fn quote_fill_arrow_array_builder(
);

quote! {
#parameter_check
#(#fill_fields)*
#NEWLINE_TOKEN
ARROW_RETURN_NOT_OK(builder->AppendValues(static_cast<int64_t>(num_elements), nullptr));
Expand Down Expand Up @@ -1577,7 +1543,7 @@ fn quote_fill_arrow_array_builder(
});

quote! {
#NEWLINE_TOKEN
#parameter_check
ARROW_RETURN_NOT_OK(#builder->Reserve(static_cast<int64_t>(num_elements)));
for (size_t elem_idx = 0; elem_idx < num_elements; elem_idx += 1) {
const auto& union_instance = elements[elem_idx];
Expand Down
10 changes: 0 additions & 10 deletions rerun_cpp/src/rerun/blueprint/auto_space_views.cpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 0 additions & 6 deletions rerun_cpp/src/rerun/blueprint/auto_space_views.hpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 0 additions & 18 deletions rerun_cpp/src/rerun/blueprint/entity_properties_component.cpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading