Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't inline recursive datatypes in C++ backend #2765

Merged
merged 3 commits into from
Jul 21, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion crates/re_types/source_hash.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# This is a sha256 hash for all direct and indirect dependencies of this crate's build script.
# It can be safely removed at any time to force the build script to run again.
# Check out build.rs to see how it's computed.
9c75fa33055b75b1c29e08cddae47be88c686d3a20a97bcd584dfa664bc1c479
b35cba3304a890ee701e159b7bcc8df64877412531afae9ccf875838e1d262a9
159 changes: 83 additions & 76 deletions crates/re_types_builder/src/codegen/cpp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -585,7 +585,7 @@ fn arrow_data_type_method(
cpp_includes.system.insert("arrow/api.h".to_owned());
hpp_includes.system.insert("memory".to_owned()); // std::shared_ptr

let quoted_datatype = ArrowDataTypeTokenizer(datatype);
let quoted_datatype = quote_arrow_data_type(datatype, cpp_includes, true);

Method {
doc_string: "Returns the arrow data type this type corresponds to.".to_owned(),
Expand Down Expand Up @@ -833,95 +833,102 @@ fn quote_integer<T: std::fmt::Display>(t: T) -> TokenStream {

// --- Arrow registry code generators ---

struct ArrowDataTypeTokenizer<'a>(&'a ::arrow2::datatypes::DataType);

impl quote::ToTokens for ArrowDataTypeTokenizer<'_> {
fn to_tokens(&self, tokens: &mut TokenStream) {
use arrow2::datatypes::UnionMode;
match self.0.to_logical_type() {
DataType::Null => quote!(arrow::null()),
DataType::Boolean => quote!(arrow::boolean()),
DataType::Int8 => quote!(arrow::int8()),
DataType::Int16 => quote!(arrow::int16()),
DataType::Int32 => quote!(arrow::int32()),
DataType::Int64 => quote!(arrow::int64()),
DataType::UInt8 => quote!(arrow::uint8()),
DataType::UInt16 => quote!(arrow::uint16()),
DataType::UInt32 => quote!(arrow::uint32()),
DataType::UInt64 => quote!(arrow::uint64()),
DataType::Float16 => quote!(arrow::float16()),
DataType::Float32 => quote!(arrow::float32()),
DataType::Float64 => quote!(arrow::float64()),
DataType::Binary => quote!(arrow::binary()),
DataType::LargeBinary => quote!(arrow::large_binary()),
DataType::Utf8 => quote!(arrow::utf8()),
DataType::LargeUtf8 => quote!(arrow::large_utf8()),

DataType::List(field) => {
let field = ArrowFieldTokenizer(field);
quote!(arrow::list(#field))
}
fn quote_arrow_data_type(
datatype: &::arrow2::datatypes::DataType,
includes: &mut Includes,
is_top_level_type: bool,
) -> TokenStream {
use arrow2::datatypes::UnionMode;
match datatype {
DataType::Null => quote!(arrow::null()),
DataType::Boolean => quote!(arrow::boolean()),
DataType::Int8 => quote!(arrow::int8()),
DataType::Int16 => quote!(arrow::int16()),
DataType::Int32 => quote!(arrow::int32()),
DataType::Int64 => quote!(arrow::int64()),
DataType::UInt8 => quote!(arrow::uint8()),
DataType::UInt16 => quote!(arrow::uint16()),
DataType::UInt32 => quote!(arrow::uint32()),
DataType::UInt64 => quote!(arrow::uint64()),
DataType::Float16 => quote!(arrow::float16()),
DataType::Float32 => quote!(arrow::float32()),
DataType::Float64 => quote!(arrow::float64()),
DataType::Binary => quote!(arrow::binary()),
DataType::LargeBinary => quote!(arrow::large_binary()),
DataType::Utf8 => quote!(arrow::utf8()),
DataType::LargeUtf8 => quote!(arrow::large_utf8()),

DataType::List(field) => {
let quoted_field = quote_arrow_field(field, includes);
quote!(arrow::list(#quoted_field))
}

DataType::FixedSizeList(field, length) => {
let field = ArrowFieldTokenizer(field);
let length = quote_integer(length);
quote!(arrow::fixed_size_list(#field, #length))
}
DataType::FixedSizeList(field, length) => {
let quoted_field = quote_arrow_field(field, includes);
let quoted_length = quote_integer(length);
quote!(arrow::fixed_size_list(#quoted_field, #quoted_length))
}

DataType::Union(fields, _, mode) => {
let fields = fields.iter().map(ArrowFieldTokenizer);
match mode {
UnionMode::Dense => {
quote! { arrow::dense_union({ #(#fields,)* }) }
}
UnionMode::Sparse => {
quote! { arrow::sparse_union({ #(#fields,)* }) }
}
DataType::Union(fields, _, mode) => {
let quoted_fields = fields
.iter()
.map(|field| quote_arrow_field(field, includes));
match mode {
UnionMode::Dense => {
quote! { arrow::dense_union({ #(#quoted_fields,)* }) }
}
UnionMode::Sparse => {
quote! { arrow::sparse_union({ #(#quoted_fields,)* }) }
}
}
}

DataType::Struct(fields) => {
let fields = fields.iter().map(ArrowFieldTokenizer);
quote! { arrow::struct_({ #(#fields,)* }) }
}
DataType::Struct(fields) => {
let fields = fields
.iter()
.map(|field| quote_arrow_field(field, includes));
quote! { arrow::struct_({ #(#fields,)* }) }
}

DataType::Extension(_name, _datatype, _metadata) => {
// TODO(andreas): Need this eventually.
unimplemented!("Arrow extension types not yet implemented");
DataType::Extension(fqname, datatype, _metadata) => {
// If we're not at the top level, we should already have a `to_arrow_datatype` method that we can relay to.
// TODO(andreas): Unions don't have `to_arrow_datatype` yet.
if is_top_level_type || matches!(datatype.as_ref(), DataType::Union(..)) {
// TODO(andreas): We're not emitting the actual extension types here yet, which is why we're skipping the extension type at top level.
// Currently, we wrap only Components in extension types but this is done in `rerun_c`.
// In the future we'll add the extension type here to the schema.
quote_arrow_data_type(datatype, includes, false)
} else {
let fqname_use = quote_fqname_as_type_path(includes, fqname);
quote! { #fqname_use::to_arrow_datatype() }
}

_ => unimplemented!("{:#?}", self.0),
}
.to_tokens(tokens);

_ => unimplemented!("{:#?}", datatype),
}
}

struct ArrowFieldTokenizer<'a>(&'a ::arrow2::datatypes::Field);

impl quote::ToTokens for ArrowFieldTokenizer<'_> {
fn to_tokens(&self, tokens: &mut TokenStream) {
let arrow2::datatypes::Field {
name,
data_type,
is_nullable,
metadata,
} = &self.0;
fn quote_arrow_field(field: &::arrow2::datatypes::Field, includes: &mut Includes) -> TokenStream {
let arrow2::datatypes::Field {
name,
data_type,
is_nullable,
metadata,
} = field;

let datatype = ArrowDataTypeTokenizer(data_type);

let metadata = if metadata.is_empty() {
quote!(nullptr)
} else {
let keys = metadata.keys();
let values = metadata.values();
quote! {
arrow::KeyValueMetadata::Make({ #(#keys,)* }, { #(#values,)* })
}
};
let datatype = quote_arrow_data_type(data_type, includes, false);

let metadata = if metadata.is_empty() {
quote!(nullptr)
} else {
let keys = metadata.keys();
let values = metadata.values();
quote! {
arrow::field(#name, #datatype, #is_nullable, #metadata)
arrow::KeyValueMetadata::Make({ #(#keys,)* }, { #(#values,)* })
}
.to_tokens(tokens);
};

quote! {
arrow::field(#name, #datatype, #is_nullable, #metadata)
}
}
27 changes: 2 additions & 25 deletions rerun_cpp/src/components/affix_fuzzer1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,36 +3,13 @@

#include <arrow/api.h>

#include "../datatypes/affix_fuzzer1.hpp"
#include "affix_fuzzer1.hpp"

namespace rr {
namespace components {
std::shared_ptr<arrow::DataType> AffixFuzzer1::to_arrow_datatype() {
return arrow::struct_({
arrow::field("single_float_optional", arrow::float32(), true, nullptr),
arrow::field("single_string_required", arrow::utf8(), false, nullptr),
arrow::field("single_string_optional", arrow::utf8(), true, nullptr),
arrow::field("many_floats_optional",
arrow::list(arrow::field("item", arrow::float32(), true, nullptr)),
true,
nullptr),
arrow::field("many_strings_required",
arrow::list(arrow::field("item", arrow::utf8(), false, nullptr)),
false,
nullptr),
arrow::field("many_strings_optional",
arrow::list(arrow::field("item", arrow::utf8(), true, nullptr)),
true,
nullptr),
arrow::field("flattened_scalar", arrow::float32(), false, nullptr),
arrow::field("almost_flattened_scalar",
arrow::struct_({
arrow::field("value", arrow::float32(), false, nullptr),
}),
false,
nullptr),
arrow::field("from_parent", arrow::boolean(), true, nullptr),
});
return rr::datatypes::AffixFuzzer1::to_arrow_datatype();
}
} // namespace components
} // namespace rr
34 changes: 2 additions & 32 deletions rerun_cpp/src/components/affix_fuzzer14.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include <arrow/api.h>

#include "../datatypes/affix_fuzzer1.hpp"
#include "affix_fuzzer14.hpp"

namespace rr {
Expand All @@ -15,38 +16,7 @@ namespace rr {
arrow::field(
"craziness",
arrow::list(arrow::field(
"item",
arrow::struct_({
arrow::field("single_float_optional", arrow::float32(), true, nullptr),
arrow::field("single_string_required", arrow::utf8(), false, nullptr),
arrow::field("single_string_optional", arrow::utf8(), true, nullptr),
arrow::field(
"many_floats_optional",
arrow::list(arrow::field("item", arrow::float32(), true, nullptr)),
true,
nullptr),
arrow::field(
"many_strings_required",
arrow::list(arrow::field("item", arrow::utf8(), false, nullptr)),
false,
nullptr),
arrow::field(
"many_strings_optional",
arrow::list(arrow::field("item", arrow::utf8(), true, nullptr)),
true,
nullptr),
arrow::field("flattened_scalar", arrow::float32(), false, nullptr),
arrow::field(
"almost_flattened_scalar",
arrow::struct_({
arrow::field("value", arrow::float32(), false, nullptr),
}),
false,
nullptr),
arrow::field("from_parent", arrow::boolean(), true, nullptr),
}),
false,
nullptr)),
"item", rr::datatypes::AffixFuzzer1::to_arrow_datatype(), false, nullptr)),
false,
nullptr),
arrow::field("fixed_size_shenanigans",
Expand Down
34 changes: 2 additions & 32 deletions rerun_cpp/src/components/affix_fuzzer15.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include <arrow/api.h>

#include "../datatypes/affix_fuzzer1.hpp"
#include "affix_fuzzer15.hpp"

namespace rr {
Expand All @@ -15,38 +16,7 @@ namespace rr {
arrow::field(
"craziness",
arrow::list(arrow::field(
"item",
arrow::struct_({
arrow::field("single_float_optional", arrow::float32(), true, nullptr),
arrow::field("single_string_required", arrow::utf8(), false, nullptr),
arrow::field("single_string_optional", arrow::utf8(), true, nullptr),
arrow::field(
"many_floats_optional",
arrow::list(arrow::field("item", arrow::float32(), true, nullptr)),
true,
nullptr),
arrow::field(
"many_strings_required",
arrow::list(arrow::field("item", arrow::utf8(), false, nullptr)),
false,
nullptr),
arrow::field(
"many_strings_optional",
arrow::list(arrow::field("item", arrow::utf8(), true, nullptr)),
true,
nullptr),
arrow::field("flattened_scalar", arrow::float32(), false, nullptr),
arrow::field(
"almost_flattened_scalar",
arrow::struct_({
arrow::field("value", arrow::float32(), false, nullptr),
}),
false,
nullptr),
arrow::field("from_parent", arrow::boolean(), true, nullptr),
}),
false,
nullptr)),
"item", rr::datatypes::AffixFuzzer1::to_arrow_datatype(), false, nullptr)),
false,
nullptr),
arrow::field("fixed_size_shenanigans",
Expand Down
41 changes: 5 additions & 36 deletions rerun_cpp/src/components/affix_fuzzer16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include <arrow/api.h>

#include "../datatypes/affix_fuzzer1.hpp"
#include "affix_fuzzer16.hpp"

namespace rr {
Expand All @@ -16,42 +17,10 @@ namespace rr {
arrow::field("radians", arrow::float32(), false, nullptr),
arrow::field(
"craziness",
arrow::list(arrow::field(
"item",
arrow::struct_({
arrow::field(
"single_float_optional", arrow::float32(), true, nullptr),
arrow::field(
"single_string_required", arrow::utf8(), false, nullptr),
arrow::field(
"single_string_optional", arrow::utf8(), true, nullptr),
arrow::field("many_floats_optional",
arrow::list(arrow::field(
"item", arrow::float32(), true, nullptr)),
true,
nullptr),
arrow::field("many_strings_required",
arrow::list(arrow::field(
"item", arrow::utf8(), false, nullptr)),
false,
nullptr),
arrow::field(
"many_strings_optional",
arrow::list(arrow::field("item", arrow::utf8(), true, nullptr)),
true,
nullptr),
arrow::field("flattened_scalar", arrow::float32(), false, nullptr),
arrow::field(
"almost_flattened_scalar",
arrow::struct_({
arrow::field("value", arrow::float32(), false, nullptr),
}),
false,
nullptr),
arrow::field("from_parent", arrow::boolean(), true, nullptr),
}),
false,
nullptr)),
arrow::list(arrow::field("item",
rr::datatypes::AffixFuzzer1::to_arrow_datatype(),
false,
nullptr)),
false,
nullptr),
arrow::field("fixed_size_shenanigans",
Expand Down
Loading