Skip to content

Commit

Permalink
ARROW-6284: [C++] Allow references in std::tuple when converting tupl…
Browse files Browse the repository at this point in the history
…e to arrow array

Currently a `std::vector` of `std::tuple`s can be converted to a table by treating each tuple as a row and building one array per tuple element. This PR additionally allows reference types (including const reference types) as elements of the `std::tuple` being converted. This is useful for converting user-defined types. For example, a vector of `CustomType`:

```cpp
// Example user-defined row type with four columns.
struct CustomType {
  int8_t i8;
  uint64_t u64;
  bool b;
  std::string s;

  // Exposes the fields, in declaration order, as a tuple of const references
  // (no field is copied).
  std::tuple<const int8_t&, const uint64_t&, const bool&, const std::string&>
  tie() const {
    return std::tie(i8, u64, b, s);
  }
};
```

can be converted to a table like this:

```cpp
using boost::adaptors::transform;

std::vector<CustomType> rows();
std::shared_ptr<Table> table;
auto rng_rows =
  transform(rows(), [](const CustomType& c) { return c.tie(); });
TableFromTupleRange(default_memory_pool(), rng_rows, names, &table);
```

This makes it easier to convert user defined types to arrow tables.

Closes #5112 from ozars/stl-ref and squashes the following commits:

6376d10 <Omer Ozarslan> Fix formatting
981edb3 <Omer Ozarslan> Test converting reference tuples to arrays
4dc780c <Omer Ozarslan> Allow references when inferring tuple element type

Authored-by: Omer Ozarslan <[email protected]>
Signed-off-by: François Saint-Jacques <[email protected]>
  • Loading branch information
ozars authored and fsaintjacques committed Aug 19, 2019
1 parent e994e9c commit 36bd667
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 4 deletions.
56 changes: 56 additions & 0 deletions cpp/src/arrow/stl-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <vector>

#include <gtest/gtest.h>
#include <boost/range/adaptor/transformed.hpp>

#include "arrow/stl.h"
#include "arrow/table.h"
Expand All @@ -30,6 +31,23 @@
// Row type with one element per signed and unsigned integer width (8 to 64 bits),
// followed by bool and std::string.
using primitive_types_tuple =
    std::tuple<int8_t, int16_t, int32_t, int64_t,      // signed integers
               uint8_t, uint16_t, uint32_t, uint64_t,  // unsigned integers
               bool, std::string>;

// Aggregate with one field per primitive type. tie() presents the fields as a
// tuple of const references so a CustomType can be used as a table row.
struct CustomType {
  // Signed integers.
  int8_t i8;
  int16_t i16;
  int32_t i32;
  int64_t i64;
  // Unsigned integers.
  uint8_t u8;
  uint16_t u16;
  uint32_t u32;
  uint64_t u64;
  // Boolean and string.
  bool b;
  std::string s;

  // Returns const references to every field, in declaration order. The return
  // type is written out so that no return-type deduction is needed.
  std::tuple<const int8_t&, const int16_t&, const int32_t&, const int64_t&,
             const uint8_t&, const uint16_t&, const uint32_t&, const uint64_t&,
             const bool&, const std::string&>
  tie() const {
    return std::tie(i8, i16, i32, i64, u8, u16, u32, u64, b, s);
  }
};

namespace arrow {
namespace stl {

Expand Down Expand Up @@ -131,6 +149,44 @@ TEST(TestTableFromTupleVector, ListType) {
ASSERT_TRUE(expected_table->Equals(*table));
}

// Converts a range of tuples of const references (produced by CustomType::tie())
// into a table and compares it with a table built from the same values.
TEST(TestTableFromTupleVector, ReferenceTuple) {
  using boost::adaptors::transform;

  // Input rows, presented to the converter as tuples of references to their fields.
  std::vector<CustomType> rows{{-1, -2, -3, -4, 1, 2, 3, 4, true, "Tests"},
                               {-10, -20, -30, -40, 10, 20, 30, 40, false, "Other"}};
  auto as_tied_tuple = [](const CustomType& c) -> decltype(c.tie()) { return c.tie(); };
  auto rng_rows = transform(rows, as_tied_tuple);

  std::vector<std::string> names{"column1", "column2", "column3", "column4", "column5",
                                 "column6", "column7", "column8", "column9", "column10"};

  std::shared_ptr<Table> table;
  ASSERT_OK(TableFromTupleRange(default_memory_pool(), rng_rows, names, &table));

  // Expected schema: one field per CustomType member, each created with `false`
  // as the third argument to field().
  std::shared_ptr<Schema> expected_schema =
      schema({field("column1", int8(), false), field("column2", int16(), false),
              field("column3", int32(), false), field("column4", int64(), false),
              field("column5", uint8(), false), field("column6", uint16(), false),
              field("column7", uint32(), false), field("column8", uint64(), false),
              field("column9", boolean(), false), field("column10", utf8(), false)});

  // Expected column contents, in schema order (first row value, second row value).
  std::vector<std::shared_ptr<Array>> expected_columns{
      ArrayFromJSON(int8(), "[-1, -10]"),
      ArrayFromJSON(int16(), "[-2, -20]"),
      ArrayFromJSON(int32(), "[-3, -30]"),
      ArrayFromJSON(int64(), "[-4, -40]"),
      ArrayFromJSON(uint8(), "[1, 10]"),
      ArrayFromJSON(uint16(), "[2, 20]"),
      ArrayFromJSON(uint32(), "[3, 30]"),
      ArrayFromJSON(uint64(), "[4, 40]"),
      ArrayFromJSON(boolean(), "[true, false]"),
      ArrayFromJSON(utf8(), R"(["Tests", "Other"])")};
  auto expected_table = Table::Make(expected_schema, expected_columns);

  ASSERT_TRUE(expected_table->Equals(*table));
}

TEST(TestTupleVectorFromTable, PrimitiveTypes) {
compute::FunctionContext ctx;
compute::CastOptions cast_options;
Expand Down
17 changes: 13 additions & 4 deletions cpp/src/arrow/stl.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,14 @@ class Schema;

namespace stl {

namespace internal {

/// The N-th element type of Tuple exactly as declared; it may be a reference
/// and/or const-qualified.
template <std::size_t N, typename Tuple>
using DeclaredTupleElement = typename std::tuple_element<N, Tuple>::type;

/// The N-th element type of Tuple with any reference removed first and any
/// top-level const removed afterwards, e.g. `const int&` becomes `int`.
template <std::size_t N, typename Tuple>
using BareTupleElement = typename std::remove_const<
    typename std::remove_reference<DeclaredTupleElement<N, Tuple>>::type>::type;

}  // namespace internal

/// Traits meta class to map standard C/C++ types to equivalent Arrow types.
template <typename T>
struct ConversionTraits {};
Expand Down Expand Up @@ -117,7 +125,7 @@ struct ConversionTraits<std::vector<value_c_type>>
/// column names at runtime, thus these methods are not constexpr.
template <typename Tuple, std::size_t N = std::tuple_size<Tuple>::value>
struct SchemaFromTuple {
using Element = typename std::tuple_element<N - 1, Tuple>::type;
using Element = internal::BareTupleElement<N - 1, Tuple>;

// Implementations that take a vector-like object for the column names.

Expand Down Expand Up @@ -199,11 +207,12 @@ struct SchemaFromTuple<Tuple, 0> {
};

namespace internal {

template <typename Tuple, std::size_t N = std::tuple_size<Tuple>::value>
struct CreateBuildersRecursive {
static Status Make(MemoryPool* pool,
std::vector<std::unique_ptr<ArrayBuilder>>* builders) {
using Element = typename std::tuple_element<N - 1, Tuple>::type;
using Element = BareTupleElement<N - 1, Tuple>;
std::shared_ptr<DataType> type = ConversionTraits<Element>::type_singleton();
ARROW_RETURN_NOT_OK(MakeBuilder(pool, type, &builders->at(N - 1)));

Expand All @@ -223,7 +232,7 @@ struct RowIterator {
static Status Append(const std::vector<std::unique_ptr<ArrayBuilder>>& builders,
const Tuple& row) {
using std::get;
using Element = typename std::tuple_element<N - 1, Tuple>::type;
using Element = BareTupleElement<N - 1, Tuple>;
using BuilderType =
typename TypeTraits<typename ConversionTraits<Element>::ArrowType>::BuilderType;

Expand All @@ -249,7 +258,7 @@ struct EnsureColumnTypes {
const compute::CastOptions& cast_options,
compute::FunctionContext* ctx,
std::reference_wrapper<const ::arrow::Table>* result) {
using Element = typename std::tuple_element<N - 1, Tuple>::type;
using Element = BareTupleElement<N - 1, Tuple>;
std::shared_ptr<DataType> expected_type = ConversionTraits<Element>::type_singleton();

if (!table.schema()->field(N - 1)->type()->Equals(*expected_type)) {
Expand Down

0 comments on commit 36bd667

Please sign in to comment.