Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions cpp/src/arrow/array/array_struct_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@
// specific language governing permissions and limitations
// under the License.

#include <gtest/gtest.h>

#include <cstdint>
#include <cstring>
#include <memory>
#include <vector>

#include <gtest/gtest.h>

#include "arrow/array.h"
#include "arrow/builder.h"
#include "arrow/status.h"
Expand Down Expand Up @@ -266,10 +266,8 @@ TEST_F(TestStructBuilder, TestAppendNull) {
ASSERT_EQ(2, result_->field(1)->length());
ASSERT_TRUE(result_->IsNull(0));
ASSERT_TRUE(result_->IsNull(1));
ASSERT_TRUE(result_->field(0)->IsNull(0));
ASSERT_TRUE(result_->field(0)->IsNull(1));
ASSERT_TRUE(result_->field(1)->IsNull(0));
ASSERT_TRUE(result_->field(1)->IsNull(1));
ASSERT_EQ(0, result_->field(0)->null_count());
ASSERT_EQ(0, result_->field(1)->null_count());

ASSERT_EQ(Type::LIST, result_->field(0)->type_id());
ASSERT_EQ(Type::INT32, result_->field(1)->type_id());
Expand Down
58 changes: 55 additions & 3 deletions cpp/src/arrow/array/array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
namespace arrow {

using internal::checked_cast;
using internal::checked_pointer_cast;

class TestArray : public ::testing::Test {
public:
Expand Down Expand Up @@ -641,7 +642,7 @@ class TestPrimitiveBuilder : public TestBuilder {
std::shared_ptr<Array> out;
FinishAndCheckPadding(builder.get(), &out);

std::shared_ptr<ArrayType> result = std::dynamic_pointer_cast<ArrayType>(out);
std::shared_ptr<ArrayType> result = checked_pointer_cast<ArrayType>(out);

// Builder is now reset
ASSERT_EQ(0, builder->length());
Expand Down Expand Up @@ -766,7 +767,7 @@ void TestPrimitiveBuilder<PBoolean>::Check(const std::unique_ptr<BooleanBuilder>
std::shared_ptr<Array> out;
FinishAndCheckPadding(builder.get(), &out);

std::shared_ptr<BooleanArray> result = std::dynamic_pointer_cast<BooleanArray>(out);
std::shared_ptr<BooleanArray> result = checked_pointer_cast<BooleanArray>(out);

ASSERT_EQ(ex_null_count, result->null_count());
ASSERT_EQ(size, result->length());
Expand Down Expand Up @@ -883,7 +884,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendNull) {

std::shared_ptr<Array> out;
FinishAndCheckPadding(this->builder_.get(), &out);
auto result = std::dynamic_pointer_cast<typename TypeParam::ArrayType>(out);
auto result = checked_pointer_cast<typename TypeParam::ArrayType>(out);

for (int64_t i = 0; i < size; ++i) {
ASSERT_TRUE(result->IsNull(i)) << i;
Expand Down Expand Up @@ -917,6 +918,33 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendNulls) {
}
}

TYPED_TEST(TestPrimitiveBuilder, TestAppendEmptyValue) {
ASSERT_OK(this->builder_->AppendNull());
ASSERT_OK(this->builder_->AppendEmptyValue());
ASSERT_OK(this->builder_->AppendNulls(2));
ASSERT_OK(this->builder_->AppendEmptyValues(2));

std::shared_ptr<Array> out;
FinishAndCheckPadding(this->builder_.get(), &out);
ASSERT_OK(out->ValidateFull());

auto result = checked_pointer_cast<typename TypeParam::ArrayType>(out);
ASSERT_EQ(result->length(), 6);
ASSERT_EQ(result->null_count(), 3);

ASSERT_TRUE(result->IsNull(0));
ASSERT_FALSE(result->IsNull(1));
ASSERT_TRUE(result->IsNull(2));
ASSERT_TRUE(result->IsNull(3));
ASSERT_FALSE(result->IsNull(4));
ASSERT_FALSE(result->IsNull(5));

// implementation detail: the value slots are 0-initialized
for (int64_t i = 0; i < result->length(); ++i) {
ASSERT_EQ(result->Value(i), 0);
}
}

TYPED_TEST(TestPrimitiveBuilder, TestArrayDtorDealloc) {
DECL_T();

Expand Down Expand Up @@ -2104,6 +2132,18 @@ TEST_F(TestAdaptiveIntBuilder, TestAppendNulls) {
}
}

TEST_F(TestAdaptiveIntBuilder, TestAppendEmptyValue) {
ASSERT_OK(builder_->AppendNulls(2));
ASSERT_OK(builder_->AppendEmptyValue());
ASSERT_OK(builder_->Append(42));
ASSERT_OK(builder_->AppendEmptyValues(2));
Done();

ASSERT_OK(result_->ValidateFull());
// NOTE: The fact that we get 0 is really an implementation detail
AssertArraysEqual(*result_, *ArrayFromJSON(int8(), "[null, null, 0, 42, 0, 0]"));
}

TEST(TestAdaptiveIntBuilderWithStartIntSize, TestReset) {
auto builder = std::make_shared<AdaptiveIntBuilder>(
static_cast<uint8_t>(sizeof(int16_t)), default_memory_pool());
Expand Down Expand Up @@ -2322,6 +2362,18 @@ TEST_F(TestAdaptiveUIntBuilder, TestAppendNulls) {
}
}

TEST_F(TestAdaptiveUIntBuilder, TestAppendEmptyValue) {
ASSERT_OK(builder_->AppendNulls(2));
ASSERT_OK(builder_->AppendEmptyValue());
ASSERT_OK(builder_->Append(42));
ASSERT_OK(builder_->AppendEmptyValues(2));
Done();

ASSERT_OK(result_->ValidateFull());
// NOTE: The fact that we get 0 is really an implementation detail
AssertArraysEqual(*result_, *ArrayFromJSON(uint8(), "[null, null, 0, 42, 0, 0]"));
}

TEST(TestAdaptiveUIntBuilderWithStartIntSize, TestReset) {
auto builder = std::make_shared<AdaptiveUIntBuilder>(
static_cast<uint8_t>(sizeof(uint16_t)), default_memory_pool());
Expand Down
86 changes: 80 additions & 6 deletions cpp/src/arrow/array/array_union_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,24 @@ class UnionBuilderTest : public ::testing::Test {
AppendString("def");
AppendInt(-10);
AppendDouble(0.5);

ASSERT_OK(union_builder->Finish(&actual));
ASSERT_OK(actual->ValidateFull());
ArrayFromVector<Int8Type, uint8_t>(expected_types_vector, &expected_types);
}

void AppendNullsAndEmptyValues() {
AppendString("abc");
ASSERT_OK(union_builder->AppendNull());
ASSERT_OK(union_builder->AppendEmptyValue());
expected_types_vector.insert(expected_types_vector.end(), 3, I8);
AppendInt(42);
ASSERT_OK(union_builder->AppendNulls(2));
ASSERT_OK(union_builder->AppendEmptyValues(2));
expected_types_vector.insert(expected_types_vector.end(), 3, I8);

ASSERT_OK(union_builder->Finish(&actual));
ASSERT_OK(actual->ValidateFull());
ArrayFromVector<Int8Type, uint8_t>(expected_types_vector, &expected_types);
}

Expand All @@ -329,7 +346,9 @@ class UnionBuilderTest : public ::testing::Test {
AppendDouble(1.0);
AppendDouble(-1.0);
AppendDouble(0.5);

ASSERT_OK(union_builder->Finish(&actual));
ASSERT_OK(actual->ValidateFull());
ArrayFromVector<Int8Type, uint8_t>(expected_types_vector, &expected_types);

ASSERT_EQ(I8, 0);
Expand Down Expand Up @@ -357,6 +376,7 @@ class UnionBuilderTest : public ::testing::Test {
AppendDouble(0.5);

ASSERT_OK(list_builder.Finish(actual));
ASSERT_OK((*actual)->ValidateFull());
ArrayFromVector<Int8Type, uint8_t>(expected_types_vector, &expected_types);
}

Expand All @@ -376,20 +396,20 @@ class SparseUnionBuilderTest : public UnionBuilderTest<SparseUnionBuilder> {

void AppendInt(int8_t i) override {
Base::AppendInt(i);
ASSERT_OK(str_builder->AppendNull());
ASSERT_OK(dbl_builder->AppendNull());
ASSERT_OK(str_builder->AppendEmptyValue());
ASSERT_OK(dbl_builder->AppendEmptyValue());
}

void AppendString(const std::string& str) override {
Base::AppendString(str);
ASSERT_OK(i8_builder->AppendNull());
ASSERT_OK(dbl_builder->AppendNull());
ASSERT_OK(i8_builder->AppendEmptyValue());
ASSERT_OK(dbl_builder->AppendEmptyValue());
}

void AppendDouble(double dbl) override {
Base::AppendDouble(dbl);
ASSERT_OK(i8_builder->AppendNull());
ASSERT_OK(str_builder->AppendNull());
ASSERT_OK(i8_builder->AppendEmptyValue());
ASSERT_OK(str_builder->AppendEmptyValue());
}
};

Expand All @@ -415,6 +435,34 @@ TEST_F(DenseUnionBuilderTest, Basics) {
ASSERT_ARRAYS_EQUAL(*expected, *actual);
}

TEST_F(DenseUnionBuilderTest, NullsAndEmptyValues) {
union_builder.reset(new DenseUnionBuilder(
default_memory_pool(), {i8_builder, str_builder, dbl_builder},
dense_union({field("i8", int8()), field("str", utf8()), field("dbl", float64())},
{I8, STR, DBL})));
AppendNullsAndEmptyValues();

// Four null / empty values (the latter implementation-defined) were appended to I8
auto expected_i8 = ArrayFromJSON(int8(), "[null, 0, 42, null, 0]");
auto expected_str = ArrayFromJSON(utf8(), R"(["abc"])");
auto expected_dbl = ArrayFromJSON(float64(), "[]");

// "abc", null, 0, 42, null, null, 0, 0
auto expected_offsets = ArrayFromJSON(int32(), "[0, 0, 1, 2, 3, 3, 4, 4]");

ASSERT_OK_AND_ASSIGN(auto expected,
DenseUnionArray::Make(*expected_types, *expected_offsets,
{expected_i8, expected_str, expected_dbl},
{"i8", "str", "dbl"}, {I8, STR, DBL}));

ASSERT_EQ(expected->type()->ToString(), actual->type()->ToString());
ASSERT_ARRAYS_EQUAL(*expected, *actual);
// Physical arrays must be as expected
ASSERT_ARRAYS_EQUAL(*expected_i8, *actual->field(0));
ASSERT_ARRAYS_EQUAL(*expected_str, *actual->field(1));
ASSERT_ARRAYS_EQUAL(*expected_dbl, *actual->field(2));
}

TEST_F(DenseUnionBuilderTest, InferredType) {
AppendInferred();

Expand Down Expand Up @@ -467,6 +515,32 @@ TEST_F(SparseUnionBuilderTest, Basics) {
ASSERT_ARRAYS_EQUAL(*expected, *actual);
}

TEST_F(SparseUnionBuilderTest, NullsAndEmptyValues) {
union_builder.reset(new SparseUnionBuilder(
default_memory_pool(), {i8_builder, str_builder, dbl_builder},
sparse_union({field("i8", int8()), field("str", utf8()), field("dbl", float64())},
{I8, STR, DBL})));
AppendNullsAndEmptyValues();

// "abc", null, 0, 42, null, null, 0, 0
// (note that getting 0 for empty values is implementation-defined)
auto expected_i8 = ArrayFromJSON(int8(), "[0, null, 0, 42, null, null, 0, 0]");
auto expected_str = ArrayFromJSON(utf8(), R"(["abc", "", "", "", "", "", "", ""])");
auto expected_dbl = ArrayFromJSON(float64(), "[0, 0, 0, 0, 0, 0, 0, 0]");

ASSERT_OK_AND_ASSIGN(
auto expected,
SparseUnionArray::Make(*expected_types, {expected_i8, expected_str, expected_dbl},
{"i8", "str", "dbl"}, {I8, STR, DBL}));

ASSERT_EQ(expected->type()->ToString(), actual->type()->ToString());
ASSERT_ARRAYS_EQUAL(*expected, *actual);
// Physical arrays must be as expected
ASSERT_ARRAYS_EQUAL(*expected_i8, *actual->field(0));
ASSERT_ARRAYS_EQUAL(*expected_str, *actual->field(1));
ASSERT_ARRAYS_EQUAL(*expected_dbl, *actual->field(2));
}

TEST_F(SparseUnionBuilderTest, InferredType) {
AppendInferred();

Expand Down
20 changes: 20 additions & 0 deletions cpp/src/arrow/array/builder_adaptive.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,26 @@ class ARROW_EXPORT AdaptiveIntBuilderBase : public ArrayBuilder {
return Status::OK();
}

Status AppendEmptyValues(int64_t length) final {
ARROW_RETURN_NOT_OK(CommitPendingData());
ARROW_RETURN_NOT_OK(Reserve(length));
memset(data_->mutable_data() + length_ * int_size_, 0, int_size_ * length);
UnsafeSetNotNull(length);
return Status::OK();
}

Status AppendEmptyValue() final {
pending_data_[pending_pos_] = 0;
pending_valid_[pending_pos_] = 1;
++pending_pos_;
++length_;

if (ARROW_PREDICT_FALSE(pending_pos_ >= pending_size_)) {
return CommitPendingData();
}
return Status::OK();
}

void Reset() override;
Status Resize(int64_t capacity) override;

Expand Down
16 changes: 16 additions & 0 deletions cpp/src/arrow/array/builder_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,25 @@ class ARROW_EXPORT ArrayBuilder {
/// Reset the builder.
virtual void Reset();

/// \brief Append a null value to builder
virtual Status AppendNull() = 0;
/// \brief Append a number of null values to builder
virtual Status AppendNulls(int64_t length) = 0;

/// \brief Append a non-null value to builder
///
/// The appended value is an implementation detail, but the corresponding
/// memory slot is guaranteed to be initialized.
/// This method is useful when appending a null value to a parent nested type.
virtual Status AppendEmptyValue() = 0;

/// \brief Append a number of non-null values to builder
///
/// The appended values are an implementation detail, but the corresponding
/// memory slot is guaranteed to be initialized.
/// This method is useful when appending null values to a parent nested type.
virtual Status AppendEmptyValues(int64_t length) = 0;

/// For cases where raw data was memcpy'd into the internal buffers, allows us
/// to advance the length of the builder. It is your responsibility to use
/// this function responsibly.
Expand Down
14 changes: 14 additions & 0 deletions cpp/src/arrow/array/builder_binary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,20 @@ Status FixedSizeBinaryBuilder::AppendNulls(int64_t length) {
return Status::OK();
}

Status FixedSizeBinaryBuilder::AppendEmptyValue() {
RETURN_NOT_OK(Reserve(1));
UnsafeAppendToBitmap(true);
byte_builder_.UnsafeAppend(/*num_copies=*/byte_width_, 0);
return Status::OK();
}

Status FixedSizeBinaryBuilder::AppendEmptyValues(int64_t length) {
RETURN_NOT_OK(Reserve(length));
UnsafeAppendToBitmap(length, true);
byte_builder_.UnsafeAppend(/*num_copies=*/length * byte_width_, 0);
return Status::OK();
}

void FixedSizeBinaryBuilder::Reset() {
ArrayBuilder::Reset();
byte_builder_.Reset();
Expand Down
Loading