diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h index 3eb9b033b92..799e2a31f1e 100644 --- a/cpp/src/arrow/buffer.h +++ b/cpp/src/arrow/buffer.h @@ -173,6 +173,10 @@ class ARROW_EXPORT Buffer { return util::string_view(reinterpret_cast(data_), size_); } + /// \brief View buffer contents as a util::bytes_view + /// \return util::bytes_view + explicit operator util::bytes_view() const { return util::bytes_view(data_, size_); } + /// \brief Return a pointer to the buffer's data const uint8_t* data() const { return data_; } /// \brief Return a writable pointer to the buffer's data diff --git a/cpp/src/arrow/compute/kernels/boolean.cc b/cpp/src/arrow/compute/kernels/boolean.cc index 19f3bb7b94a..020af6cb98d 100644 --- a/cpp/src/arrow/compute/kernels/boolean.cc +++ b/cpp/src/arrow/compute/kernels/boolean.cc @@ -17,7 +17,9 @@ #include "arrow/compute/kernels/boolean.h" +#include #include +#include #include #include "arrow/array.h" @@ -31,6 +33,7 @@ namespace arrow { +using internal::Bitmap; using internal::BitmapAnd; using internal::BitmapOr; using internal::BitmapXor; @@ -76,7 +79,13 @@ Status Invert(FunctionContext* ctx, const Datum& value, Datum* out) { return Status::OK(); } +enum class ResolveNull { KLEENE_LOGIC, PROPAGATE }; + class BinaryBooleanKernel : public BinaryKernel { + public: + explicit BinaryBooleanKernel(ResolveNull resolve_null) : resolve_null_(resolve_null) {} + + protected: virtual Status Compute(FunctionContext* ctx, const ArrayData& left, const ArrayData& right, ArrayData* out) = 0; @@ -91,50 +100,156 @@ class BinaryBooleanKernel : public BinaryKernel { ArrayData* result; result = out->array().get(); - RETURN_NOT_OK(detail::AssignNullIntersection(ctx, left_data, right_data, result)); return Compute(ctx, left_data, right_data, result); } std::shared_ptr out_type() const override { return boolean(); } + + enum BitmapIndex { LEFT_VALID, LEFT_DATA, RIGHT_VALID, RIGHT_DATA }; + + template + Status ComputeKleene(ComputeWord&& compute_word, 
FunctionContext* ctx, + const ArrayData& left, const ArrayData& right, ArrayData* out) { + DCHECK(left.null_count != 0 || right.null_count != 0); + + Bitmap bitmaps[4]; + bitmaps[LEFT_VALID] = {left.buffers[0], left.offset, left.length}; + bitmaps[LEFT_DATA] = {left.buffers[1], left.offset, left.length}; + + bitmaps[RIGHT_VALID] = {right.buffers[0], right.offset, right.length}; + bitmaps[RIGHT_DATA] = {right.buffers[1], right.offset, right.length}; + + RETURN_NOT_OK(AllocateEmptyBitmap(ctx->memory_pool(), out->length, &out->buffers[0])); + + auto out_validity = out->GetMutableValues(0); + auto out_data = out->GetMutableValues(1); + + int64_t i = 0; + auto apply = [&](uint64_t left_valid, uint64_t left_data, uint64_t right_valid, + uint64_t right_data) { + auto left_true = left_valid & left_data; + auto left_false = left_valid & ~left_data; + + auto right_true = right_valid & right_data; + auto right_false = right_valid & ~right_data; + + compute_word(left_true, left_false, right_true, right_false, &out_validity[i], + &out_data[i]); + ++i; + }; + + if (right.null_count == 0 || left.null_count == 0) { + if (left.null_count == 0) { + // ensure only bitmaps[RIGHT_VALID].buffer might be null + std::swap(bitmaps[LEFT_VALID], bitmaps[RIGHT_VALID]); + std::swap(bitmaps[LEFT_DATA], bitmaps[RIGHT_DATA]); + } + // override bitmaps[RIGHT_VALID] to make it safe for Visit() + bitmaps[RIGHT_VALID] = bitmaps[RIGHT_DATA]; + + Bitmap::VisitWords(bitmaps, [&](std::array words) { + apply(words[LEFT_VALID], words[LEFT_DATA], ~uint64_t(0), words[RIGHT_DATA]); + }); + } else { + Bitmap::VisitWords(bitmaps, [&](std::array words) { + apply(words[LEFT_VALID], words[LEFT_DATA], words[RIGHT_VALID], words[RIGHT_DATA]); + }); + } + return Status::OK(); + } + + ResolveNull resolve_null_; }; class AndKernel : public BinaryBooleanKernel { + public: + using BinaryBooleanKernel::BinaryBooleanKernel; + + private: Status Compute(FunctionContext* ctx, const ArrayData& left, const ArrayData& right, 
ArrayData* out) override { - if (right.length > 0) { - BitmapAnd(left.buffers[1]->data(), left.offset, right.buffers[1]->data(), - right.offset, right.length, 0, out->buffers[1]->mutable_data()); + if (resolve_null_ == ResolveNull::PROPAGATE || + (left.GetNullCount() == 0 && right.GetNullCount() == 0)) { + RETURN_NOT_OK(detail::AssignNullIntersection(ctx, left, right, out)); + if (right.length > 0) { + BitmapAnd(left.buffers[1]->data(), left.offset, right.buffers[1]->data(), + right.offset, right.length, 0, out->buffers[1]->mutable_data()); + } + return Status::OK(); } - return Status::OK(); + + auto compute_word = [](uint64_t left_true, uint64_t left_false, uint64_t right_true, + uint64_t right_false, uint64_t* out_valid, + uint64_t* out_data) { + *out_data = left_true & right_true; + *out_valid = left_false | right_false | (left_true & right_true); + }; + + return ComputeKleene(compute_word, ctx, left, right, out); } }; Status And(FunctionContext* ctx, const Datum& left, const Datum& right, Datum* out) { - AndKernel and_kernel; + AndKernel and_kernel(ResolveNull::PROPAGATE); + detail::PrimitiveAllocatingBinaryKernel kernel(&and_kernel); + return detail::InvokeBinaryArrayKernel(ctx, &kernel, left, right, out); +} + +Status KleeneAnd(FunctionContext* ctx, const Datum& left, const Datum& right, + Datum* out) { + AndKernel and_kernel(ResolveNull::KLEENE_LOGIC); detail::PrimitiveAllocatingBinaryKernel kernel(&and_kernel); return detail::InvokeBinaryArrayKernel(ctx, &kernel, left, right, out); } class OrKernel : public BinaryBooleanKernel { + public: + using BinaryBooleanKernel::BinaryBooleanKernel; + + private: Status Compute(FunctionContext* ctx, const ArrayData& left, const ArrayData& right, ArrayData* out) override { - if (right.length > 0) { - BitmapOr(left.buffers[1]->data(), left.offset, right.buffers[1]->data(), - right.offset, right.length, 0, out->buffers[1]->mutable_data()); + if (resolve_null_ == ResolveNull::PROPAGATE || + (left.GetNullCount() == 0 && 
right.GetNullCount() == 0)) { + RETURN_NOT_OK(detail::AssignNullIntersection(ctx, left, right, out)); + if (right.length > 0) { + BitmapOr(left.buffers[1]->data(), left.offset, right.buffers[1]->data(), + right.offset, right.length, 0, out->buffers[1]->mutable_data()); + } + return Status::OK(); } - return Status::OK(); + + static auto compute_word = [](uint64_t left_true, uint64_t left_false, + uint64_t right_true, uint64_t right_false, + uint64_t* out_valid, uint64_t* out_data) { + *out_data = left_true | right_true; + *out_valid = left_true | right_true | (left_false & right_false); + }; + + return ComputeKleene(compute_word, ctx, left, right, out); } }; Status Or(FunctionContext* ctx, const Datum& left, const Datum& right, Datum* out) { - OrKernel or_kernel; + OrKernel or_kernel(ResolveNull::PROPAGATE); + detail::PrimitiveAllocatingBinaryKernel kernel(&or_kernel); + return detail::InvokeBinaryArrayKernel(ctx, &kernel, left, right, out); +} + +Status KleeneOr(FunctionContext* ctx, const Datum& left, const Datum& right, Datum* out) { + OrKernel or_kernel(ResolveNull::KLEENE_LOGIC); detail::PrimitiveAllocatingBinaryKernel kernel(&or_kernel); return detail::InvokeBinaryArrayKernel(ctx, &kernel, left, right, out); } class XorKernel : public BinaryBooleanKernel { + public: + XorKernel() : BinaryBooleanKernel(ResolveNull::PROPAGATE) {} + + private: Status Compute(FunctionContext* ctx, const ArrayData& left, const ArrayData& right, ArrayData* out) override { + RETURN_NOT_OK(detail::AssignNullIntersection(ctx, left, right, out)); if (right.length > 0) { BitmapXor(left.buffers[1]->data(), left.offset, right.buffers[1]->data(), right.offset, right.length, 0, out->buffers[1]->mutable_data()); diff --git a/cpp/src/arrow/compute/kernels/boolean.h b/cpp/src/arrow/compute/kernels/boolean.h index fb88659dbc4..06bb5caa605 100644 --- a/cpp/src/arrow/compute/kernels/boolean.h +++ b/cpp/src/arrow/compute/kernels/boolean.h @@ -37,7 +37,9 @@ class FunctionContext; ARROW_EXPORT Status 
Invert(FunctionContext* context, const Datum& value, Datum* out); -/// \brief Element-wise AND of two boolean datums +/// \brief Element-wise AND of two boolean datums which always propagates nulls +/// (null and false is null). +/// /// \param[in] context the FunctionContext /// \param[in] left left operand (array) /// \param[in] right right operand (array) @@ -48,7 +50,23 @@ Status Invert(FunctionContext* context, const Datum& value, Datum* out); ARROW_EXPORT Status And(FunctionContext* context, const Datum& left, const Datum& right, Datum* out); -/// \brief Element-wise OR of two boolean datums +/// \brief Element-wise AND of two boolean datums with a Kleene truth table +/// (null and false is false). +/// +/// \param[in] context the FunctionContext +/// \param[in] left left operand (array) +/// \param[in] right right operand (array) +/// \param[out] out resulting datum +/// +/// \since 1.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Status KleeneAnd(FunctionContext* context, const Datum& left, const Datum& right, + Datum* out); + +/// \brief Element-wise OR of two boolean datums which always propagates nulls +/// (null or true is null). +/// /// \param[in] context the FunctionContext /// \param[in] left left operand (array) /// \param[in] right right operand (array) @@ -59,6 +77,20 @@ Status And(FunctionContext* context, const Datum& left, const Datum& right, Datu ARROW_EXPORT Status Or(FunctionContext* context, const Datum& left, const Datum& right, Datum* out); +/// \brief Element-wise OR of two boolean datums with a Kleene truth table +/// (null or true is true). 
+/// +/// \param[in] context the FunctionContext +/// \param[in] left left operand (array) +/// \param[in] right right operand (array) +/// \param[out] out resulting datum +/// +/// \since 1.0.0 +/// \note API not yet finalized +ARROW_EXPORT +Status KleeneOr(FunctionContext* context, const Datum& left, const Datum& right, + Datum* out); + /// \brief Element-wise XOR of two boolean datums /// \param[in] context the FunctionContext /// \param[in] left left operand (array) diff --git a/cpp/src/arrow/compute/kernels/boolean_test.cc b/cpp/src/arrow/compute/kernels/boolean_test.cc index b7f57840f87..04d7e500843 100644 --- a/cpp/src/arrow/compute/kernels/boolean_test.cc +++ b/cpp/src/arrow/compute/kernels/boolean_test.cc @@ -44,10 +44,16 @@ class TestBooleanKernel : public ComputeFixture, public TestBase { const std::shared_ptr& right, const std::shared_ptr& expected) { Datum result; + ASSERT_OK(kernel(&this->ctx_, left, right, &result)); ASSERT_EQ(Datum::ARRAY, result.kind()); std::shared_ptr result_array = result.make_array(); ASSERT_ARRAYS_EQUAL(*expected, *result_array); + + ASSERT_OK(kernel(&this->ctx_, right, left, &result)); + ASSERT_EQ(Datum::ARRAY, result.kind()); + result_array = result.make_array(); + ASSERT_ARRAYS_EQUAL(*expected, *result_array); } void TestChunkedArrayBinary(const BinaryKernelFunc& kernel, @@ -55,44 +61,51 @@ class TestBooleanKernel : public ComputeFixture, public TestBase { const std::shared_ptr& right, const std::shared_ptr& expected) { Datum result; - std::shared_ptr result_array; + ASSERT_OK(kernel(&this->ctx_, left, right, &result)); ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind()); std::shared_ptr result_ca = result.chunked_array(); ASSERT_TRUE(result_ca->Equals(expected)); + + ASSERT_OK(kernel(&this->ctx_, right, left, &result)); + ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind()); + result_ca = result.chunked_array(); + ASSERT_TRUE(result_ca->Equals(expected)); } - void TestBinaryKernel(const BinaryKernelFunc& kernel, const std::vector& 
values1, - const std::vector& values2, - const std::vector& values3, - const std::vector& values3_nulls) { - auto type = boolean(); - auto a1 = _MakeArray(type, values1, {}); - auto a2 = _MakeArray(type, values2, {}); - auto a3 = _MakeArray(type, values3, {}); - auto a1_nulls = _MakeArray(type, values1, values1); - auto a2_nulls = _MakeArray(type, values2, values2); - auto a3_nulls = _MakeArray(type, values3, values3_nulls); - - TestArrayBinary(kernel, a1, a2, a3); - TestArrayBinary(kernel, a1_nulls, a2_nulls, a3_nulls); - TestArrayBinary(kernel, a1->Slice(1), a2->Slice(1), a3->Slice(1)); - TestArrayBinary(kernel, a1_nulls->Slice(1), a2_nulls->Slice(1), a3_nulls->Slice(1)); + void TestBinaryKernel(const BinaryKernelFunc& kernel, + const std::shared_ptr& left, + const std::shared_ptr& right, + const std::shared_ptr& expected) { + TestArrayBinary(kernel, left, right, expected); + TestArrayBinary(kernel, left->Slice(1), right->Slice(1), expected->Slice(1)); // ChunkedArray - std::vector> ca1_arrs = {a1, a1->Slice(1)}; - auto ca1 = std::make_shared(ca1_arrs); - std::vector> ca2_arrs = {a2, a2->Slice(1)}; - auto ca2 = std::make_shared(ca2_arrs); - std::vector> ca3_arrs = {a3, a3->Slice(1)}; - auto ca3 = std::make_shared(ca3_arrs); - TestChunkedArrayBinary(kernel, ca1, ca2, ca3); + auto cleft = std::make_shared(ArrayVector{left, left->Slice(1)}); + auto cright = std::make_shared(ArrayVector{right, right->Slice(1)}); + auto cexpected = + std::make_shared(ArrayVector{expected, expected->Slice(1)}); + TestChunkedArrayBinary(kernel, cleft, cright, cexpected); // ChunkedArray with different chunks - std::vector> ca4_arrs = {a1->Slice(0, 1), a1->Slice(1), - a1->Slice(1, 1), a1->Slice(2)}; - auto ca4 = std::make_shared(ca4_arrs); - TestChunkedArrayBinary(kernel, ca4, ca2, ca3); + cleft = std::make_shared(ArrayVector{ + left->Slice(0, 1), left->Slice(1), left->Slice(1, 1), left->Slice(2)}); + TestChunkedArrayBinary(kernel, cleft, cright, cexpected); + } + + void 
TestBinaryKernelPropagate(const BinaryKernelFunc& kernel, + const std::vector& left, + const std::vector& right, + const std::vector& expected, + const std::vector& expected_nulls) { + auto type = boolean(); + TestBinaryKernel(kernel, _MakeArray(type, left, {}), + _MakeArray(type, right, {}), + _MakeArray(type, expected, {})); + + TestBinaryKernel(kernel, _MakeArray(type, left, left), + _MakeArray(type, right, right), + _MakeArray(type, expected, expected_nulls)); } }; @@ -154,7 +167,7 @@ TEST_F(TestBooleanKernel, And) { std::vector values1 = {true, false, true, false, true, true}; std::vector values2 = {true, true, false, false, true, false}; std::vector values3 = {true, false, false, false, true, false}; - TestBinaryKernel(And, values1, values2, values3, values3); + TestBinaryKernelPropagate(And, values1, values2, values3, values3); } TEST_F(TestBooleanKernel, Or) { @@ -162,7 +175,7 @@ TEST_F(TestBooleanKernel, Or) { std::vector values2 = {true, true, false, false, true, false}; std::vector values3 = {true, true, true, false, true, true}; std::vector values3_nulls = {true, false, false, false, true, false}; - TestBinaryKernel(Or, values1, values2, values3, values3_nulls); + TestBinaryKernelPropagate(Or, values1, values2, values3, values3_nulls); } TEST_F(TestBooleanKernel, Xor) { @@ -170,7 +183,31 @@ TEST_F(TestBooleanKernel, Xor) { std::vector values2 = {true, true, false, false, true, false}; std::vector values3 = {false, true, true, false, false, true}; std::vector values3_nulls = {true, false, false, false, true, false}; - TestBinaryKernel(Xor, values1, values2, values3, values3_nulls); + TestBinaryKernelPropagate(Xor, values1, values2, values3, values3_nulls); +} + +TEST_F(TestBooleanKernel, KleeneAnd) { + auto left = ArrayFromJSON(boolean(), " [true, true, true, false, false, null]"); + auto right = ArrayFromJSON(boolean(), " [true, false, null, false, null, null]"); + auto expected = ArrayFromJSON(boolean(), "[true, false, null, false, false, null]"); + 
TestBinaryKernel(KleeneAnd, left, right, expected); + + left = ArrayFromJSON(boolean(), " [true, true, false, null, null]"); + right = ArrayFromJSON(boolean(), " [true, false, false, true, false]"); + expected = ArrayFromJSON(boolean(), "[true, false, false, null, false]"); + TestBinaryKernel(KleeneAnd, left, right, expected); +} + +TEST_F(TestBooleanKernel, KleeneOr) { + auto left = ArrayFromJSON(boolean(), " [true, true, true, false, false, null]"); + auto right = ArrayFromJSON(boolean(), " [true, false, null, false, null, null]"); + auto expected = ArrayFromJSON(boolean(), "[true, true, true, false, null, null]"); + TestBinaryKernel(KleeneOr, left, right, expected); + + left = ArrayFromJSON(boolean(), " [true, true, false, null, null]"); + right = ArrayFromJSON(boolean(), " [true, false, false, true, false]"); + expected = ArrayFromJSON(boolean(), "[true, true, false, true, null]"); + TestBinaryKernel(KleeneOr, left, right, expected); } } // namespace compute diff --git a/cpp/src/arrow/dataset/filter.cc b/cpp/src/arrow/dataset/filter.cc index 06e7ed2f88a..34cd3a4b7a8 100644 --- a/cpp/src/arrow/dataset/filter.cc +++ b/cpp/src/arrow/dataset/filter.cc @@ -894,7 +894,7 @@ Result TreeEvaluator::Evaluate(const AndExpression& expr, if (lhs.is_array() && rhs.is_array()) { Datum out; compute::FunctionContext ctx{pool_}; - RETURN_NOT_OK(arrow::compute::And(&ctx, lhs, rhs, &out)); + RETURN_NOT_OK(arrow::compute::KleeneAnd(&ctx, lhs, rhs, &out)); return std::move(out); } @@ -925,7 +925,7 @@ Result TreeEvaluator::Evaluate(const OrExpression& expr, if (lhs.is_array() && rhs.is_array()) { Datum out; compute::FunctionContext ctx{pool_}; - RETURN_NOT_OK(arrow::compute::Or(&ctx, lhs, rhs, &out)); + RETURN_NOT_OK(arrow::compute::KleeneOr(&ctx, lhs, rhs, &out)); return std::move(out); } diff --git a/cpp/src/arrow/dataset/filter_test.cc b/cpp/src/arrow/dataset/filter_test.cc index 81677d697f9..085e8402f15 100644 --- a/cpp/src/arrow/dataset/filter_test.cc +++ 
b/cpp/src/arrow/dataset/filter_test.cc @@ -241,7 +241,7 @@ TEST_F(FilterTest, Basics) { {"a": 1, "b": 0.2, "in": 1}, {"a": 2, "b": -0.1, "in": 0}, {"a": 0, "b": 0.1, "in": 0}, - {"a": 0, "b": null, "in": null}, + {"a": 0, "b": null, "in": 0}, {"a": 0, "b": 1.0, "in": 0} ])"); } @@ -259,8 +259,7 @@ TEST_F(FilterTest, ConditionOnAbsentColumn) { ])"); } -TEST_F(FilterTest, DISABLED_KleeneTruthTables) { - // FIXME(bkietz) enable this test after ARROW-6396 +TEST_F(FilterTest, KleeneTruthTables) { // TODO(bkietz) also test various ranks against each other AssertFilter("a"_ and "b"_, {field("a", boolean()), field("b", boolean())}, R"([ {"a":null, "b":null, "in":null}, @@ -273,7 +272,7 @@ TEST_F(FilterTest, DISABLED_KleeneTruthTables) { {"a":false, "b":false, "in":false} ])"); - AssertFilter("a"_ and "b"_, {field("a", boolean()), field("b", boolean())}, R"([ + AssertFilter("a"_ or "b"_, {field("a", boolean()), field("b", boolean())}, R"([ {"a":null, "b":null, "in":null}, {"a":null, "b":true, "in":true}, {"a":null, "b":false, "in":null}, diff --git a/cpp/src/arrow/util/bit_util.cc b/cpp/src/arrow/util/bit_util.cc index dab20a69df3..99c988b1355 100644 --- a/cpp/src/arrow/util/bit_util.cc +++ b/cpp/src/arrow/util/bit_util.cc @@ -24,12 +24,16 @@ #endif #include +#include #include #include #include #include +#include +#include #include +#include "arrow/array.h" #include "arrow/buffer.h" #include "arrow/status.h" #include "arrow/util/align_util.h" @@ -323,6 +327,37 @@ Status BitmapOp(MemoryPool* pool, const uint8_t* left, int64_t left_offset, } // namespace +std::string Bitmap::ToString() const { + std::string out(length_, '0'); + for (int64_t i = 0; i < length_; ++i) { + out[i] = GetBit(i) ? 
'1' : '0'; + } + return out; +} + +std::shared_ptr Bitmap::ToArray() const { + return std::make_shared(length_, buffer_, nullptr, 0, offset_); +} + +std::string Bitmap::Diff(const Bitmap& other) const { + return ToArray()->Diff(*other.ToArray()); +} + +bool Bitmap::Equals(const Bitmap& other) const { + if (length_ != other.length_) { + return false; + } + return BitmapEquals(buffer_->data(), offset_, other.buffer_->data(), other.offset(), + length_); +} + +int64_t Bitmap::BitLength(const Bitmap* bitmaps, size_t N) { + for (size_t i = 1; i < N; ++i) { + DCHECK_EQ(bitmaps[i].length(), bitmaps[0].length()); + } + return bitmaps[0].length(); +} + Status BitmapAnd(MemoryPool* pool, const uint8_t* left, int64_t left_offset, const uint8_t* right, int64_t right_offset, int64_t length, int64_t out_offset, std::shared_ptr* out_buffer) { diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index 3718a16fa5b..c3dbf4b1d28 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -14,8 +14,8 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. 
-#ifndef ARROW_UTIL_BIT_UTIL_H -#define ARROW_UTIL_BIT_UTIL_H + +#pragma once #ifdef _WIN32 #define ARROW_LITTLE_ENDIAN 1 @@ -52,23 +52,34 @@ #define ARROW_BYTE_SWAP32 __builtin_bswap32 #endif +#include +#include +#include +#include #include #include #include #include #include +#include #include +#include #include +#include "arrow/buffer.h" +#include "arrow/util/compare.h" +#include "arrow/util/functional.h" #include "arrow/util/macros.h" +#include "arrow/util/string_builder.h" +#include "arrow/util/string_view.h" #include "arrow/util/type_traits.h" #include "arrow/util/visibility.h" namespace arrow { -class Buffer; class MemoryPool; class Status; +class BooleanArray; namespace detail { @@ -827,6 +838,253 @@ Status InvertBitmap(MemoryPool* pool, const uint8_t* bitmap, int64_t offset, ARROW_EXPORT int64_t CountSetBits(const uint8_t* data, int64_t bit_offset, int64_t length); +class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, + public util::EqualityComparable { + public: + template + using View = util::basic_string_view; + + Bitmap() = default; + + Bitmap(std::shared_ptr buffer, int64_t offset, int64_t length) + : buffer_(std::move(buffer)), offset_(offset), length_(length) {} + + Bitmap(const void* data, int64_t offset, int64_t length) + : buffer_(std::make_shared(static_cast(data), + BitUtil::BytesForBits(offset + length))), + offset_(offset), + length_(length) {} + + Bitmap(void* data, int64_t offset, int64_t length) + : buffer_(std::make_shared(static_cast(data), + BitUtil::BytesForBits(offset + length))), + offset_(offset), + length_(length) {} + + Bitmap Slice(int64_t offset) const { + return Bitmap(buffer_, offset_ + offset, length_ - offset); + } + + Bitmap Slice(int64_t offset, int64_t length) const { + return Bitmap(buffer_, offset_ + offset, length); + } + + std::string ToString() const; + + bool Equals(const Bitmap& other) const; + + std::string Diff(const Bitmap& other) const; + + bool GetBit(int64_t i) const { return 
BitUtil::GetBit(buffer_->data(), i + offset_); } + + bool operator[](int64_t i) const { return GetBit(i); } + + void SetBitTo(int64_t i, bool v) const { + BitUtil::SetBitTo(buffer_->mutable_data(), i + offset_, v); + } + + /// \brief Visit bits from each bitmap as bitset + /// + /// All bitmaps must have identical length. + template + static void VisitBits(const Bitmap (&bitmaps)[N], Visitor&& visitor) { + int64_t bit_length = BitLength(bitmaps, N); + std::bitset bits; + for (int64_t bit_i = 0; bit_i < bit_length; ++bit_i) { + for (size_t i = 0; i < N; ++i) { + bits[i] = bitmaps[i].GetBit(bit_i); + } + visitor(bits); + } + } + + /// \brief Visit words of bits from each bitmap as array + /// + /// All bitmaps must have identical length. The first bit in a visited bitmap + /// may be offset within the first visited word, but words will otherwise contain + /// densely packed bits loaded from the bitmap. That offset within the first word is + /// returned. + /// + /// TODO(bkietz) allow for early termination + template ::value_type> + static int64_t VisitWords(const Bitmap (&bitmaps_arg)[N], Visitor&& visitor) { + constexpr int64_t kBitWidth = sizeof(Word) * 8; + + // local, mutable variables which will be sliced/decremented to represent consumption: + Bitmap bitmaps[N]; + int64_t offsets[N]; + int64_t bit_length = BitLength(bitmaps_arg, N); + View words[N]; + for (size_t i = 0; i < N; ++i) { + bitmaps[i] = bitmaps_arg[i]; + offsets[i] = bitmaps[i].template word_offset(); + assert(offsets[i] >= 0 && offsets[i] < kBitWidth); + words[i] = bitmaps[i].template words(); + } + + auto consume = [&](int64_t consumed_bits) { + for (size_t i = 0; i < N; ++i) { + bitmaps[i] = bitmaps[i].Slice(consumed_bits, bit_length - consumed_bits); + offsets[i] = bitmaps[i].template word_offset(); + assert(offsets[i] >= 0 && offsets[i] < kBitWidth); + words[i] = bitmaps[i].template words(); + } + bit_length -= consumed_bits; + }; + + std::array visited_words; + visited_words.fill(0); + + if 
(bit_length <= kBitWidth * 2) { + // bitmaps fit into one or two words so don't bother with optimization + while (bit_length > 0) { + auto leading_bits = std::min(bit_length, kBitWidth); + SafeLoadWords(bitmaps, 0, leading_bits, false, &visited_words); + visitor(visited_words); + consume(leading_bits); + } + return 0; + } + + int64_t max_offset = *std::max_element(offsets, offsets + N); + int64_t min_offset = *std::min_element(offsets, offsets + N); + if (max_offset > 0) { + // consume leading bits + auto leading_bits = kBitWidth - min_offset; + SafeLoadWords(bitmaps, 0, leading_bits, true, &visited_words); + visitor(visited_words); + consume(leading_bits); + } + assert(*std::min_element(offsets, offsets + N) == 0); + + int64_t whole_word_count = bit_length / kBitWidth; + assert(whole_word_count >= 1); + + if (min_offset == max_offset) { + // all offsets were identical, all leading bits have been consumed + assert( + std::all_of(offsets, offsets + N, [](int64_t offset) { return offset == 0; })); + + for (int64_t word_i = 0; word_i < whole_word_count; ++word_i) { + for (size_t i = 0; i < N; ++i) { + visited_words[i] = words[i][word_i]; + } + visitor(visited_words); + } + consume(whole_word_count * kBitWidth); + } else { + // leading bits from potentially incomplete words have been consumed + + // word_i such that words[i][word_i] and words[i][word_i + 1] lie entirely + // within the bitmap for all i + for (int64_t word_i = 0; word_i < whole_word_count - 1; ++word_i) { + for (size_t i = 0; i < N; ++i) { + if (offsets[i] == 0) { + visited_words[i] = words[i][word_i]; + } else { + visited_words[i] = words[i][word_i] >> offsets[i]; + visited_words[i] |= words[i][word_i + 1] << (kBitWidth - offsets[i]); + } + } + visitor(visited_words); + } + consume((whole_word_count - 1) * kBitWidth); + + SafeLoadWords(bitmaps, 0, kBitWidth, false, &visited_words); + + visitor(visited_words); + consume(kBitWidth); + } + + // load remaining bits + SafeLoadWords(bitmaps, 0, 
bit_length, false, &visited_words); + visitor(visited_words); + + return min_offset; + } + + const std::shared_ptr& buffer() const { return buffer_; } + + /// offset of first bit relative to buffer().data() + int64_t offset() const { return offset_; } + + /// number of bits in this Bitmap + int64_t length() const { return length_; } + + /// string_view of all bytes which contain any bit in this Bitmap + util::bytes_view bytes() const { + auto byte_offset = offset_ / 8; + auto byte_count = BitUtil::CeilDiv(offset_ + length_, 8) - byte_offset; + return util::bytes_view(buffer_->data() + byte_offset, byte_count); + } + + private: + /// string_view of all Words which contain any bit in this Bitmap + /// + /// For example, given Word=uint16_t and a bitmap spanning bits [20, 36) + /// words() would span bits [16, 48). + /// + /// 0 16 32 48 64 + /// |-------|-------|------|------| (buffer) + /// [ ] (bitmap) + /// |-------|------| (returned words) + /// + /// \warning The words may contain bytes which lie outside the buffer or are + /// uninitialized. + template + View words() const { + auto bytes_addr = reinterpret_cast(bytes().data()); + auto words_addr = bytes_addr - bytes_addr % sizeof(Word); + auto word_byte_count = + BitUtil::RoundUpToPowerOf2(static_cast(bytes_addr + bytes().size()), + static_cast(sizeof(Word))) - + words_addr; + return View(reinterpret_cast(words_addr), + word_byte_count / sizeof(Word)); + } + + /// offset of first bit relative to words().data() + template + int64_t word_offset() const { + return offset_ + 8 * (reinterpret_cast(buffer_->data()) - + reinterpret_cast(words().data())); + } + + /// load words from bitmaps bitwise + template + static void SafeLoadWords(const Bitmap (&bitmaps)[N], int64_t offset, + int64_t out_length, bool set_trailing_bits, + std::array* out) { + out->fill(0); + + int64_t out_offset = set_trailing_bits ? 
sizeof(Word) * 8 - out_length : 0; + + Bitmap slices[N], out_bitmaps[N]; + for (size_t i = 0; i < N; ++i) { + slices[i] = bitmaps[i].Slice(offset, out_length); + out_bitmaps[i] = Bitmap(&out->at(i), out_offset, out_length); + } + + int64_t bit_i = 0; + Bitmap::VisitBits(slices, [&](std::bitset bits) { + for (size_t i = 0; i < N; ++i) { + out_bitmaps[i].SetBitTo(bit_i, bits[i]); + } + ++bit_i; + }); + } + + std::shared_ptr ToArray() const; + + /// assert bitmaps have identical length and return that length + static int64_t BitLength(const Bitmap* bitmaps, size_t N); + + std::shared_ptr buffer_; + int64_t offset_ = 0, length_ = 0; +}; + ARROW_EXPORT bool BitmapEquals(const uint8_t* left, int64_t left_offset, const uint8_t* right, int64_t right_offset, int64_t bit_length); @@ -933,5 +1191,3 @@ class BitsetStack { } // namespace internal } // namespace arrow - -#endif // ARROW_UTIL_BIT_UTIL_H diff --git a/cpp/src/arrow/util/bit_util_benchmark.cc b/cpp/src/arrow/util/bit_util_benchmark.cc index 837499252ab..5483401cdef 100644 --- a/cpp/src/arrow/util/bit_util_benchmark.cc +++ b/cpp/src/arrow/util/bit_util_benchmark.cc @@ -17,6 +17,9 @@ #include "benchmark/benchmark.h" +#include +#include +#include #include #include "arrow/buffer.h" @@ -94,6 +97,66 @@ static std::shared_ptr CreateRandomBuffer(int64_t nbytes) { return buffer; } +template +static void BenchmarkAndImpl(benchmark::State& state, DoAnd&& do_and) { + int64_t nbytes = state.range(0); + int64_t offset = state.range(1); + + std::shared_ptr buffer_1 = CreateRandomBuffer(nbytes); + std::shared_ptr buffer_2 = CreateRandomBuffer(nbytes); + std::shared_ptr buffer_3 = CreateRandomBuffer(nbytes); + + const int64_t num_bits = nbytes * 8 - offset; + + internal::Bitmap bitmap_1{buffer_1, 0, num_bits}; + internal::Bitmap bitmap_2{buffer_2, offset, num_bits}; + internal::Bitmap bitmap_3{buffer_3, 0, num_bits}; + + for (auto _ : state) { + do_and({bitmap_1, bitmap_2}, &bitmap_3); + auto total = 
internal::CountSetBits(bitmap_3.buffer()->data(), bitmap_3.offset(), + bitmap_3.length()); + benchmark::DoNotOptimize(total); + } + state.SetBytesProcessed(state.iterations() * nbytes); +} + +static void BenchmarkBitmapAnd(benchmark::State& state) { + BenchmarkAndImpl(state, [](const internal::Bitmap(&bitmaps)[2], internal::Bitmap* out) { + internal::BitmapAnd(bitmaps[0].buffer()->data(), bitmaps[0].offset(), + bitmaps[1].buffer()->data(), bitmaps[1].offset(), + bitmaps[0].length(), 0, out->buffer()->mutable_data()); + }); +} + +static void BenchmarkBitmapVisitBitsetAnd(benchmark::State& state) { + BenchmarkAndImpl(state, [](const internal::Bitmap(&bitmaps)[2], internal::Bitmap* out) { + int64_t i = 0; + internal::Bitmap::VisitBits( + bitmaps, [&](std::bitset<2> bits) { out->SetBitTo(i++, bits[0] && bits[1]); }); + }); +} + +static void BenchmarkBitmapVisitUInt8And(benchmark::State& state) { + BenchmarkAndImpl(state, [](const internal::Bitmap(&bitmaps)[2], internal::Bitmap* out) { + int64_t i = 0; + internal::Bitmap::VisitWords(bitmaps, [&](std::array uint8s) { + reinterpret_cast(out->buffer()->mutable_data())[i++] = + uint8s[0] & uint8s[1]; + }); + }); +} + +static void BenchmarkBitmapVisitUInt64And(benchmark::State& state) { + BenchmarkAndImpl(state, [](const internal::Bitmap(&bitmaps)[2], internal::Bitmap* out) { + int64_t i = 0; + internal::Bitmap::VisitWords(bitmaps, [&](std::array uint64s) { + reinterpret_cast(out->buffer()->mutable_data())[i++] = + uint64s[0] & uint64s[1]; + }); + }); +} + template static void BenchmarkBitmapReader(benchmark::State& state, int64_t nbytes) { std::shared_ptr buffer = CreateRandomBuffer(nbytes); @@ -321,5 +384,14 @@ BENCHMARK(GenerateBitsUnrolled)->Arg(kBufferSize); BENCHMARK(CopyBitmapWithoutOffset)->Arg(kBufferSize); BENCHMARK(CopyBitmapWithOffset)->Arg(kBufferSize); +#define AND_BENCHMARK_RANGES \ + { \ + {kBufferSize * 4, kBufferSize * 16}, { 0, 2 } \ + } +BENCHMARK(BenchmarkBitmapAnd)->Ranges(AND_BENCHMARK_RANGES); 
+BENCHMARK(BenchmarkBitmapVisitBitsetAnd)->Ranges(AND_BENCHMARK_RANGES); +BENCHMARK(BenchmarkBitmapVisitUInt8And)->Ranges(AND_BENCHMARK_RANGES); +BENCHMARK(BenchmarkBitmapVisitUInt64And)->Ranges(AND_BENCHMARK_RANGES); + } // namespace BitUtil } // namespace arrow diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index 4ff7e4e1a9d..90f0fc06134 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -1092,4 +1092,166 @@ TEST(BitUtil, BitsetStack) { ASSERT_EQ(stack.TopSize(), 0); } +// Test the basic assumption behind the word-level Bitmap visit (Bitmap::VisitWords): bit i of a byte sequence, as read by BitUtil::GetBit, is bit i of the integer word overlaying it, so a bit offset reduces to a word shift +TEST(Bitmap, ShiftingWordsOptimization) { + // single word + { + uint64_t word; + auto bytes = reinterpret_cast(&word); + constexpr size_t kBitWidth = sizeof(word) * 8; + + for (int seed = 0; seed < 64; ++seed) { + random_bytes(sizeof(word), seed, bytes); + + // bits are accessible through simple bit shifting of the word + for (size_t i = 0; i < kBitWidth; ++i) { + ASSERT_EQ(BitUtil::GetBit(bytes, i), bool((word >> i) & 1)); + } + + // a bit offset can therefore be accommodated by shifting the word + for (size_t offset = 0; offset < (kBitWidth * 3) / 4; ++offset) { + uint64_t shifted_word = word >> offset; + auto shifted_bytes = reinterpret_cast(&shifted_word); + ASSERT_TRUE( + internal::BitmapEquals(bytes, offset, shifted_bytes, 0, kBitWidth - offset)); + } + } + } + + // two words + { + uint64_t words[2]; + auto bytes = reinterpret_cast(words); + constexpr size_t kBitWidth = sizeof(words[0]) * 8; + + for (int seed = 0; seed < 64; ++seed) { + random_bytes(sizeof(words), seed, bytes); + + // bits are accessible through simple bit shifting of a word + for (size_t i = 0; i < kBitWidth; ++i) { + ASSERT_EQ(BitUtil::GetBit(bytes, i), bool((words[0] >> i) & 1)); + } + for (size_t i = 0; i < kBitWidth; ++i) { + ASSERT_EQ(BitUtil::GetBit(bytes, i + kBitWidth), bool((words[1] >> i) & 1)); + } + + // a bit offset can therefore be accommodated by shifting both words, carrying the low bits of words[1] into the top of words[0] + for
(size_t offset = 1; offset < (kBitWidth * 3) / 4; offset += 3) { + uint64_t shifted_words[2]; + shifted_words[0] = words[0] >> offset | (words[1] << (kBitWidth - offset)); + shifted_words[1] = words[1] >> offset; + auto shifted_bytes = reinterpret_cast(shifted_words); + + // from offset to unshifted word boundary + ASSERT_TRUE( + internal::BitmapEquals(bytes, offset, shifted_bytes, 0, kBitWidth - offset)); + + // from unshifted word boundary to shifted word boundary + ASSERT_TRUE(internal::BitmapEquals(bytes, kBitWidth, shifted_bytes, + kBitWidth - offset, offset)); + + // from shifted word boundary to end + ASSERT_TRUE(internal::BitmapEquals(bytes, kBitWidth + offset, shifted_bytes, + kBitWidth, kBitWidth - offset)); + } + } + } +} + +namespace internal { + +static Bitmap Copy(const Bitmap& bitmap, std::shared_ptr storage) { + int64_t i = 0; + auto min_offset = Bitmap::VisitWords({bitmap}, [&](std::array uint64s) { + reinterpret_cast(storage->mutable_data())[i++] = uint64s[0]; + }); + return Bitmap(std::move(storage), min_offset, bitmap.length()); +} + +// Reconstruct a bitmap from a word-wise visit: Copy() stores every visited word into separate storage, and the result must equal the source bitmap for a range of offsets and lengths +TEST(Bitmap, VisitWords) { + constexpr int64_t nbytes = 1 << 10; + std::shared_ptr buffer, actual_buffer; + for (std::shared_ptr* b : {&buffer, &actual_buffer}) { + ASSERT_OK(AllocateBuffer(nbytes, b)); + memset((*b)->mutable_data(), 0, nbytes); + } + random_bytes(nbytes, 0, buffer->mutable_data()); + + constexpr int64_t kBitWidth = 8 * sizeof(uint64_t); + + for (int64_t offset : {0, 1, 2, 5, 17}) { + for (int64_t num_bits : + {int64_t(13), int64_t(9), kBitWidth - 1, kBitWidth, kBitWidth + 1, + nbytes * 8 - offset, nbytes * 6, nbytes * 4}) { + Bitmap actual = Copy({buffer, offset, num_bits}, actual_buffer); + ASSERT_EQ(actual, Bitmap(buffer->data(), offset, num_bits)) + << "offset:" << offset << " bits:" << num_bits << std::endl + << Bitmap(actual_buffer, 0, num_bits).Diff({buffer, offset, num_bits}); + } + } +} + +TEST(Bitmap, VisitPartialWords) { + uint64_t
words[2]; + constexpr auto nbytes = sizeof(words); + constexpr auto nbits = nbytes * 8; + + auto buffer = Buffer::Wrap(words, 2); + Bitmap bitmap(buffer, 0, nbits); + std::shared_ptr storage; + ASSERT_OK(AllocateBuffer(nbytes, &storage)); + + // words lying partially outside the buffer are not accessible as whole words, but their bits are still loaded bitwise + auto first_byte_was_missing = Bitmap(SliceBuffer(buffer, 1), 0, nbits - 8); + ASSERT_EQ(Copy(first_byte_was_missing, storage), bitmap.Slice(8)); + + auto last_byte_was_missing = Bitmap(SliceBuffer(buffer, 0, nbytes - 1), 0, nbits - 8); + ASSERT_EQ(Copy(last_byte_was_missing, storage), bitmap.Slice(0, nbits - 8)); +} + +// Compute the bitwise AND of two bitmaps using a word-wise visit and check the result against BitmapAnd +TEST(Bitmap, VisitWordsAnd) { + constexpr int64_t nbytes = 1 << 10; + std::shared_ptr buffer, actual_buffer, expected_buffer; + for (std::shared_ptr* b : {&buffer, &actual_buffer, &expected_buffer}) { + ASSERT_OK(AllocateBuffer(nbytes, b)); + memset((*b)->mutable_data(), 0, nbytes); + } + random_bytes(nbytes, 0, buffer->mutable_data()); + + constexpr int64_t kBitWidth = 8 * sizeof(uint64_t); + + for (int64_t left_offset : + {0, 1, 2, 5, 17, int(kBitWidth - 1), int(kBitWidth + 1), int(kBitWidth + 17)}) { + for (int64_t right_offset = 0; right_offset < left_offset; ++right_offset) { + for (int64_t num_bits : + {int64_t(13), int64_t(9), kBitWidth - 1, kBitWidth, kBitWidth + 1, + 2 * kBitWidth - 1, 2 * kBitWidth, 2 * kBitWidth + 1, nbytes * 8 - left_offset, + 3 * kBitWidth - 1, 3 * kBitWidth, 3 * kBitWidth + 1, nbytes * 6, + nbytes * 4}) { + Bitmap bitmaps[] = {{buffer, left_offset, num_bits}, + {buffer, right_offset, num_bits}}; + + int64_t i = 0; + auto min_offset = + Bitmap::VisitWords(bitmaps, [&](std::array uint64s) { + reinterpret_cast(actual_buffer->mutable_data())[i++] = + uint64s[0] & uint64s[1]; + }); + + BitmapAnd(bitmaps[0].buffer()->data(), bitmaps[0].offset(), + bitmaps[1].buffer()->data(), bitmaps[1].offset(), bitmaps[0].length(), + 0,
expected_buffer->mutable_data()); + + ASSERT_TRUE(BitmapEquals(actual_buffer->data(), min_offset, + expected_buffer->data(), 0, num_bits)) + << "left_offset:" << left_offset << " bits:" << num_bits + << " right_offset:" << right_offset << std::endl + << Bitmap(actual_buffer, 0, num_bits).Diff({expected_buffer, 0, num_bits}); + } + } + } +} +} // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/string_view.h b/cpp/src/arrow/util/string_view.h index 88748429b7e..4a51c2ebd9e 100644 --- a/cpp/src/arrow/util/string_view.h +++ b/cpp/src/arrow/util/string_view.h @@ -15,11 +15,13 @@ // specific language governing permissions and limitations // under the License. -#ifndef ARROW_UTIL_STRING_VIEW_H -#define ARROW_UTIL_STRING_VIEW_H +#pragma once #define nssv_CONFIG_SELECT_STRING_VIEW nssv_STRING_VIEW_NONSTD +#include +#include + #include "arrow/vendored/string_view.hpp" // IWYU pragma: export namespace arrow { @@ -27,7 +29,10 @@ namespace util { using nonstd::string_view; +template > +using basic_string_view = nonstd::basic_string_view; + +using bytes_view = basic_string_view; + } // namespace util } // namespace arrow - -#endif // ARROW_UTIL_STRING_VIEW_H