Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cpp/src/arrow/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,10 @@ class ARROW_EXPORT Buffer {
return util::string_view(reinterpret_cast<const char*>(data_), size_);
}

/// \brief View buffer contents as a util::bytes_view
///
/// The conversion is explicit, so it must be requested with a cast or direct
/// initialization. The view is constructed from data_ and size_.
/// \return util::bytes_view
explicit operator util::bytes_view() const { return util::bytes_view(data_, size_); }

/// \brief Return a pointer to the buffer's data
/// \return the read-only data_ pointer
const uint8_t* data() const { return data_; }
/// \brief Return a writable pointer to the buffer's data
Expand Down
137 changes: 126 additions & 11 deletions cpp/src/arrow/compute/kernels/boolean.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@

#include "arrow/compute/kernels/boolean.h"

#include <bitset>
#include <memory>
#include <utility>
#include <vector>

#include "arrow/array.h"
Expand All @@ -31,6 +33,7 @@

namespace arrow {

using internal::Bitmap;
using internal::BitmapAnd;
using internal::BitmapOr;
using internal::BitmapXor;
Expand Down Expand Up @@ -76,7 +79,13 @@ Status Invert(FunctionContext* ctx, const Datum& value, Datum* out) {
return Status::OK();
}

/// \brief Strategy a binary boolean kernel uses when an operand slot is null.
///
/// PROPAGATE: the output validity is assigned with detail::AssignNullIntersection
/// and the data bits are combined directly.
/// KLEENE_LOGIC: when either operand has nulls, AndKernel and OrKernel derive
/// output validity and data together via BinaryBooleanKernel::ComputeKleene.
enum class ResolveNull { KLEENE_LOGIC, PROPAGATE };

class BinaryBooleanKernel : public BinaryKernel {
public:
/// \brief Construct a kernel with the given null-handling strategy
/// \param[in] resolve_null stored in resolve_null_ and consulted by subclasses'
/// Compute() implementations
explicit BinaryBooleanKernel(ResolveNull resolve_null) : resolve_null_(resolve_null) {}

protected:
/// \brief Compute the operation for a pair of array operands, writing into out.
///
/// Pure virtual; the And, Or and Xor kernels in this file override it.
virtual Status Compute(FunctionContext* ctx, const ArrayData& left,
const ArrayData& right, ArrayData* out) = 0;

Expand All @@ -91,50 +100,156 @@ class BinaryBooleanKernel : public BinaryKernel {
ArrayData* result;

result = out->array().get();
RETURN_NOT_OK(detail::AssignNullIntersection(ctx, left_data, right_data, result));
return Compute(ctx, left_data, right_data, result);
}

/// \brief Output type of the kernel
/// \return boolean()
std::shared_ptr<DataType> out_type() const override { return boolean(); }

/// \brief Positions of the four input bitmaps used by ComputeKleene.
///
/// Deliberately an unscoped enum: the enumerators are used directly as array
/// indices into both the Bitmap array and the per-word std::array passed to the
/// visitor callback.
enum BitmapIndex { LEFT_VALID, LEFT_DATA, RIGHT_VALID, RIGHT_DATA };

/// \brief Evaluate a Kleene-logic binary operation over two boolean arrays.
///
/// Allocates out->buffers[0] (the output validity bitmap), then fills both the
/// validity and data buffers of out one 64-bit word at a time.
///
/// \param[in] compute_word callable invoked as
///     compute_word(left_true, left_false, right_true, right_false,
///                  &out_valid_word, &out_data_word)
///     where each input mask has a bit set for slots that are valid-and-true or
///     valid-and-false respectively. Because the operands may be swapped below,
///     the callable must be symmetric in its left/right arguments.
/// \param[in] ctx supplies the memory pool for the output validity bitmap
/// \param[in] left left operand
/// \param[in] right right operand
/// \param[out] out result array data; only buffers[0] is allocated here, so
///     buffers[1] must already exist (NOTE(review): presumably provided by the
///     allocating kernel wrapper -- confirm)
/// \return the error from allocating the validity bitmap, otherwise Status::OK()
template <typename ComputeWord>
Status ComputeKleene(ComputeWord&& compute_word, FunctionContext* ctx,
                     const ArrayData& left, const ArrayData& right, ArrayData* out) {
  // Use GetNullCount() rather than the raw null_count field: callers test
  // `left.GetNullCount() == 0 && right.GetNullCount() == 0`, which short-circuits
  // and may never query the right operand.
  // NOTE(review): assumes null_count can hold a "not yet computed" sentinel that
  // GetNullCount() resolves -- confirm against ArrayData.
  const auto left_null_count = left.GetNullCount();
  const auto right_null_count = right.GetNullCount();
  DCHECK(left_null_count != 0 || right_null_count != 0);

  Bitmap bitmaps[4];
  bitmaps[LEFT_VALID] = {left.buffers[0], left.offset, left.length};
  bitmaps[LEFT_DATA] = {left.buffers[1], left.offset, left.length};

  bitmaps[RIGHT_VALID] = {right.buffers[0], right.offset, right.length};
  bitmaps[RIGHT_DATA] = {right.buffers[1], right.offset, right.length};

  RETURN_NOT_OK(AllocateEmptyBitmap(ctx->memory_pool(), out->length, &out->buffers[0]));

  auto out_validity = out->GetMutableValues<uint64_t>(0);
  auto out_data = out->GetMutableValues<uint64_t>(1);

  // Index of the output word being written; advanced once per visited word.
  int64_t i = 0;
  auto apply = [&](uint64_t left_valid, uint64_t left_data, uint64_t right_valid,
                   uint64_t right_data) {
    // Split each operand into "definitely true" and "definitely false" masks;
    // a slot that is null is set in neither mask.
    auto left_true = left_valid & left_data;
    auto left_false = left_valid & ~left_data;

    auto right_true = right_valid & right_data;
    auto right_false = right_valid & ~right_data;

    compute_word(left_true, left_false, right_true, right_false, &out_validity[i],
                 &out_data[i]);
    ++i;
  };

  if (right_null_count == 0 || left_null_count == 0) {
    if (left_null_count == 0) {
      // ensure only bitmaps[RIGHT_VALID].buffer might be null
      std::swap(bitmaps[LEFT_VALID], bitmaps[RIGHT_VALID]);
      std::swap(bitmaps[LEFT_DATA], bitmaps[RIGHT_DATA]);
    }
    // override bitmaps[RIGHT_VALID] to make it safe for Visit(); its words are
    // ignored below in favour of an all-ones validity word.
    bitmaps[RIGHT_VALID] = bitmaps[RIGHT_DATA];

    Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
      apply(words[LEFT_VALID], words[LEFT_DATA], ~uint64_t(0), words[RIGHT_DATA]);
    });
  } else {
    Bitmap::VisitWords(bitmaps, [&](std::array<uint64_t, 4> words) {
      apply(words[LEFT_VALID], words[LEFT_DATA], words[RIGHT_VALID], words[RIGHT_DATA]);
    });
  }
  return Status::OK();
}

ResolveNull resolve_null_;
};

/// \brief Element-wise AND kernel.
///
/// With ResolveNull::PROPAGATE, or when neither operand contains nulls, the
/// output validity comes from detail::AssignNullIntersection and the data bits
/// from BitmapAnd. Otherwise validity and data are computed together through
/// ComputeKleene.
class AndKernel : public BinaryBooleanKernel {
 public:
  using BinaryBooleanKernel::BinaryBooleanKernel;

 private:
  Status Compute(FunctionContext* ctx, const ArrayData& left, const ArrayData& right,
                 ArrayData* out) override {
    if (resolve_null_ == ResolveNull::PROPAGATE ||
        (left.GetNullCount() == 0 && right.GetNullCount() == 0)) {
      RETURN_NOT_OK(detail::AssignNullIntersection(ctx, left, right, out));
      if (right.length > 0) {
        BitmapAnd(left.buffers[1]->data(), left.offset, right.buffers[1]->data(),
                  right.offset, right.length, 0, out->buffers[1]->mutable_data());
      }
      return Status::OK();
    }

    // Kleene AND on one word: the result is true only where both sides are
    // true, and it is valid (non-null) where either side is false or both
    // sides are true.
    auto compute_word = [](uint64_t left_true, uint64_t left_false, uint64_t right_true,
                           uint64_t right_false, uint64_t* out_valid,
                           uint64_t* out_data) {
      *out_data = left_true & right_true;
      *out_valid = left_false | right_false | (left_true & right_true);
    };

    return ComputeKleene(compute_word, ctx, left, right, out);
  }
};

/// Element-wise AND that propagates nulls: builds an AndKernel with
/// ResolveNull::PROPAGATE, wraps it in a PrimitiveAllocatingBinaryKernel and
/// invokes it on the two operands.
Status And(FunctionContext* ctx, const Datum& left, const Datum& right, Datum* out) {
  AndKernel and_kernel(ResolveNull::PROPAGATE);
  detail::PrimitiveAllocatingBinaryKernel kernel(&and_kernel);
  return detail::InvokeBinaryArrayKernel(ctx, &kernel, left, right, out);
}

/// Element-wise AND using Kleene logic: builds an AndKernel with
/// ResolveNull::KLEENE_LOGIC, wraps it in a PrimitiveAllocatingBinaryKernel and
/// invokes it on the two operands.
Status KleeneAnd(FunctionContext* ctx, const Datum& left, const Datum& right,
                 Datum* out) {
  AndKernel kleene_and(ResolveNull::KLEENE_LOGIC);
  detail::PrimitiveAllocatingBinaryKernel allocating_kernel(&kleene_and);
  return detail::InvokeBinaryArrayKernel(ctx, &allocating_kernel, left, right, out);
}

/// \brief Element-wise OR kernel.
///
/// With ResolveNull::PROPAGATE, or when neither operand contains nulls, the
/// output validity comes from detail::AssignNullIntersection and the data bits
/// from BitmapOr. Otherwise validity and data are computed together through
/// ComputeKleene.
class OrKernel : public BinaryBooleanKernel {
 public:
  using BinaryBooleanKernel::BinaryBooleanKernel;

 private:
  Status Compute(FunctionContext* ctx, const ArrayData& left, const ArrayData& right,
                 ArrayData* out) override {
    if (resolve_null_ == ResolveNull::PROPAGATE ||
        (left.GetNullCount() == 0 && right.GetNullCount() == 0)) {
      RETURN_NOT_OK(detail::AssignNullIntersection(ctx, left, right, out));
      if (right.length > 0) {
        BitmapOr(left.buffers[1]->data(), left.offset, right.buffers[1]->data(),
                 right.offset, right.length, 0, out->buffers[1]->mutable_data());
      }
      return Status::OK();
    }

    // Kleene OR on one word: the result is true where either side is true,
    // and it is valid (non-null) where either side is true or both sides are
    // false.
    auto compute_word = [](uint64_t left_true, uint64_t left_false, uint64_t right_true,
                           uint64_t right_false, uint64_t* out_valid,
                           uint64_t* out_data) {
      *out_data = left_true | right_true;
      *out_valid = left_true | right_true | (left_false & right_false);
    };

    return ComputeKleene(compute_word, ctx, left, right, out);
  }
};

/// Element-wise OR that propagates nulls: builds an OrKernel with
/// ResolveNull::PROPAGATE, wraps it in a PrimitiveAllocatingBinaryKernel and
/// invokes it on the two operands.
Status Or(FunctionContext* ctx, const Datum& left, const Datum& right, Datum* out) {
  OrKernel or_kernel(ResolveNull::PROPAGATE);
  detail::PrimitiveAllocatingBinaryKernel kernel(&or_kernel);
  return detail::InvokeBinaryArrayKernel(ctx, &kernel, left, right, out);
}

/// Element-wise OR using Kleene logic: builds an OrKernel with
/// ResolveNull::KLEENE_LOGIC, wraps it in a PrimitiveAllocatingBinaryKernel and
/// invokes it on the two operands.
Status KleeneOr(FunctionContext* ctx, const Datum& left, const Datum& right, Datum* out) {
  OrKernel kleene_or(ResolveNull::KLEENE_LOGIC);
  detail::PrimitiveAllocatingBinaryKernel allocating_kernel(&kleene_or);
  return detail::InvokeBinaryArrayKernel(ctx, &allocating_kernel, left, right, out);
}

class XorKernel : public BinaryBooleanKernel {
public:
XorKernel() : BinaryBooleanKernel(ResolveNull::PROPAGATE) {}

private:
Status Compute(FunctionContext* ctx, const ArrayData& left, const ArrayData& right,
ArrayData* out) override {
RETURN_NOT_OK(detail::AssignNullIntersection(ctx, left, right, out));
if (right.length > 0) {
BitmapXor(left.buffers[1]->data(), left.offset, right.buffers[1]->data(),
right.offset, right.length, 0, out->buffers[1]->mutable_data());
Expand Down
36 changes: 34 additions & 2 deletions cpp/src/arrow/compute/kernels/boolean.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ class FunctionContext;
ARROW_EXPORT
Status Invert(FunctionContext* context, const Datum& value, Datum* out);

/// \brief Element-wise AND of two boolean datums which always propagates nulls
/// (null and false is null).
///
/// \param[in] context the FunctionContext
/// \param[in] left left operand (array)
/// \param[in] right right operand (array)
Expand All @@ -48,7 +50,23 @@ Status Invert(FunctionContext* context, const Datum& value, Datum* out);
ARROW_EXPORT
Status And(FunctionContext* context, const Datum& left, const Datum& right, Datum* out);

/// \brief Element-wise AND of two boolean datums with a Kleene truth table
/// (null and false is false).
///
/// A slot of the result is false when either operand is false, true when both
/// operands are true, and null otherwise.
///
/// \param[in] context the FunctionContext
/// \param[in] left left operand (array)
/// \param[in] right right operand (array)
/// \param[out] out resulting datum
///
/// \since 1.0.0
/// \note API not yet finalized
ARROW_EXPORT
Status KleeneAnd(FunctionContext* context, const Datum& left, const Datum& right,
Datum* out);

/// \brief Element-wise OR of two boolean datums which always propagates nulls
/// (null or true is null).
///
/// \param[in] context the FunctionContext
/// \param[in] left left operand (array)
/// \param[in] right right operand (array)
Expand All @@ -59,6 +77,20 @@ Status And(FunctionContext* context, const Datum& left, const Datum& right, Datu
ARROW_EXPORT
Status Or(FunctionContext* context, const Datum& left, const Datum& right, Datum* out);

/// \brief Element-wise OR of two boolean datums with a Kleene truth table
/// (null or true is true).
///
/// A slot of the result is true when either operand is true, false when both
/// operands are false, and null otherwise.
///
/// \param[in] context the FunctionContext
/// \param[in] left left operand (array)
/// \param[in] right right operand (array)
/// \param[out] out resulting datum
///
/// \since 1.0.0
/// \note API not yet finalized
ARROW_EXPORT
Status KleeneOr(FunctionContext* context, const Datum& left, const Datum& right,
Datum* out);

/// \brief Element-wise XOR of two boolean datums
/// \param[in] context the FunctionContext
/// \param[in] left left operand (array)
Expand Down
Loading