Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions cpp/src/arrow/compute/exec/asof_join_node.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type_traits.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/future.h"

Expand Down Expand Up @@ -608,6 +609,7 @@ class CompositeReferenceTable {
}

switch (field_type->id()) {
ASOFJOIN_MATERIALIZE_CASE(BOOL)
ASOFJOIN_MATERIALIZE_CASE(INT8)
ASOFJOIN_MATERIALIZE_CASE(INT16)
ASOFJOIN_MATERIALIZE_CASE(INT32)
Expand Down Expand Up @@ -662,16 +664,20 @@ class CompositeReferenceTable {
void AddRecordBatchRef(const std::shared_ptr<RecordBatch>& ref) {
if (!_ptr2ref.count((uintptr_t)ref.get())) _ptr2ref[(uintptr_t)ref.get()] = ref;
}

template <class Type, class Builder = typename TypeTraits<Type>::BuilderType>
enable_if_fixed_width_type<Type, Status> static BuilderAppend(
Builder& builder, const std::shared_ptr<ArrayData>& source, row_index_t row) {
if (source->IsNull(row)) {
builder.UnsafeAppendNull();
return Status::OK();
}
using CType = typename TypeTraits<Type>::CType;
builder.UnsafeAppend(source->template GetValues<CType>(1)[row]);

if constexpr (is_boolean_type<Type>::value) {
builder.UnsafeAppend(bit_util::GetBit(source->template GetValues<uint8_t>(1), row));
} else {
using CType = typename TypeTraits<Type>::CType;
builder.UnsafeAppend(source->template GetValues<CType>(1)[row]);
}
return Status::OK();
}

Expand Down Expand Up @@ -924,6 +930,7 @@ class AsofJoinNode : public ExecNode {

static Status is_valid_data_field(const std::shared_ptr<Field>& field) {
switch (field->type()->id()) {
case Type::BOOL:
case Type::INT8:
case Type::INT16:
case Type::INT32:
Expand Down
22 changes: 19 additions & 3 deletions cpp/src/arrow/compute/exec/asof_join_node_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,9 @@ Result<BatchesWithSchema> MakeBatchesFromNumString(
const std::vector<std::string_view>& json_strings, int multiplicity = 1) {
FieldVector num_fields;
for (auto field : schema->fields()) {
num_fields.push_back(
is_base_binary_like(field->type()->id()) ? field->WithType(int64()) : field);
auto id = field->type()->id();
bool adjust = id == Type::BOOL || is_base_binary_like(id);
num_fields.push_back(adjust ? field->WithType(int64()) : field);
}
auto num_schema =
std::make_shared<Schema>(num_fields, schema->endianness(), schema->metadata());
Expand All @@ -83,6 +84,7 @@ Result<BatchesWithSchema> MakeBatchesFromNumString(
batches.schema = schema;
int n_fields = schema->num_fields();
for (auto num_batch : num_batches.batches) {
Datum two(Int32Scalar(2));
std::vector<Datum> values;
for (int i = 0; i < n_fields; i++) {
auto type = schema->field(i)->type();
Expand All @@ -91,6 +93,18 @@ Result<BatchesWithSchema> MakeBatchesFromNumString(
ARROW_ASSIGN_OR_RAISE(Datum as_string, Cast(num_batch.values[i], utf8()));
ARROW_ASSIGN_OR_RAISE(Datum as_type, Cast(as_string, type));
values.push_back(as_type);
} else if (Type::BOOL == type->id()) {
// the next 4 lines compute `as_bool` as `(bool)(x - 2*(x/2))`, i.e., the low bit
// of `x`. Here, `x` stands for `num_batch.values[i]`, which is an `int64` value.
// Taking the low bit is a somewhat arbitrary way of obtaining both `true` and
// `false` values from the `int64` values in the test data, in order to get good
// testing coverage. A simple cast to a Boolean value would not get good coverage
// because all positive values would be cast to `true`.
ARROW_ASSIGN_OR_RAISE(Datum div_two, Divide(num_batch.values[i], two));
ARROW_ASSIGN_OR_RAISE(Datum rounded, Multiply(div_two, two));
ARROW_ASSIGN_OR_RAISE(Datum low_bit, Subtract(num_batch.values[i], rounded));
ARROW_ASSIGN_OR_RAISE(Datum as_bool, Cast(low_bit, type));
Comment on lines +103 to +106
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, we have a bit_wise_and kernel but I don't think there is a C++ helper function (e.g. BitWiseAnd) wrapping it. This is probably fine.

values.push_back(as_bool);
} else {
values.push_back(num_batch.values[i]);
}
Expand Down Expand Up @@ -526,6 +540,7 @@ struct BasicTest {
large_utf8(),
binary(),
large_binary(),
boolean(),
int8(),
int16(),
int32(),
Expand All @@ -550,7 +565,8 @@ struct BasicTest {
// byte_width > 1 below allows fitting the tested data
auto time_types = init_types(
all_types, [](T& t) { return t->byte_width() > 1 && !is_floating(t->id()); });
auto key_types = init_types(all_types, [](T& t) { return !is_floating(t->id()); });
auto key_types = init_types(
all_types, [](T& t) { return !is_floating(t->id()) && t->id() != Type::BOOL; });
auto l_types = init_types(all_types, [](T& t) { return true; });
auto r0_types = init_types(all_types, [](T& t) { return t->byte_width() > 1; });
auto r1_types = init_types(all_types, [](T& t) { return t->byte_width() > 1; });
Expand Down