Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cpp/src/gandiva/function_registry_string.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
NativeFunction("upper", DataTypeVector{utf8()}, utf8(), kResultNullIfNull,
"upper_utf8", NativeFunction::kNeedsContext),

NativeFunction("castVARCHAR", DataTypeVector{utf8(), int64()}, utf8(),
kResultNullIfNull, "castVARCHAR_utf8_int64",
NativeFunction::kNeedsContext),

NativeFunction("like", DataTypeVector{utf8(), utf8()}, boolean(), kResultNullIfNull,
"gdv_fn_like_utf8_utf8", NativeFunction::kNeedsFunctionHolder)};

Expand Down
12 changes: 12 additions & 0 deletions cpp/src/gandiva/precompiled/string_ops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,18 @@ char* upper_utf8(int64 context, const char* data, int32 data_len, int32_t* out_l
return ret;
}

// Truncates the string to the given length.
//
// Copies at most `out_len` bytes of `data` into a buffer obtained from
// gdv_fn_context_arena_malloc and returns that buffer.
//
//   context    - opaque handle forwarded to gdv_fn_context_arena_malloc.
//   data       - input bytes.
//   data_len   - number of bytes available in `data`.
//   out_len    - maximum number of bytes to keep; values <= 0 yield an empty
//                result.
//   out_length - receives the number of bytes in the returned buffer.
//
// NOTE: the cut is made on a byte boundary, so a multi-byte UTF-8 character that
// straddles `out_len` is split.
FORCE_INLINE
char* castVARCHAR_utf8_int64(int64 context, const char* data, int32 data_len,
                             int64_t out_len, int32_t* out_length) {
  // Pick the smaller length in 64 bits. Narrowing out_len to 32 bits before the
  // comparison would wrap for values above INT32_MAX and truncate the string to
  // an unrelated length.
  int64_t keep = out_len < static_cast<int64_t>(data_len) ? out_len
                                                          : static_cast<int64_t>(data_len);
  // A negative length must never reach the allocator or memcpy.
  if (keep < 0) {
    keep = 0;
  }
  // keep is now within [0, data_len], so it fits in 32 bits.
  const int32_t len = static_cast<int32_t>(keep);

  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, len));
  if (ret == nullptr) {
    // TODO: report allocation failures through the context; for now return an
    // empty result rather than copying into a null buffer.
    *out_length = 0;
    return ret;
  }
  memcpy(ret, data, len);
  *out_length = len;
  return ret;
}

// Generates a function named NAME##_##TYPE taking (TYPE in, int32 len,
// boolean is_valid). The generated function returns true exactly when
// `is_valid` is false; `in` and `len` are accepted but not read.
#define IS_NULL(NAME, TYPE) \
FORCE_INLINE \
bool NAME##_##TYPE(TYPE in, int32 len, boolean is_valid) { return !is_valid; }
Expand Down
1 change: 1 addition & 0 deletions cpp/src/gandiva/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ add_gandiva_test(in_expr_test)
add_gandiva_test(null_validity_test)
add_gandiva_test(decimal_test)
add_gandiva_test(decimal_single_test)
add_gandiva_test(utf8_test)

add_gandiva_test(projector_test_static SOURCES projector_test.cc USE_STATIC_LINKING)

Expand Down
52 changes: 50 additions & 2 deletions cpp/src/gandiva/tests/utf8_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,55 @@ TEST_F(TestUtf8, TestToDateError) {
<< status.message();
}

// Checks castVARCHAR(a, 10) by comparing its result with the expected strings
// supplied in column `c`; the projected output is the per-row equality.
TEST_F(TestUtf8, TestCastVarChar) {
  // schema for input fields
  auto field_a = field("a", utf8());
  auto field_c = field("c", utf8());
  auto schema = arrow::schema({field_a, field_c});

  // output fields
  auto res = field("res", boolean());

  // build expressions.
  auto node_a = TreeExprBuilder::MakeField(field_a);
  auto node_c = TreeExprBuilder::MakeField(field_c);
  // length argument passed to castVARCHAR
  auto node_b = TreeExprBuilder::MakeLiteral(static_cast<int64_t>(10));
  auto cast_varchar =
      TreeExprBuilder::MakeFunction("castVARCHAR", {node_a, node_b}, utf8());
  auto equals = TreeExprBuilder::MakeFunction("equal", {cast_varchar, node_c}, boolean());
  auto expr = TreeExprBuilder::MakeExpression(equals, res);

  // Build a projector for the expressions.
  std::shared_ptr<Projector> projector;
  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
  // Fatal assertion: `projector` is dereferenced below, so the test must stop
  // here if it could not be built.
  ASSERT_TRUE(status.ok()) << status.message();

  // Create a row-batch with some sample data
  int num_records = 5;
  // Input strings; the third row is marked invalid (null).
  auto array_a = MakeArrowArrayUtf8(
      {"park", "Sparkle", "bright spark and fire", "fiery SPARK", "मदन"},
      {true, true, false, true, true});

  // Strings the cast result is compared against (column `c`).
  auto array_c =
      MakeArrowArrayUtf8({"park", "Sparkle", "bright spar", "fiery SPAR", "मदन"},
                         {true, true, true, true, true});

  // prepare input record batch
  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a, array_c});

  // Evaluate expression
  arrow::ArrayVector outputs;
  status = projector->Evaluate(*in_batch, pool_, &outputs);
  // Fatal assertion: outputs[0] is read below.
  ASSERT_TRUE(status.ok()) << status.message();

  // The third entry is expected to be null, matching the null input row.
  auto exp = MakeArrowArrayBool({true, true, false, true, true},
                                {true, true, false, true, true});

  // Validate results
  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs[0]);
}

TEST_F(TestUtf8, TestIsNull) {
// schema for input fields
auto field_a = field("a", utf8());
Expand All @@ -492,11 +541,10 @@ TEST_F(TestUtf8, TestIsNull) {

// prepare input record batch
auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});

// Evaluate expression
arrow::ArrayVector outputs;
status = projector->Evaluate(*in_batch, pool_, &outputs);

// validate results
EXPECT_ARROW_ARRAY_EQUALS(MakeArrowArrayBool({false, false, true, false}),
outputs[0]); // isnull
Expand Down