From 816fe164d4a6b928287150d8b2c8a5e19b772e8e Mon Sep 17 00:00:00 2001
From: Praveen
Date: Sun, 23 Jun 2019 23:51:24 +0530
Subject: [PATCH] Implement castVarcharVarchar

Register castVARCHAR(utf8, int64) in the Gandiva string function registry and
add its precompiled implementation, castVARCHAR_utf8_int64, which copies at
most the requested number of bytes of the input into a buffer obtained from
gdv_fn_context_arena_malloc.

The requested length is clamped in 64 bits before it is narrowed to int32, so
a negative length produces an empty string and a length above INT32_MAX no
longer wraps around.

Add TestUtf8.TestCastVarChar and register utf8_test with the Gandiva tests.
---
Review note: the blob ids on the string_ops.cc "index" line predate the length
clamp added to castVARCHAR_utf8_int64; regenerate the patch with
git format-patch if a 3-way apply is needed.

 cpp/src/gandiva/function_registry_string.cc |  4 ++
 cpp/src/gandiva/precompiled/string_ops.cc   | 19 +++++++++
 cpp/src/gandiva/tests/CMakeLists.txt        |  1 +
 cpp/src/gandiva/tests/utf8_test.cc          | 48 +++++++++++++++++++++
 4 files changed, 72 insertions(+)

diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc
index bc78ec66522..a62fde827c3 100644
--- a/cpp/src/gandiva/function_registry_string.cc
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -51,6 +51,10 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
       NativeFunction("upper", DataTypeVector{utf8()}, utf8(), kResultNullIfNull,
                      "upper_utf8", NativeFunction::kNeedsContext),
 
+      NativeFunction("castVARCHAR", DataTypeVector{utf8(), int64()}, utf8(),
+                     kResultNullIfNull, "castVARCHAR_utf8_int64",
+                     NativeFunction::kNeedsContext),
+
       NativeFunction("like", DataTypeVector{utf8(), utf8()}, boolean(),
                      kResultNullIfNull, "gdv_fn_like_utf8_utf8",
                      NativeFunction::kNeedsFunctionHolder)};
diff --git a/cpp/src/gandiva/precompiled/string_ops.cc b/cpp/src/gandiva/precompiled/string_ops.cc
index 2125e18422f..545499a6a51 100644
--- a/cpp/src/gandiva/precompiled/string_ops.cc
+++ b/cpp/src/gandiva/precompiled/string_ops.cc
@@ -160,4 +160,23 @@ char* upper_utf8(int64 context, const char* data, int32 data_len, int32_t* out_l
   return ret;
 }
 
+// Truncates the string to at most out_len bytes. The result is allocated with
+// gdv_fn_context_arena_malloc and its length is stored in *out_length.
+// A negative out_len yields an empty string.
+// NOTE(review): the limit is applied to bytes, so a multi-byte UTF-8 character can be
+// split at the cut - confirm whether callers expect a character count instead.
+FORCE_INLINE
+char* castVARCHAR_utf8_int64(int64 context, const char* data, int32 data_len,
+                             int64_t out_len, int32_t* out_length) {
+  // TODO: handle allocation failures
+  // Clamp in 64 bits before narrowing: casting out_len to int32 first would wrap
+  // lengths above INT32_MAX, and a negative length would reach memcpy as a huge size.
+  int64_t max_len = out_len < 0 ? 0 : out_len;
+  int32_t len = max_len < data_len ? static_cast<int32_t>(max_len) : data_len;
+  char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, len));
+  memcpy(ret, data, len);
+  *out_length = len;
+  return ret;
+}
+
 }  // extern "C"
diff --git a/cpp/src/gandiva/tests/CMakeLists.txt b/cpp/src/gandiva/tests/CMakeLists.txt
index 0ec45fd2988..3c40b75d605 100644
--- a/cpp/src/gandiva/tests/CMakeLists.txt
+++ b/cpp/src/gandiva/tests/CMakeLists.txt
@@ -29,6 +29,7 @@ add_gandiva_test(in_expr_test)
 add_gandiva_test(null_validity_test)
 add_gandiva_test(decimal_test)
 add_gandiva_test(decimal_single_test)
+add_gandiva_test(utf8_test)
 
 add_gandiva_test(projector_test_static SOURCES projector_test.cc
                  USE_STATIC_LINKING)
diff --git a/cpp/src/gandiva/tests/utf8_test.cc b/cpp/src/gandiva/tests/utf8_test.cc
index 925ceea8362..6fdc6efdf50 100644
--- a/cpp/src/gandiva/tests/utf8_test.cc
+++ b/cpp/src/gandiva/tests/utf8_test.cc
@@ -468,4 +468,52 @@ TEST_F(TestUtf8, TestToDateError) {
       << status.message();
 }
 
+TEST_F(TestUtf8, TestCastVarChar) {
+  // schema for input fields
+  auto field_a = field("a", utf8());
+  auto field_c = field("c", utf8());
+  auto schema = arrow::schema({field_a, field_c});
+
+  // output fields
+  auto res = field("res", boolean());
+
+  // build expressions.
+  auto node_a = TreeExprBuilder::MakeField(field_a);
+  auto node_c = TreeExprBuilder::MakeField(field_c);
+  // truncates the string to input length
+  auto node_b = TreeExprBuilder::MakeLiteral(static_cast<int64_t>(10));
+  auto cast_varchar =
+      TreeExprBuilder::MakeFunction("castVARCHAR", {node_a, node_b}, utf8());
+  auto equals = TreeExprBuilder::MakeFunction("equal", {cast_varchar, node_c}, boolean());
+  auto expr = TreeExprBuilder::MakeExpression(equals, res);
+
+  // Build a projector for the expressions.
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  // Create a row-batch with some sample data
+  int num_records = 5;
+  auto array_a = MakeArrowArrayUtf8(
+      {"park", "Sparkle", "bright spark and fire", "fiery SPARK", "मदन"},
+      {true, true, false, true, true});
+
+  auto array_b =
+      MakeArrowArrayUtf8({"park", "Sparkle", "bright spar", "fiery SPAR", "मदन"},
+                         {true, true, true, true, true});
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a, array_b});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok()) << status.message();
+
+  auto exp = MakeArrowArrayBool({true, true, false, true, true},
+                                {true, true, false, true, true});
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs[0]);
+}
+
 }  // namespace gandiva