diff --git a/velox/functions/prestosql/benchmarks/CMakeLists.txt b/velox/functions/prestosql/benchmarks/CMakeLists.txt index 41d6f0306c0..87285f445e9 100644 --- a/velox/functions/prestosql/benchmarks/CMakeLists.txt +++ b/velox/functions/prestosql/benchmarks/CMakeLists.txt @@ -191,3 +191,8 @@ add_executable(velox_functions_prestosql_benchmarks_generic GenericBenchmark.cpp) target_link_libraries(velox_functions_prestosql_benchmarks_generic ${BENCHMARK_DEPENDENCIES}) + +add_executable(velox_functions_prestosql_benchmarks_uuid_cast + UuidCastBenchmark.cpp) +target_link_libraries(velox_functions_prestosql_benchmarks_uuid_cast + ${BENCHMARK_DEPENDENCIES}) diff --git a/velox/functions/prestosql/benchmarks/UuidCastBenchmark.cpp b/velox/functions/prestosql/benchmarks/UuidCastBenchmark.cpp new file mode 100644 index 00000000000..e8256b045b4 --- /dev/null +++ b/velox/functions/prestosql/benchmarks/UuidCastBenchmark.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include "velox/benchmarks/ExpressionBenchmarkBuilder.h" +#include "velox/functions/prestosql/registration/RegistrationFunctions.h" + +using namespace facebook; +using namespace facebook::velox; + +int main(int argc, char** argv) { + folly::Init init(&argc, &argv); + memory::MemoryManager::initialize({}); + + functions::prestosql::registerAllScalarFunctions(); + + ExpressionBenchmarkBuilder benchmarkBuilder; + + benchmarkBuilder.addBenchmarkSet("cast", ROW({})) + .addExpression("no_cast", "uuid()") + .addExpression("as_varchar", "cast(uuid() as varchar)"); + + benchmarkBuilder.registerBenchmarks(); + folly::runBenchmarks(); + return 0; +} diff --git a/velox/functions/prestosql/tests/UuidFunctionsTest.cpp b/velox/functions/prestosql/tests/UuidFunctionsTest.cpp index 6290443a71e..a66a9ff5adb 100644 --- a/velox/functions/prestosql/tests/UuidFunctionsTest.cpp +++ b/velox/functions/prestosql/tests/UuidFunctionsTest.cpp @@ -14,6 +14,9 @@ * limitations under the License. */ +#include +#include +#include #include "velox/common/base/tests/GTestUtils.h" #include "velox/functions/prestosql/tests/utils/FunctionBaseTest.h" @@ -50,18 +53,35 @@ TEST_F(UuidFunctionsTest, typeof) { } TEST_F(UuidFunctionsTest, castAsVarchar) { + const vector_size_t size = 1'000; + auto uuids = + evaluate>("uuid()", makeRowVector(ROW({}), size)); + auto result = evaluate>( - "cast(uuid() as varchar)", makeRowVector(ROW({}), 10)); + "cast(c0 as varchar)", makeRowVector({uuids})); + + // Verify that CAST results are the same as boost::lexical_cast. We do not use + // boost::lexical_cast to implement CAST because it is too slow. + auto expected = makeFlatVector(size, [&](auto row) { + const auto uuid = uuids->valueAt(row); + + boost::uuids::uuid u; + memcpy(&u, &uuid, 16); + + return boost::lexical_cast(u); + }); + + velox::test::assertEqualVectors(expected, result); // Sanity check results. All strings are unique. Each string is 36 bytes // long. 
- std::unordered_set uuids; - for (auto i = 0; i < 10; ++i) { + std::unordered_set uniqueUuids; + for (auto i = 0; i < size; ++i) { const auto uuid = result->valueAt(i).str(); ASSERT_EQ(36, uuid.size()); - ASSERT_TRUE(uuids.insert(uuid).second); + ASSERT_TRUE(uniqueUuids.insert(uuid).second); } - ASSERT_EQ(10, uuids.size()); + ASSERT_EQ(size, uniqueUuids.size()); } TEST_F(UuidFunctionsTest, castRoundTrip) { diff --git a/velox/functions/prestosql/types/UuidType.cpp b/velox/functions/prestosql/types/UuidType.cpp index 8d11e941664..8d0b5b5b22f 100644 --- a/velox/functions/prestosql/types/UuidType.cpp +++ b/velox/functions/prestosql/types/UuidType.cpp @@ -77,13 +77,36 @@ class UuidCastOperator : public exec::CastOperator { context.applyToSelectedNoThrow(rows, [&](auto row) { const auto uuid = uuids->valueAt(row); - boost::uuids::uuid u; - memcpy(&u, &uuid, 16); + const uint8_t* uuidBytes = reinterpret_cast(&uuid); + + // Do not use boost::lexical_cast. It is very slow. - std::string s = boost::lexical_cast(u); + // 2 hex digits per each value in [0, 255] range (1 byte). 
+ static const char* const kHexTable = + "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f" + "202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f" + "404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f" + "606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f" + "808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9f" + "a0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebf" + "c0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedf" + "e0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff"; exec::StringWriter result(flatResult, row); - result.append(s); + result.resize(36); + + size_t offset = 0; + for (auto i = 0; i < 16; ++i) { + result.data()[offset] = kHexTable[uuidBytes[i] * 2]; + result.data()[offset + 1] = kHexTable[uuidBytes[i] * 2 + 1]; + + offset += 2; + if (i == 3 || i == 5 || i == 7 || i == 9) { + result.data()[offset] = '-'; + offset++; + } + } + result.finalize(); }); }