From 30287326a7b533953b7a5a7965e1804d41be1872 Mon Sep 17 00:00:00 2001 From: Masha Basmanova Date: Mon, 1 Jul 2024 08:57:09 -0700 Subject: [PATCH] Optimize cast(uuid as varchar) Summary: boost::lexical_cast used to implement cast(uuid as varchar) is very slow. Replace with custom optimization. Microbenchmark shows 20x improvement. The benchmark compares uuid() with cast(uuid() as varchar). The latter includes the code of the former plus the cost of the cast. Before the change, uuid() + cast was 16s, 10s more than uuid() alone. After the change, uuid() + cast() is just 0.5s more. Before: ``` ============================================================================ [...]hmarks/ExpressionBenchmarkBuilder.cpp relative time/iter iters/s ============================================================================ cast##no_cast 6.33s 157.86m cast##as_varchar 16.54s 60.45m ``` After: ``` ============================================================================ cast##no_cast 6.29s 159.10m cast##as_varchar 6.81s 146.74m ---------------------------------------------------------------------------- ``` Profile before the optimization: {F1735048022} Differential Revision: D59229653 --- .../prestosql/benchmarks/CMakeLists.txt | 5 +++ .../benchmarks/UuidCastBenchmark.cpp | 41 +++++++++++++++++++ .../prestosql/tests/UuidFunctionsTest.cpp | 30 +++++++++++--- velox/functions/prestosql/types/UuidType.cpp | 31 ++++++++++++-- 4 files changed, 98 insertions(+), 9 deletions(-) create mode 100644 velox/functions/prestosql/benchmarks/UuidCastBenchmark.cpp diff --git a/velox/functions/prestosql/benchmarks/CMakeLists.txt b/velox/functions/prestosql/benchmarks/CMakeLists.txt index 41d6f0306c0..87285f445e9 100644 --- a/velox/functions/prestosql/benchmarks/CMakeLists.txt +++ b/velox/functions/prestosql/benchmarks/CMakeLists.txt @@ -191,3 +191,8 @@ add_executable(velox_functions_prestosql_benchmarks_generic GenericBenchmark.cpp) 
target_link_libraries(velox_functions_prestosql_benchmarks_generic ${BENCHMARK_DEPENDENCIES}) + +add_executable(velox_functions_prestosql_benchmarks_uuid_cast + UuidCastBenchmark.cpp) +target_link_libraries(velox_functions_prestosql_benchmarks_uuid_cast + ${BENCHMARK_DEPENDENCIES}) diff --git a/velox/functions/prestosql/benchmarks/UuidCastBenchmark.cpp b/velox/functions/prestosql/benchmarks/UuidCastBenchmark.cpp new file mode 100644 index 00000000000..e8256b045b4 --- /dev/null +++ b/velox/functions/prestosql/benchmarks/UuidCastBenchmark.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include "velox/benchmarks/ExpressionBenchmarkBuilder.h" +#include "velox/functions/prestosql/registration/RegistrationFunctions.h" + +using namespace facebook; +using namespace facebook::velox; + +int main(int argc, char** argv) { + folly::Init init(&argc, &argv); + memory::MemoryManager::initialize({}); + + functions::prestosql::registerAllScalarFunctions(); + + ExpressionBenchmarkBuilder benchmarkBuilder; + + benchmarkBuilder.addBenchmarkSet("cast", ROW({})) + .addExpression("no_cast", "uuid()") + .addExpression("as_varchar", "cast(uuid() as varchar)"); + + benchmarkBuilder.registerBenchmarks(); + folly::runBenchmarks(); + return 0; +} diff --git a/velox/functions/prestosql/tests/UuidFunctionsTest.cpp b/velox/functions/prestosql/tests/UuidFunctionsTest.cpp index 6290443a71e..a66a9ff5adb 100644 --- a/velox/functions/prestosql/tests/UuidFunctionsTest.cpp +++ b/velox/functions/prestosql/tests/UuidFunctionsTest.cpp @@ -14,6 +14,9 @@ * limitations under the License. */ +#include +#include +#include #include "velox/common/base/tests/GTestUtils.h" #include "velox/functions/prestosql/tests/utils/FunctionBaseTest.h" @@ -50,18 +53,35 @@ TEST_F(UuidFunctionsTest, typeof) { } TEST_F(UuidFunctionsTest, castAsVarchar) { + const vector_size_t size = 1'000; + auto uuids = + evaluate>("uuid()", makeRowVector(ROW({}), size)); + auto result = evaluate>( - "cast(uuid() as varchar)", makeRowVector(ROW({}), 10)); + "cast(c0 as varchar)", makeRowVector({uuids})); + + // Verify that CAST results are the same as boost::lexical_cast. We do not use + // boost::lexical_cast to implement CAST because it is too slow. + auto expected = makeFlatVector(size, [&](auto row) { + const auto uuid = uuids->valueAt(row); + + boost::uuids::uuid u; + memcpy(&u, &uuid, 16); + + return boost::lexical_cast(u); + }); + + velox::test::assertEqualVectors(expected, result); // Sanity check results. All strings are unique. Each string is 36 bytes // long. 
- std::unordered_set uuids; - for (auto i = 0; i < 10; ++i) { + std::unordered_set uniqueUuids; + for (auto i = 0; i < size; ++i) { const auto uuid = result->valueAt(i).str(); ASSERT_EQ(36, uuid.size()); - ASSERT_TRUE(uuids.insert(uuid).second); + ASSERT_TRUE(uniqueUuids.insert(uuid).second); } - ASSERT_EQ(10, uuids.size()); + ASSERT_EQ(size, uniqueUuids.size()); } TEST_F(UuidFunctionsTest, castRoundTrip) { diff --git a/velox/functions/prestosql/types/UuidType.cpp b/velox/functions/prestosql/types/UuidType.cpp index 8d11e941664..8d0b5b5b22f 100644 --- a/velox/functions/prestosql/types/UuidType.cpp +++ b/velox/functions/prestosql/types/UuidType.cpp @@ -77,13 +77,36 @@ class UuidCastOperator : public exec::CastOperator { context.applyToSelectedNoThrow(rows, [&](auto row) { const auto uuid = uuids->valueAt(row); - boost::uuids::uuid u; - memcpy(&u, &uuid, 16); + const uint8_t* uuidBytes = reinterpret_cast(&uuid); + + // Do not use boost::lexical_cast. It is very slow. - std::string s = boost::lexical_cast(u); + // 2 hex digits for each value in the [0, 255] range (1 byte). 
+ static const char* const kHexTable = + "000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f" + "202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f" + "404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f" + "606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f" + "808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9f" + "a0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebf" + "c0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedf" + "e0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff"; exec::StringWriter result(flatResult, row); - result.append(s); + result.resize(36); + + size_t offset = 0; + for (auto i = 0; i < 16; ++i) { + result.data()[offset] = kHexTable[uuidBytes[i] * 2]; + result.data()[offset + 1] = kHexTable[uuidBytes[i] * 2 + 1]; + + offset += 2; + if (i == 3 || i == 5 || i == 7 || i == 9) { + result.data()[offset] = '-'; + offset++; + } + } + result.finalize(); }); }