From 16c55363bb4eb966b7ad09d19ab2166b24bc9139 Mon Sep 17 00:00:00 2001 From: Matt Klein Date: Mon, 25 Dec 2017 21:31:37 -0800 Subject: [PATCH 1/2] ring hash: optimize hash key build Signed-off-by: Matt Klein --- source/common/common/BUILD | 5 +++- source/common/common/hash.h | 5 ++-- source/common/upstream/BUILD | 3 +++ source/common/upstream/ring_hash_lb.cc | 34 +++++++++++++++++++++++--- 4 files changed, 39 insertions(+), 8 deletions(-) diff --git a/source/common/common/BUILD b/source/common/common/BUILD index e0b61b3758ed6..9e64dd236a29d 100644 --- a/source/common/common/BUILD +++ b/source/common/common/BUILD @@ -59,7 +59,10 @@ envoy_cc_library( envoy_cc_library( name = "hash_lib", hdrs = ["hash.h"], - external_deps = ["xxhash"], + external_deps = [ + "abseil_strings", + "xxhash", + ], ) envoy_cc_library( diff --git a/source/common/common/hash.h b/source/common/common/hash.h index 5ee6109842f13..a547baa0cb15b 100644 --- a/source/common/common/hash.h +++ b/source/common/common/hash.h @@ -2,6 +2,7 @@ #include +#include "absl/strings/string_view.h" #include "xxhash.h" namespace Envoy { @@ -12,9 +13,7 @@ class HashUtil { * Return 64-bit hash with seed of 0 from the xxHash algorithm. * See https://github.com/Cyan4973/xxHash for details. */ - static uint64_t xxHash64(const std::string& input) { - return XXH64(input.c_str(), input.size(), 0); - } + static uint64_t xxHash64(absl::string_view input) { return XXH64(input.data(), input.size(), 0); } }; } // namespace Envoy diff --git a/source/common/upstream/BUILD b/source/common/upstream/BUILD index bf153e2557648..c22ad38d688d4 100644 --- a/source/common/upstream/BUILD +++ b/source/common/upstream/BUILD @@ -210,6 +210,9 @@ envoy_cc_library( name = "ring_hash_lb_lib", srcs = ["ring_hash_lb.cc"], hdrs = ["ring_hash_lb.h"], + external_deps = [ + "abseil_strings", + ], deps = [ ":load_balancer_lib", "//include/envoy/runtime:runtime_interface", diff --git a/source/common/upstream/ring_hash_lb.cc b/source/common/upstream/ring_hash_lb.cc index fa0b5cedcfbac..07c781cea6d05 100644 --- a/source/common/upstream/ring_hash_lb.cc +++ b/source/common/upstream/ring_hash_lb.cc @@ -7,6 +7,8 @@ #include "common/common/assert.h" #include "common/upstream/load_balancer_impl.h" +#include "absl/strings/string_view.h" + namespace Envoy { namespace Upstream { @@ -136,12 +138,36 @@ RingHashLoadBalancer::Ring::Ring(const Optional hash_key_buffer; + uint64_t last_hash_key_size = 0; for (const auto& host : hosts) { + const std::string& address_string = host->address()->asString(); + uint64_t offset_start = address_string.size(); + + // Although in almost all cases the buffer could be stack allocated to 128 bytes or so, we + // don't explicitly know what is in the address string (e.g., UDS path). We allocate a raw + // buffer on the heap and use it for all calculations. The needed size is the size of the + // address, plus '_', plus 32 bytes for the index. All of this is done to avoid string + // allocations in the fast path. + const uint64_t needed_size = std::min(128UL, address_string.size() + 1 + 32); + if (hash_key_buffer == nullptr || last_hash_key_size < needed_size) { + hash_key_buffer.reset(new char[needed_size]); + last_hash_key_size = needed_size; + } + + memcpy(hash_key_buffer.get(), address_string.c_str(), offset_start); + hash_key_buffer[offset_start++] = '_'; for (uint64_t i = 0; i < hashes_per_host; i++) { - const std::string hash_key(host->address()->asString() + "_" + std::to_string(i)); - const uint64_t hash = - use_std_hash ? std::hash()(hash_key) : HashUtil::xxHash64(hash_key); - ENVOY_LOG(trace, "ring hash: hash_key={} hash={}", hash_key, hash); + const uint64_t total_hash_key_len = + offset_start + StringUtil::itoa(&hash_key_buffer.get()[offset_start], 32, i); + absl::string_view hash_key(hash_key_buffer.get(), total_hash_key_len); + + // Sadly std::hash provides no mechanism for hashing arbitrary bytes so we must copy here. + // xxHash is done wihout copies. + const uint64_t hash = use_std_hash ? std::hash()(std::string(hash_key)) + : HashUtil::xxHash64(hash_key); + ENVOY_LOG(trace, "ring hash: hash_key={} hash={}", hash_key.data(), hash); ring_.push_back({hash, host}); } } From f4dabbbad4e87da3029cde3c27f5a036132091ad Mon Sep 17 00:00:00 2001 From: Matt Klein Date: Sun, 31 Dec 2017 13:07:47 -0800 Subject: [PATCH 2/2] fix Signed-off-by: Matt Klein --- source/common/common/utility.h | 3 ++- source/common/upstream/ring_hash_lb.cc | 30 ++++++++++++-------------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/source/common/common/utility.h b/source/common/common/utility.h index 91f46e1ac8594..8c2c159d51ce0 100644 --- a/source/common/common/utility.h +++ b/source/common/common/utility.h @@ -124,10 +124,11 @@ class StringUtil { /** * Convert an unsigned integer to a base 10 string as fast as possible. * @param out supplies the string to fill. - * @param out_len supplies the length of the output buffer. Must be >= 21. + * @param out_len supplies the length of the output buffer. Must be >= MIN_ITOA_OUT_LEN. * @param i supplies the number to convert. * @return the size of the string, not including the null termination. */ + static constexpr size_t MIN_ITOA_OUT_LEN = 21; static uint32_t itoa(char* out, size_t out_len, uint64_t i); /** diff --git a/source/common/upstream/ring_hash_lb.cc b/source/common/upstream/ring_hash_lb.cc index 07c781cea6d05..2fcdd147b4f04 100644 --- a/source/common/upstream/ring_hash_lb.cc +++ b/source/common/upstream/ring_hash_lb.cc @@ -139,29 +139,27 @@ RingHashLoadBalancer::Ring::Ring(const Optional hash_key_buffer; - uint64_t last_hash_key_size = 0; + char hash_key_buffer[196]; for (const auto& host : hosts) { const std::string& address_string = host->address()->asString(); uint64_t offset_start = address_string.size(); - // Although in almost all cases the buffer could be stack allocated to 128 bytes or so, we - // don't explicitly know what is in the address string (e.g., UDS path). We allocate a raw - // buffer on the heap and use it for all calculations. The needed size is the size of the - // address, plus '_', plus 32 bytes for the index. All of this is done to avoid string - // allocations in the fast path. - const uint64_t needed_size = std::min(128UL, address_string.size() + 1 + 32); - if (hash_key_buffer == nullptr || last_hash_key_size < needed_size) { - hash_key_buffer.reset(new char[needed_size]); - last_hash_key_size = needed_size; - } - - memcpy(hash_key_buffer.get(), address_string.c_str(), offset_start); + // Currently, we support both IP and UDS addresses. The UDS max path length is ~108 on all Unix + // platforms that I know of. Given that, we can use a 196 char buffer which is plenty of room + // for UDS, '_', and up to 21 characters for the node ID. To be on the super safe side, there + // is a RELEASE_ASSERT here that checks this, in case someone in the future adds some type of + // new address that is larger, or runs on a platform where UDS is larger. I don't think it's + // worth the defensive coding to deal with the heap allocation case (e.g. via + // absl::InlinedVector) at the current time. + RELEASE_ASSERT(address_string.size() + 1 + StringUtil::MIN_ITOA_OUT_LEN <= + sizeof(hash_key_buffer)); + memcpy(hash_key_buffer, address_string.c_str(), offset_start); hash_key_buffer[offset_start++] = '_'; for (uint64_t i = 0; i < hashes_per_host; i++) { const uint64_t total_hash_key_len = - offset_start + StringUtil::itoa(&hash_key_buffer.get()[offset_start], 32, i); - absl::string_view hash_key(hash_key_buffer.get(), total_hash_key_len); + offset_start + + StringUtil::itoa(hash_key_buffer + offset_start, StringUtil::MIN_ITOA_OUT_LEN, i); + absl::string_view hash_key(hash_key_buffer, total_hash_key_len); // Sadly std::hash provides no mechanism for hashing arbitrary bytes so we must copy here. // xxHash is done wihout copies.