From 1e11c8b11e33de0f198abe3c4032b64f517808e9 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Thu, 14 Mar 2019 20:18:03 -0400 Subject: [PATCH 1/9] Speedups to FakeSymbolTableTest based on microbenchmarks from #6161. Signed-off-by: Joshua Marantz --- include/envoy/stats/BUILD | 3 + include/envoy/stats/symbol_table.h | 57 +++---- source/common/stats/BUILD | 1 + source/common/stats/fake_symbol_table_impl.h | 71 +++++++-- source/common/stats/symbol_table_impl.cc | 86 ++++++----- source/common/stats/symbol_table_impl.h | 150 ++++++++++--------- test/common/stats/symbol_table_impl_test.cc | 12 +- 7 files changed, 227 insertions(+), 153 deletions(-) diff --git a/include/envoy/stats/BUILD b/include/envoy/stats/BUILD index 9f1e513135b60..3698bbbabeeda 100644 --- a/include/envoy/stats/BUILD +++ b/include/envoy/stats/BUILD @@ -31,6 +31,9 @@ envoy_cc_library( envoy_cc_library( name = "symbol_table_interface", hdrs = ["symbol_table.h"], + deps = [ + "//source/common/common:hash_lib", + ], ) envoy_cc_library( diff --git a/include/envoy/stats/symbol_table.h b/include/envoy/stats/symbol_table.h index b0efd1cbd75d1..4ac177bfd8953 100644 --- a/include/envoy/stats/symbol_table.h +++ b/include/envoy/stats/symbol_table.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -19,15 +20,7 @@ namespace Stats { */ class StatName; -/** - * Intermediate representation for a stat-name. This helps store multiple names - * in a single packed allocation. First we encode each desired name, then sum - * their sizes for the single packed allocation. This is used to store - * MetricImpl's tags and tagExtractedName. Like StatName, we don't want to pay - * a vptr overhead per object, and the representation is shared between the - * SymbolTable implementations, so this is just a pre-declare. - */ -class SymbolEncoding; +class StatNameList; /** * SymbolTable manages a namespace optimized for stat names, exploiting their @@ -59,22 +52,6 @@ class SymbolTable { virtual ~SymbolTable() = default; - /** - * Encodes a stat name using the symbol table, returning a SymbolEncoding. The - * SymbolEncoding is not intended for long-term storage, but is used to help - * allocate a StatName with the correct amount of storage. - * - * When a name is encoded, it bumps reference counts held in the table for - * each symbol. The caller is responsible for creating a StatName using this - * SymbolEncoding and ultimately disposing of it by calling - * SymbolTable::free(). Users are protected from leaking symbols into the pool - * by ASSERTions in the SymbolTable destructor. - * - * @param name The name to encode. - * @return SymbolEncoding the encoded symbols. - */ - virtual SymbolEncoding encode(absl::string_view name) PURE; - /** * @return uint64_t the number of symbols in the symbol table. */ @@ -130,11 +107,39 @@ class SymbolTable { */ virtual StoragePtr join(const std::vector& stat_names) const PURE; + /** + * Populates a StatNameList from a list of encodings. This is not done at + * construction time to enable StatNameList to be instantiated directly in + * a class that doesn't have a live SymbolTable when it is constructed. + * + * @param names A pointer to the first name in an array. + * @param num_names The number of names. + * @param symbol_table The symbol table in which to encode the names. + */ + virtual void populateList(absl::string_view* names, int32_t num_names, StatNameList& list) PURE; + #ifndef ENVOY_CONFIG_COVERAGE virtual void debugPrint() const PURE; #endif + /** + * Calls the provided function with a string-view representation of the + * elaborated name. This is useful during the interim period when we + * are using FakeSymbolTableImpl, to avoid an extra allocation. Once + * we migrate to using SymbolTableImpl, this interface will no longer + * be helpful and can be removed. The reason it's useful now is that + * it makes up, in part, for some extra runtime overhead that is spent + * on the SymbolTable abstraction and API, without getting any benefit + * from the improved representation. + * + * @param stat_name The stat name. + * @param fn The function to call with the elaborated stat name as a string_view. + */ + virtual void callWithStringView(StatName stat_name, + const std::function& fn) const PURE; + private: + friend struct HeapStatData; friend class StatNameStorage; friend class StatNameList; @@ -158,6 +163,8 @@ class SymbolTable { * @param stat_name the stat name. */ virtual void incRefCount(const StatName& stat_name) PURE; + + virtual StoragePtr copyToBytes(absl::string_view name) PURE; }; using SharedSymbolTable = std::shared_ptr; diff --git a/source/common/stats/BUILD b/source/common/stats/BUILD index 85052c2361946..ca05b803d3aa6 100644 --- a/source/common/stats/BUILD +++ b/source/common/stats/BUILD @@ -130,6 +130,7 @@ envoy_cc_library( "//include/envoy/stats:symbol_table_interface", "//source/common/common:assert_lib", "//source/common/common:logger_lib", + "//source/common/common:stack_array", "//source/common/common:thread_lib", "//source/common/common:utility_lib", ], diff --git a/source/common/stats/fake_symbol_table_impl.h b/source/common/stats/fake_symbol_table_impl.h index 6c7e2f37bdf86..40a329ddcde6d 100644 --- a/source/common/stats/fake_symbol_table_impl.h +++ b/source/common/stats/fake_symbol_table_impl.h @@ -49,7 +49,42 @@ namespace Stats { */ class FakeSymbolTableImpl : public SymbolTable { public: - SymbolEncoding encode(absl::string_view name) override { return encodeHelper(name); } + // SymbolTable + void populateList(absl::string_view* names, int32_t num_names, StatNameList& list) override { + // This implementation of populateList is similar to + // SymboLableImpl::populateList. This variant is more efficient for + // FakeSymbolTableImpl, because it avoid "encoding" each name in names. The + // strings are laid out abutting each other with 2-byte length prefixes, so + // encoding isn't needed, and doing a dummy encoding step would cost one + // memory allocation per element, adding significant overhead as measured by + // thread_local_store_speed_test. + + RELEASE_ASSERT(num_names < 256, "Maximum number elements in a StatNameList exceeded"); + + // First encode all the names. + size_t total_size_bytes = 1 + num_names * StatNameSizeEncodingBytes; + + for (int32_t i = 0; i < num_names; ++i) { + total_size_bytes += names[i].size(); + } + + // Now allocate the exact number of bytes required and move the encodings + // into storage. + auto storage = std::make_unique(total_size_bytes); + uint8_t* p = &storage[0]; + *p++ = num_names; + for (int32_t i = 0; i < num_names; ++i) { + auto& name = names[i]; + size_t sz = name.size(); + p = saveLengthToBytesReturningNext(sz, p); + if (!name.empty()) { + memcpy(p, name.data(), sz * sizeof(uint8_t)); + p += sz; + } + } + ASSERT(p == &storage[0] + total_size_bytes); + list.moveStorageIntoList(std::move(storage)); + } std::string toString(const StatName& stat_name) const override { return std::string(toStringView(stat_name)); @@ -75,22 +110,38 @@ class FakeSymbolTableImpl : public SymbolTable { void debugPrint() const override {} #endif + StoragePtr copyToBytes(absl::string_view name) override { + auto bytes = std::make_unique(name.size() + StatNameSizeEncodingBytes); + uint8_t* buffer = saveLengthToBytesReturningNext(name.size(), bytes.get()); + memcpy(buffer, name.data(), name.size()); + return bytes; + } + + void callWithStringView(StatName stat_name, + const std::function& fn) const override { + fn(toStringView(stat_name)); + } + private: - SymbolEncoding encodeHelper(absl::string_view name) const { - SymbolEncoding encoding; - encoding.addStringForFakeSymbolTable(name); - return encoding; + // Saves the specified length into the byte array, returning the next byte. + // There is no guarantee that bytes will be aligned, so we can't cast to a + // uint16_t* and assign, but must individually copy the bytes. + static uint8_t* saveLengthToBytesReturningNext(uint64_t length, uint8_t* bytes) { + ASSERT(length < StatNameMaxSize); + *bytes++ = length & 0xff; + *bytes++ = length >> 8; + return bytes; } absl::string_view toStringView(const StatName& stat_name) const { return {reinterpret_cast(stat_name.data()), stat_name.dataSize()}; } - SymbolTable::StoragePtr stringToStorage(absl::string_view name) const { - SymbolEncoding encoding = encodeHelper(name); - auto bytes = std::make_unique(encoding.bytesRequired()); - encoding.moveToStorage(bytes.get()); - return bytes; + StoragePtr stringToStorage(absl::string_view name) const { + auto storage = std::make_unique(name.size() + 2); + uint8_t* p = saveLengthToBytesReturningNext(name.size(), storage.get()); + memcpy(p, name.data(), name.size()); + return storage; } }; diff --git a/source/common/stats/symbol_table_impl.cc b/source/common/stats/symbol_table_impl.cc index 4608886511fbe..b2cfb5b7374a7 100644 --- a/source/common/stats/symbol_table_impl.cc +++ b/source/common/stats/symbol_table_impl.cc @@ -30,7 +30,7 @@ void StatName::debugPrint() { for (uint64_t i = 0; i < nbytes; ++i) { absl::StrAppend(&msg, " ", static_cast(data()[i])); } - SymbolVec encoding = SymbolEncoding::decodeSymbols(data(), dataSize()); + SymbolVec encoding = SymbolTableImpl::Encoding::decodeSymbols(data(), dataSize()); absl::StrAppend(&msg, ", numSymbols=", encoding.size(), ":"); for (Symbol symbol : encoding) { absl::StrAppend(&msg, " ", symbol); @@ -40,9 +40,9 @@ void StatName::debugPrint() { } #endif -SymbolEncoding::~SymbolEncoding() { ASSERT(vec_.empty()); } +SymbolTableImpl::Encoding::~Encoding() { ASSERT(vec_.empty()); } -void SymbolEncoding::addSymbol(Symbol symbol) { +void SymbolTableImpl::Encoding::addSymbol(Symbol symbol) { // UTF-8-like encoding where a value 127 or less gets written as a single // byte. For higher values we write the low-order 7 bits with a 1 in // the high-order bit. Then we right-shift 7 bits and keep adding more bytes @@ -60,14 +60,8 @@ void SymbolEncoding::addSymbol(Symbol symbol) { } while (symbol != 0); } -void SymbolEncoding::addStringForFakeSymbolTable(absl::string_view str) { - if (!str.empty()) { - vec_.resize(str.size()); - memcpy(&vec_[0], str.data(), str.size()); - } -} - -SymbolVec SymbolEncoding::decodeSymbols(const SymbolTable::Storage array, uint64_t size) { +SymbolVec SymbolTableImpl::Encoding::decodeSymbols(const SymbolTable::Storage array, + uint64_t size) { SymbolVec symbol_vec; Symbol symbol = 0; for (uint32_t shift = 0; size > 0; --size, ++array) { @@ -98,7 +92,7 @@ static inline uint8_t* saveLengthToBytesReturningNext(uint64_t length, uint8_t* return bytes; } -uint64_t SymbolEncoding::moveToStorage(SymbolTable::Storage symbol_array) { +uint64_t SymbolTableImpl::Encoding::moveToStorage(SymbolTable::Storage symbol_array) { uint64_t sz = size(); symbol_array = saveLengthToBytesReturningNext(sz, symbol_array); if (sz != 0) { @@ -123,11 +117,9 @@ SymbolTableImpl::~SymbolTableImpl() { // TODO(ambuc): There is a possible performance optimization here for avoiding // the encoding of IPs / numbers if they appear in stat names. We don't want to // waste time symbolizing an integer as an integer, if we can help it. -SymbolEncoding SymbolTableImpl::encode(const absl::string_view name) { - SymbolEncoding encoding; - +void SymbolTableImpl::encode(const absl::string_view name, Encoding& encoding) { if (name.empty()) { - return encoding; + return; } // We want to hold the lock for the minimum amount of time, so we do the @@ -149,7 +141,6 @@ SymbolEncoding SymbolTableImpl::encode(const absl::string_view name) { for (Symbol symbol : symbols) { encoding.addSymbol(symbol); } - return encoding; } uint64_t SymbolTableImpl::numSymbols() const { @@ -159,7 +150,13 @@ uint64_t SymbolTableImpl::numSymbols() const { } std::string SymbolTableImpl::toString(const StatName& stat_name) const { - return decodeSymbolVec(SymbolEncoding::decodeSymbols(stat_name.data(), stat_name.dataSize())); + return decodeSymbolVec( + SymbolTableImpl::Encoding::decodeSymbols(stat_name.data(), stat_name.dataSize())); +} + +void SymbolTableImpl::callWithStringView(StatName stat_name, + const std::function& fn) const { + fn(toString(stat_name)); } std::string SymbolTableImpl::decodeSymbolVec(const SymbolVec& symbols) const { @@ -177,7 +174,8 @@ std::string SymbolTableImpl::decodeSymbolVec(const SymbolVec& symbols) const { void SymbolTableImpl::incRefCount(const StatName& stat_name) { // Before taking the lock, decode the array of symbols from the SymbolTable::Storage. - SymbolVec symbols = SymbolEncoding::decodeSymbols(stat_name.data(), stat_name.dataSize()); + SymbolVec symbols = + SymbolTableImpl::Encoding::decodeSymbols(stat_name.data(), stat_name.dataSize()); Thread::LockGuard lock(lock_); for (Symbol symbol : symbols) { @@ -193,7 +191,8 @@ void SymbolTableImpl::incRefCount(const StatName& stat_name) { void SymbolTableImpl::free(const StatName& stat_name) { // Before taking the lock, decode the array of symbols from the SymbolTable::Storage. - SymbolVec symbols = SymbolEncoding::decodeSymbols(stat_name.data(), stat_name.dataSize()); + SymbolVec symbols = + SymbolTableImpl::Encoding::decodeSymbols(stat_name.data(), stat_name.dataSize()); Thread::LockGuard lock(lock_); for (Symbol symbol : symbols) { @@ -265,8 +264,8 @@ bool SymbolTableImpl::lessThan(const StatName& a, const StatName& b) const { // If this becomes a performance bottleneck (e.g. during sorting), we could // provide an iterator-like interface for incrementally decoding the symbols // without allocating memory. - SymbolVec av = SymbolEncoding::decodeSymbols(a.data(), a.dataSize()); - SymbolVec bv = SymbolEncoding::decodeSymbols(b.data(), b.dataSize()); + SymbolVec av = SymbolTableImpl::Encoding::decodeSymbols(a.data(), a.dataSize()); + SymbolVec bv = SymbolTableImpl::Encoding::decodeSymbols(b.data(), b.dataSize()); // Calling fromSymbol requires holding the lock, as it needs read-access to // the maps that are written when adding new symbols. @@ -296,12 +295,17 @@ void SymbolTableImpl::debugPrint() const { } #endif -StatNameStorage::StatNameStorage(absl::string_view name, SymbolTable& table) { - SymbolEncoding encoding = table.encode(name); - bytes_ = std::make_unique(encoding.bytesRequired()); - encoding.moveToStorage(bytes_.get()); +SymbolTable::StoragePtr SymbolTableImpl::copyToBytes(absl::string_view name) { + Encoding encoding; + encode(name, encoding); + auto bytes = std::make_unique(encoding.bytesRequired()); + encoding.moveToStorage(bytes.get()); + return bytes; } +StatNameStorage::StatNameStorage(absl::string_view name, SymbolTable& table) + : bytes_(table.copyToBytes(name)) {} + StatNameStorage::StatNameStorage(StatName src, SymbolTable& table) { uint64_t size = src.size(); bytes_ = std::make_unique(size); @@ -337,34 +341,34 @@ SymbolTable::StoragePtr SymbolTableImpl::join(const std::vector& stat_ return bytes; } -StatNameList::~StatNameList() { ASSERT(!populated()); } - -void StatNameList::populate(const std::vector& names, - SymbolTable& symbol_table) { - RELEASE_ASSERT(names.size() < 256, "Maximum number elements in a StatNameList exceeded"); +void SymbolTableImpl::populateList(absl::string_view* names, int32_t num_names, + StatNameList& list) { + RELEASE_ASSERT(num_names < 256, "Maximum number elements in a StatNameList exceeded"); // First encode all the names. size_t total_size_bytes = 1; /* one byte for holding the number of names */ - std::vector encodings; - encodings.resize(names.size()); - int index = 0; - for (auto& name : names) { - SymbolEncoding encoding = symbol_table.encode(name); + + STACK_ARRAY(encodings, Encoding, num_names); + for (int32_t i = 0; i < num_names; ++i) { + Encoding& encoding = encodings[i]; + encode(names[i], encoding); total_size_bytes += encoding.bytesRequired(); - encodings[index++].swap(encoding); } // Now allocate the exact number of bytes required and move the encodings // into storage. - storage_ = std::make_unique(total_size_bytes); - uint8_t* p = &storage_[0]; - *p++ = encodings.size(); + auto storage = std::make_unique(total_size_bytes); + uint8_t* p = &storage[0]; + *p++ = num_names; for (auto& encoding : encodings) { p += encoding.moveToStorage(p); } - ASSERT(p == &storage_[0] + total_size_bytes); + ASSERT(p == &storage[0] + total_size_bytes); + list.moveStorageIntoList(std::move(storage)); } +StatNameList::~StatNameList() { ASSERT(!populated()); } + void StatNameList::iterate(const std::function& f) const { uint8_t* p = &storage_[0]; uint32_t num_elements = *p++; diff --git a/source/common/stats/symbol_table_impl.h b/source/common/stats/symbol_table_impl.h index f7c33ebd1d93f..d30044845bb49 100644 --- a/source/common/stats/symbol_table_impl.h +++ b/source/common/stats/symbol_table_impl.h @@ -15,6 +15,7 @@ #include "common/common/hash.h" #include "common/common/lock_guard.h" #include "common/common/non_copyable.h" +#include "common/common/stack_array.h" #include "common/common/thread.h" #include "common/common/utility.h" @@ -37,67 +38,6 @@ constexpr uint64_t StatNameMaxSize = 1 << (8 * StatNameSizeEncodingBytes); // 65 /** Transient representations of a vector of 32-bit symbols */ using SymbolVec = std::vector; -/** - * Represents an 8-bit encoding of a vector of symbols, used as a transient - * representation during encoding and prior to retained allocation. - */ -class SymbolEncoding { -public: - /** - * Before destructing SymbolEncoding, you must call moveToStorage. This - * transfers ownership, and in particular, the responsibility to call - * SymbolTable::clear() on all referenced symbols. If we ever wanted - * to be able to destruct a SymbolEncoding without transferring it - * we could add a clear(SymbolTable&) method. - */ - ~SymbolEncoding(); - - /** - * Encodes a token into the vec. - * - * @param symbol the symbol to encode. - */ - void addSymbol(Symbol symbol); - - /** - * Encodes an entire string into the vec, on behalf of FakeSymbolTableImpl. - * TODO(jmarantz): delete this method when FakeSymbolTableImpl is deleted. - * - * @param str The string to encode. - */ - void addStringForFakeSymbolTable(absl::string_view str); - - /** - * Decodes a uint8_t array into a SymbolVec. - */ - static SymbolVec decodeSymbols(const SymbolTable::Storage array, uint64_t size); - - /** - * Returns the number of bytes required to represent StatName as a uint8_t - * array, including the encoded size. - */ - uint64_t bytesRequired() const { return size() + StatNameSizeEncodingBytes; } - - /** - * Returns the number of uint8_t entries we collected while adding symbols. - */ - uint64_t size() const { return vec_.size(); } - - /** - * Moves the contents of the vector into an allocated array. The array - * must have been allocated with bytesRequired() bytes. - * - * @param array destination memory to receive the encoded bytes. - * @return uint64_t the number of bytes transferred. - */ - uint64_t moveToStorage(SymbolTable::Storage array); - - void swap(SymbolEncoding& src) { vec_.swap(src.vec_); } - -private: - std::vector vec_; -}; - /** * SymbolTableImpl manages a namespace optimized for stats, which are typically * composed of arrays of "."-separated tokens, with a significant overlap @@ -130,22 +70,94 @@ class SymbolEncoding { */ class SymbolTableImpl : public SymbolTable { public: + /** + * Intermediate representation for a stat-name. This helps store multiple names + * in a single packed allocation. First we encode each desired name, then sum + * their sizes for the single packed allocation. This is used to store + * MetricImpl's tags and tagExtractedName. Like StatName, we don't want to pay + * a vptr overhead per object, and the representation is shared between the + * SymbolTable implementations, so this is just a pre-declare. + */ + class Encoding { + public: + /** + * Before destructing SymbolEncoding, you must call moveToStorage. This + * transfers ownership, and in particular, the responsibility to call + * SymbolTable::clear() on all referenced symbols. If we ever wanted + * to be able to destruct a SymbolEncoding without transferring it + * we could add a clear(SymbolTable&) method. + */ + ~Encoding(); + + /** + * Encodes a token into the vec. + * + * @param symbol the symbol to encode. + */ + void addSymbol(Symbol symbol); + + /** + * Encodes an entire string into the vec, on behalf of FakeSymbolTableImpl. + * TODO(jmarantz): delete this method when FakeSymbolTableImpl is deleted. + * + * @param str The string to encode. + */ + void addStringForFakeSymbolTable(absl::string_view str); + + /** + * Decodes a uint8_t array into a SymbolVec. + */ + static SymbolVec decodeSymbols(const SymbolTable::Storage array, uint64_t size); + + /** + * Returns the number of bytes required to represent StatName as a uint8_t + * array, including the encoded size. + */ + uint64_t bytesRequired() const { return size() + StatNameSizeEncodingBytes; } + + /** + * Returns the number of uint8_t entries we collected while adding symbols. + */ + uint64_t size() const { return vec_.size(); } + + /** + * Moves the contents of the vector into an allocated array. The array + * must have been allocated with bytesRequired() bytes. + * + * @param array destination memory to receive the encoded bytes. + * @return uint64_t the number of bytes transferred. + */ + uint64_t moveToStorage(SymbolTable::Storage array); + + void swap(Encoding& src) { vec_.swap(src.vec_); } + + private: + std::vector vec_; + }; + SymbolTableImpl(); ~SymbolTableImpl() override; // SymbolTable std::string toString(const StatName& stat_name) const override; - SymbolEncoding encode(absl::string_view name) override; uint64_t numSymbols() const override; bool lessThan(const StatName& a, const StatName& b) const override; void free(const StatName& stat_name) override; void incRefCount(const StatName& stat_name) override; SymbolTable::StoragePtr join(const std::vector& stat_names) const override; + void encode(absl::string_view name, Encoding& encoding); + void populateList(absl::string_view* names, int32_t num_names, StatNameList& list) override; + #ifndef ENVOY_CONFIG_COVERAGE void debugPrint() const override; #endif + StoragePtr copyToBytes(absl::string_view name) override; + + void callWithStringView(StatName stat_name, + const std::function& fn) const override; + private: friend class StatName; friend class StatNameTest; @@ -157,7 +169,7 @@ class SymbolTableImpl : public SymbolTable { uint32_t ref_count_; }; - // This must be called during both encode() and free(). + // This must be held during both encode() and free(). mutable Thread::MutexBasicLockable lock_; /** @@ -376,15 +388,7 @@ class StatNameList { public: ~StatNameList(); - /** - * Populates the StatNameList from a list of encodings. This is not done at - * construction time to enable StatNameList to be instantiated directly in - * a class that doesn't have a live SymbolTable when it is constructed. - * - * @param encodings The list names to encode. - * @param symbol_table The symbol table in which to encode the names. - */ - void populate(const std::vector& encodings, SymbolTable& symbol_table); + void moveStorageIntoList(SymbolTable::StoragePtr&& storage) { storage_ = std::move(storage); } /** * @return true if populate() has been called on this list. @@ -411,7 +415,7 @@ class StatNameList { void clear(SymbolTable& symbol_table); private: - std::unique_ptr storage_; + SymbolTable::StoragePtr storage_; }; // Helper class for constructing hash-tables with StatName keys. diff --git a/test/common/stats/symbol_table_impl_test.cc b/test/common/stats/symbol_table_impl_test.cc index 59680fd38ea09..36c86aaf43654 100644 --- a/test/common/stats/symbol_table_impl_test.cc +++ b/test/common/stats/symbol_table_impl_test.cc @@ -1,5 +1,6 @@ #include +#include "common/common/macros.h" #include "common/common/mutex_tracer_impl.h" #include "common/memory/stats.h" #include "common/stats/fake_symbol_table_impl.h" @@ -39,7 +40,9 @@ class StatNameTest : public testing::TestWithParam { break; } case SymbolTableType::Fake: - table_ = std::make_unique(); + auto table = std::make_unique(); + fake_symbol_table_ = table.get(); + table_ = std::move(table); break; } } @@ -54,7 +57,7 @@ class StatNameTest : public testing::TestWithParam { } SymbolVec getSymbols(StatName stat_name) { - return SymbolEncoding::decodeSymbols(stat_name.data(), stat_name.dataSize()); + return SymbolTableImpl::Encoding::decodeSymbols(stat_name.data(), stat_name.dataSize()); } std::string decodeSymbolVec(const SymbolVec& symbol_vec) { return real_symbol_table_->decodeSymbolVec(symbol_vec); @@ -71,6 +74,7 @@ class StatNameTest : public testing::TestWithParam { return stat_name_storage_.back().statName(); } + FakeSymbolTableImpl* fake_symbol_table_{nullptr}; SymbolTableImpl* real_symbol_table_{nullptr}; std::unique_ptr table_; @@ -268,10 +272,10 @@ TEST_P(StatNameTest, TestShrinkingExpectation) { // you don't free all the StatNames you've allocated bytes for. StatNameList // provides this capability. TEST_P(StatNameTest, List) { - std::vector names{"hello.world", "goodbye.world"}; + absl::string_view names[] = {"hello.world", "goodbye.world"}; StatNameList name_list; EXPECT_FALSE(name_list.populated()); - name_list.populate(names, *table_); + table_->populateList(names, ARRAY_SIZE(names), name_list); EXPECT_TRUE(name_list.populated()); // First, decode only the first name. From 91e6d7aea74de6aa9ebf176a484973c05d53facb Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Thu, 14 Mar 2019 20:32:52 -0400 Subject: [PATCH 2/9] Use SymbolTable::Storage more, and privatize a method that exposes internals to friendds. Signed-off-by: Joshua Marantz --- source/common/stats/fake_symbol_table_impl.h | 4 ++-- source/common/stats/symbol_table_impl.cc | 8 +++---- source/common/stats/symbol_table_impl.h | 23 ++++++++++++++++++-- 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/source/common/stats/fake_symbol_table_impl.h b/source/common/stats/fake_symbol_table_impl.h index 40a329ddcde6d..597d93ea42c17 100644 --- a/source/common/stats/fake_symbol_table_impl.h +++ b/source/common/stats/fake_symbol_table_impl.h @@ -70,7 +70,7 @@ class FakeSymbolTableImpl : public SymbolTable { // Now allocate the exact number of bytes required and move the encodings // into storage. - auto storage = std::make_unique(total_size_bytes); + auto storage = std::make_unique(total_size_bytes); uint8_t* p = &storage[0]; *p++ = num_names; for (int32_t i = 0; i < num_names; ++i) { @@ -111,7 +111,7 @@ class FakeSymbolTableImpl : public SymbolTable { #endif StoragePtr copyToBytes(absl::string_view name) override { - auto bytes = std::make_unique(name.size() + StatNameSizeEncodingBytes); + auto bytes = std::make_unique(name.size() + StatNameSizeEncodingBytes); uint8_t* buffer = saveLengthToBytesReturningNext(name.size(), bytes.get()); memcpy(buffer, name.data(), name.size()); return bytes; diff --git a/source/common/stats/symbol_table_impl.cc b/source/common/stats/symbol_table_impl.cc index b2cfb5b7374a7..d87a3691263ac 100644 --- a/source/common/stats/symbol_table_impl.cc +++ b/source/common/stats/symbol_table_impl.cc @@ -298,7 +298,7 @@ void SymbolTableImpl::debugPrint() const { SymbolTable::StoragePtr SymbolTableImpl::copyToBytes(absl::string_view name) { Encoding encoding; encode(name, encoding); - auto bytes = std::make_unique(encoding.bytesRequired()); + auto bytes = std::make_unique(encoding.bytesRequired()); encoding.moveToStorage(bytes.get()); return bytes; } @@ -308,7 +308,7 @@ StatNameStorage::StatNameStorage(absl::string_view name, SymbolTable& table) StatNameStorage::StatNameStorage(StatName src, SymbolTable& table) { uint64_t size = src.size(); - bytes_ = std::make_unique(size); + bytes_ = std::make_unique(size); src.copyToStorage(bytes_.get()); table.incRefCount(statName()); } @@ -331,7 +331,7 @@ SymbolTable::StoragePtr SymbolTableImpl::join(const std::vector& stat_ for (StatName stat_name : stat_names) { num_bytes += stat_name.dataSize(); } - auto bytes = std::make_unique(num_bytes + StatNameSizeEncodingBytes); + auto bytes = std::make_unique(num_bytes + StatNameSizeEncodingBytes); uint8_t* p = saveLengthToBytesReturningNext(num_bytes, bytes.get()); for (StatName stat_name : stat_names) { num_bytes = stat_name.dataSize(); @@ -357,7 +357,7 @@ void SymbolTableImpl::populateList(absl::string_view* names, int32_t num_names, // Now allocate the exact number of bytes required and move the encodings // into storage. - auto storage = std::make_unique(total_size_bytes); + auto storage = std::make_unique(total_size_bytes); uint8_t* p = &storage[0]; *p++ = num_names; for (auto& encoding : encodings) { diff --git a/source/common/stats/symbol_table_impl.h b/source/common/stats/symbol_table_impl.h index d30044845bb49..10704ff87e6dc 100644 --- a/source/common/stats/symbol_table_impl.h +++ b/source/common/stats/symbol_table_impl.h @@ -388,8 +388,6 @@ class StatNameList { public: ~StatNameList(); - void moveStorageIntoList(SymbolTable::StoragePtr&& storage) { storage_ = std::move(storage); } - /** * @return true if populate() has been called on this list. */ @@ -415,6 +413,27 @@ class StatNameList { void clear(SymbolTable& symbol_table); private: + friend class FakeSymbolTableImpl; + friend class SymbolTableImpl; + + /** + * Moves the specified storage into the list. The storage format is an + * array of bytes, organized like this: + * + * [0] The number of elements in the list (must be < 256). + * [1] low order 8 bits of the number of symbols in the first element. + * [2] high order 8 bits ofthe number of symbols in the first element. + * [3...] the symbols in the first element. + * ... + * + * + * For FakeSymbolTableImpl, each symbol is a single char, casted into a + * uint8_t. For SymbolTableImpl, each symbol is 1 or more bytes, in a + * variable-length encoding. See SymbolTableImpl::Encoding::addSymbol for + * details. + */ + void moveStorageIntoList(SymbolTable::StoragePtr&& storage) { storage_ = std::move(storage); } + SymbolTable::StoragePtr storage_; }; From d5dfb29567ddef3f4769a23a7fdda797d619cbda Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Thu, 14 Mar 2019 21:51:52 -0400 Subject: [PATCH 3/9] spelling Signed-off-by: Joshua Marantz --- source/common/stats/fake_symbol_table_impl.h | 2 +- source/common/stats/symbol_table_impl.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/source/common/stats/fake_symbol_table_impl.h b/source/common/stats/fake_symbol_table_impl.h index 597d93ea42c17..28296a8534218 100644 --- a/source/common/stats/fake_symbol_table_impl.h +++ b/source/common/stats/fake_symbol_table_impl.h @@ -52,7 +52,7 @@ class FakeSymbolTableImpl : public SymbolTable { // SymbolTable void populateList(absl::string_view* names, int32_t num_names, StatNameList& list) override { // This implementation of populateList is similar to - // SymboLableImpl::populateList. This variant is more efficient for + // SymbolTableImpl::populateList. This variant is more efficient for // FakeSymbolTableImpl, because it avoid "encoding" each name in names. The // strings are laid out abutting each other with 2-byte length prefixes, so // encoding isn't needed, and doing a dummy encoding step would cost one diff --git a/source/common/stats/symbol_table_impl.h b/source/common/stats/symbol_table_impl.h index 10704ff87e6dc..66d80dc9ed59f 100644 --- a/source/common/stats/symbol_table_impl.h +++ b/source/common/stats/symbol_table_impl.h @@ -422,7 +422,7 @@ class StatNameList { * * [0] The number of elements in the list (must be < 256). * [1] low order 8 bits of the number of symbols in the first element. - * [2] high order 8 bits ofthe number of symbols in the first element. + * [2] high order 8 bits of the number of symbols in the first element. * [3...] the symbols in the first element. * ... * From d6f0cce3ac50d7020cf22696dc56ce92fe2c3ffd Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Thu, 14 Mar 2019 23:13:10 -0400 Subject: [PATCH 4/9] any -> full Signed-off-by: Joshua Marantz --- include/envoy/stats/symbol_table.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/envoy/stats/symbol_table.h b/include/envoy/stats/symbol_table.h index 4ac177bfd8953..76fcc388361ac 100644 --- a/include/envoy/stats/symbol_table.h +++ b/include/envoy/stats/symbol_table.h @@ -129,7 +129,7 @@ class SymbolTable { * we migrate to using SymbolTableImpl, this interface will no longer * be helpful and can be removed. The reason it's useful now is that * it makes up, in part, for some extra runtime overhead that is spent - * on the SymbolTable abstraction and API, without getting any benefit + * on the SymbolTable abstraction and API, without getting full benefit * from the improved representation. * * @param stat_name The stat name. From a60cd20783d0ac90eba7baedacedac52551f4f68 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Mon, 18 Mar 2019 09:25:29 -0400 Subject: [PATCH 5/9] Address review comments. Signed-off-by: Joshua Marantz --- include/envoy/stats/symbol_table.h | 14 ++++++ source/common/stats/fake_symbol_table_impl.h | 29 ++++++------ source/common/stats/symbol_table_impl.cc | 40 +++++++--------- source/common/stats/symbol_table_impl.h | 50 ++++++++++---------- 4 files changed, 72 insertions(+), 61 deletions(-) diff --git a/include/envoy/stats/symbol_table.h b/include/envoy/stats/symbol_table.h index 76fcc388361ac..7a89755d50f0b 100644 --- a/include/envoy/stats/symbol_table.h +++ b/include/envoy/stats/symbol_table.h @@ -132,6 +132,9 @@ class SymbolTable { * on the SymbolTable abstraction and API, without getting full benefit * from the improved representation. * + * TODO(#6307): Remove this when the transition from FakeSymbolTableImpl to + * SymbolTableImpl is complete. + * * @param stat_name The stat name. * @param fn The function to call with the elaborated stat name as a string_view. */ @@ -164,6 +167,17 @@ class SymbolTable { */ virtual void incRefCount(const StatName& stat_name) PURE; + /** + * Encodes 'name' into the symbol table, tokenizing. Bumps reference counts + * for referenced symbols. The caller must manage the storage, and is + * responsible for calling SymbolTable::free() to release the reference + * counts. Note that this method is private, but is called by friend classes + * StatNameStorage and StatNameList. + * + * @param name The name to encode. + * @return The encoded name, transferring ownership to the caller. + * + */ virtual StoragePtr copyToBytes(absl::string_view name) PURE; }; diff --git a/source/common/stats/fake_symbol_table_impl.h b/source/common/stats/fake_symbol_table_impl.h index 28296a8534218..53e0b3660132f 100644 --- a/source/common/stats/fake_symbol_table_impl.h +++ b/source/common/stats/fake_symbol_table_impl.h @@ -44,7 +44,7 @@ namespace Stats { * that backs each StatName, so there is no sharing or memory savings, but also * no state associated with the SymbolTable, and thus no locks needed. * - * TODO(jmarantz): delete this class once SymbolTable is fully deployed in the + * TODO(#6307): delete this class once SymbolTable is fully deployed in the * Envoy codebase. */ class FakeSymbolTableImpl : public SymbolTable { @@ -59,9 +59,13 @@ class FakeSymbolTableImpl : public SymbolTable { // memory allocation per element, adding significant overhead as measured by // thread_local_store_speed_test. + // We encode the number of names in a single byte, thus there must be less + // than 256 of them. RELEASE_ASSERT(num_names < 256, "Maximum number elements in a StatNameList exceeded"); - // First encode all the names. + // First encode all the names. The '1' here represents the number of + // names. The num_names * StatNameSizeEncodingBytes reserves space for the + // lengths of each name. size_t total_size_bytes = 1 + num_names * StatNameSizeEncodingBytes; for (int32_t i = 0; i < num_names; ++i) { @@ -76,12 +80,17 @@ class FakeSymbolTableImpl : public SymbolTable { for (int32_t i = 0; i < num_names; ++i) { auto& name = names[i]; size_t sz = name.size(); - p = saveLengthToBytesReturningNext(sz, p); + p = SymbolTableImpl::writeLengthReturningNext(sz, p); if (!name.empty()) { memcpy(p, name.data(), sz * sizeof(uint8_t)); p += sz; } } + + // This assertion double-checks the arithmetic where we computed + // total_size_bytes. After appending all the encoded data into the + // allocated byte array, we should wind up with a pointer difference of + // total_size_bytes from the beginning of the allocation. ASSERT(p == &storage[0] + total_size_bytes); list.moveStorageIntoList(std::move(storage)); } @@ -112,7 +121,7 @@ class FakeSymbolTableImpl : public SymbolTable { StoragePtr copyToBytes(absl::string_view name) override { auto bytes = std::make_unique(name.size() + StatNameSizeEncodingBytes); - uint8_t* buffer = saveLengthToBytesReturningNext(name.size(), bytes.get()); + uint8_t* buffer = SymbolTableImpl::writeLengthReturningNext(name.size(), bytes.get()); memcpy(buffer, name.data(), name.size()); return bytes; } @@ -123,23 +132,13 @@ class FakeSymbolTableImpl : public SymbolTable { } private: - // Saves the specified length into the byte array, returning the next byte. - // There is no guarantee that bytes will be aligned, so we can't cast to a - // uint16_t* and assign, but must individually copy the bytes. - static uint8_t* saveLengthToBytesReturningNext(uint64_t length, uint8_t* bytes) { - ASSERT(length < StatNameMaxSize); - *bytes++ = length & 0xff; - *bytes++ = length >> 8; - return bytes; - } - absl::string_view toStringView(const StatName& stat_name) const { return {reinterpret_cast(stat_name.data()), stat_name.dataSize()}; } StoragePtr stringToStorage(absl::string_view name) const { auto storage = std::make_unique(name.size() + 2); - uint8_t* p = saveLengthToBytesReturningNext(name.size(), storage.get()); + uint8_t* p = SymbolTableImpl::writeLengthReturningNext(name.size(), storage.get()); memcpy(p, name.data(), name.size()); return storage; } diff --git a/source/common/stats/symbol_table_impl.cc b/source/common/stats/symbol_table_impl.cc index d87a3691263ac..ce31206f51c82 100644 --- a/source/common/stats/symbol_table_impl.cc +++ b/source/common/stats/symbol_table_impl.cc @@ -40,7 +40,11 @@ void StatName::debugPrint() { } #endif -SymbolTableImpl::Encoding::~Encoding() { ASSERT(vec_.empty()); } +SymbolTableImpl::Encoding::~Encoding() { + // Verifies that moveToStorage() was called on this encoding. Failure + // to call moveToStorage() will result in leaks symbols. + ASSERT(vec_.empty()); +} void SymbolTableImpl::Encoding::addSymbol(Symbol symbol) { // UTF-8-like encoding where a value 127 or less gets written as a single @@ -82,19 +86,9 @@ SymbolVec SymbolTableImpl::Encoding::decodeSymbols(const SymbolTable::Storage ar return symbol_vec; } -// Saves the specified length into the byte array, returning the next byte. -// There is no guarantee that bytes will be aligned, so we can't cast to a -// uint16_t* and assign, but must individually copy the bytes. -static inline uint8_t* saveLengthToBytesReturningNext(uint64_t length, uint8_t* bytes) { - ASSERT(length < StatNameMaxSize); - *bytes++ = length & 0xff; - *bytes++ = length >> 8; - return bytes; -} - uint64_t SymbolTableImpl::Encoding::moveToStorage(SymbolTable::Storage symbol_array) { - uint64_t sz = size(); - symbol_array = saveLengthToBytesReturningNext(sz, symbol_array); + uint64_t sz = dataBytesRequired(); + symbol_array = writeLengthReturningNext(sz, symbol_array); if (sz != 0) { memcpy(symbol_array, vec_.data(), sz * sizeof(uint8_t)); } @@ -150,8 +144,7 @@ uint64_t SymbolTableImpl::numSymbols() const { } std::string SymbolTableImpl::toString(const StatName& stat_name) const { - return decodeSymbolVec( - SymbolTableImpl::Encoding::decodeSymbols(stat_name.data(), stat_name.dataSize())); + return decodeSymbolVec(Encoding::decodeSymbols(stat_name.data(), stat_name.dataSize())); } void SymbolTableImpl::callWithStringView(StatName stat_name, @@ -174,8 +167,7 @@ std::string SymbolTableImpl::decodeSymbolVec(const SymbolVec& symbols) const { void SymbolTableImpl::incRefCount(const StatName& stat_name) { // Before taking the lock, decode the array of symbols from the SymbolTable::Storage. - SymbolVec symbols = - SymbolTableImpl::Encoding::decodeSymbols(stat_name.data(), stat_name.dataSize()); + SymbolVec symbols = Encoding::decodeSymbols(stat_name.data(), stat_name.dataSize()); Thread::LockGuard lock(lock_); for (Symbol symbol : symbols) { @@ -191,8 +183,7 @@ void SymbolTableImpl::incRefCount(const StatName& stat_name) { void SymbolTableImpl::free(const StatName& stat_name) { // Before taking the lock, decode the array of symbols from the SymbolTable::Storage. - SymbolVec symbols = - SymbolTableImpl::Encoding::decodeSymbols(stat_name.data(), stat_name.dataSize()); + SymbolVec symbols = Encoding::decodeSymbols(stat_name.data(), stat_name.dataSize()); Thread::LockGuard lock(lock_); for (Symbol symbol : symbols) { @@ -264,8 +255,8 @@ bool SymbolTableImpl::lessThan(const StatName& a, const StatName& b) const { // If this becomes a performance bottleneck (e.g. during sorting), we could // provide an iterator-like interface for incrementally decoding the symbols // without allocating memory. - SymbolVec av = SymbolTableImpl::Encoding::decodeSymbols(a.data(), a.dataSize()); - SymbolVec bv = SymbolTableImpl::Encoding::decodeSymbols(b.data(), b.dataSize()); + SymbolVec av = Encoding::decodeSymbols(a.data(), a.dataSize()); + SymbolVec bv = Encoding::decodeSymbols(b.data(), b.dataSize()); // Calling fromSymbol requires holding the lock, as it needs read-access to // the maps that are written when adding new symbols. @@ -332,7 +323,7 @@ SymbolTable::StoragePtr SymbolTableImpl::join(const std::vector& stat_ num_bytes += stat_name.dataSize(); } auto bytes = std::make_unique(num_bytes + StatNameSizeEncodingBytes); - uint8_t* p = saveLengthToBytesReturningNext(num_bytes, bytes.get()); + uint8_t* p = writeLengthReturningNext(num_bytes, bytes.get()); for (StatName stat_name : stat_names) { num_bytes = stat_name.dataSize(); memcpy(p, stat_name.data(), num_bytes); @@ -363,6 +354,11 @@ void SymbolTableImpl::populateList(absl::string_view* names, int32_t num_names, for (auto& encoding : encodings) { p += encoding.moveToStorage(p); } + + // This assertion double-checks the arithmetic where we computed + // total_size_bytes. After appending all the encoded data into the + // allocated byte array, we should wind up with a pointer difference of + // total_size_bytes from the beginning of the allocation. ASSERT(p == &storage[0] + total_size_bytes); list.moveStorageIntoList(std::move(storage)); } diff --git a/source/common/stats/symbol_table_impl.h b/source/common/stats/symbol_table_impl.h index 66d80dc9ed59f..9b4a4c7046107 100644 --- a/source/common/stats/symbol_table_impl.h +++ b/source/common/stats/symbol_table_impl.h @@ -71,12 +71,10 @@ using SymbolVec = std::vector; class SymbolTableImpl : public SymbolTable { public: /** - * Intermediate representation for a stat-name. This helps store multiple names - * in a single packed allocation. First we encode each desired name, then sum - * their sizes for the single packed allocation. This is used to store - * MetricImpl's tags and tagExtractedName. Like StatName, we don't want to pay - * a vptr overhead per object, and the representation is shared between the - * SymbolTable implementations, so this is just a pre-declare. + * Intermediate representation for a stat-name. This helps store multiple + * names in a single packed allocation. First we encode each desired name, + * then sum their sizes for the single packed allocation. This is used to + * store MetricImpl's tags and tagExtractedName. */ class Encoding { public: @@ -96,14 +94,6 @@ class SymbolTableImpl : public SymbolTable { */ void addSymbol(Symbol symbol); - /** - * Encodes an entire string into the vec, on behalf of FakeSymbolTableImpl. - * TODO(jmarantz): delete this method when FakeSymbolTableImpl is deleted. - * - * @param str The string to encode. - */ - void addStringForFakeSymbolTable(absl::string_view str); - /** * Decodes a uint8_t array into a SymbolVec. */ @@ -113,12 +103,12 @@ class SymbolTableImpl : public SymbolTable { * Returns the number of bytes required to represent StatName as a uint8_t * array, including the encoded size. */ - uint64_t bytesRequired() const { return size() + StatNameSizeEncodingBytes; } + uint64_t bytesRequired() const { return dataBytesRequired() + StatNameSizeEncodingBytes; } /** - * Returns the number of uint8_t entries we collected while adding symbols. + * @return the number of uint8_t entries we collected while adding symbols. */ - uint64_t size() const { return vec_.size(); } + uint64_t dataBytesRequired() const { return vec_.size(); } /** * Moves the contents of the vector into an allocated array. The array @@ -129,8 +119,6 @@ class SymbolTableImpl : public SymbolTable { */ uint64_t moveToStorage(SymbolTable::Storage array); - void swap(Encoding& src) { vec_.swap(src.vec_); } - private: std::vector vec_; }; @@ -145,18 +133,32 @@ class SymbolTableImpl : public SymbolTable { void free(const StatName& stat_name) override; void incRefCount(const StatName& stat_name) override; SymbolTable::StoragePtr join(const std::vector& stat_names) const override; - - void encode(absl::string_view name, Encoding& encoding); void populateList(absl::string_view* names, int32_t num_names, StatNameList& list) override; + StoragePtr copyToBytes(absl::string_view name) override; + void callWithStringView(StatName stat_name, + const std::function& fn) const override; #ifndef ENVOY_CONFIG_COVERAGE void debugPrint() const override; #endif - StoragePtr copyToBytes(absl::string_view name) override; + void encode(absl::string_view name, Encoding& encoding); - void callWithStringView(StatName stat_name, - const std::function& fn) const override; + /** + * Saves the specified length into the byte array, returning the next byte. + * There is no guarantee that bytes will be aligned, so we can't cast to a + * uint16_t* and assign, but must individually copy the bytes. + * + * @param length the length in bytes to write. Must be < StatNameMaxSize. + * @param bytes the pointer into which to write the length. + * @return the pointer to the next byte for writing the data. + */ + static inline uint8_t* writeLengthReturningNext(uint64_t length, uint8_t* bytes) { + ASSERT(length < StatNameMaxSize); + *bytes++ = length & 0xff; + *bytes++ = length >> 8; + return bytes; + } private: friend class StatName; From fac2a11d01f4baf070c1b59885c7de98c30813f3 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Wed, 20 Mar 2019 10:03:37 -0400 Subject: [PATCH 6/9] Improve some comments and method names. Signed-off-by: Joshua Marantz --- include/envoy/stats/symbol_table.h | 22 +++++++++++--------- source/common/stats/fake_symbol_table_impl.h | 20 +++++++----------- source/common/stats/symbol_table_impl.cc | 10 ++++----- source/common/stats/symbol_table_impl.h | 17 +++++++++++---- 4 files changed, 37 insertions(+), 32 deletions(-) diff --git a/include/envoy/stats/symbol_table.h b/include/envoy/stats/symbol_table.h index 7a89755d50f0b..cbe228440d7ab 100644 --- a/include/envoy/stats/symbol_table.h +++ b/include/envoy/stats/symbol_table.h @@ -93,9 +93,9 @@ class SymbolTable { * decode/encode into the elaborated form, and does not require locking the * SymbolTable. * - * The caveat is that this representation does not bump reference counts on - * the referenced Symbols in the SymbolTable, so it's only valid as long for - * the lifetime of the joined StatNames. + * Note that this method does not bump reference counts on the referenced + * Symbols in the SymbolTable, so it's only valid as long for the lifetime of + * the joined StatNames. * * This is intended for use doing cached name lookups of scoped stats, where * the scope prefix and the names to combine it with are already in StatName @@ -146,11 +146,15 @@ class SymbolTable { friend class StatNameStorage; friend class StatNameList; + // The following methods are private, but are called by friend classes + // StatNameStorage and StatNameList, which must be friendly with SymbolTable + // in order to manage the reference-counted symbols they own. + /** * Since SymbolTable does manual reference counting, a client of SymbolTable * must manually call free(symbol_vec) when it is freeing the backing store * for a StatName. This way, the symbol table will grow and shrink - * dynamically, instead of being write-only. + * dynamically, instead of being write-only * * @param stat_name the stat name. */ @@ -168,17 +172,15 @@ class SymbolTable { virtual void incRefCount(const StatName& stat_name) PURE; /** - * Encodes 'name' into the symbol table, tokenizing. Bumps reference counts - * for referenced symbols. The caller must manage the storage, and is - * responsible for calling SymbolTable::free() to release the reference - * counts. Note that this method is private, but is called by friend classes - * StatNameStorage and StatNameList. + * Encodes 'name' into the symbol table. Bumps reference counts for referenced + * symbols. The caller must manage the storage, and is responsible for calling + * SymbolTable::free() to release the reference counts. * * @param name The name to encode. * @return The encoded name, transferring ownership to the caller. * */ - virtual StoragePtr copyToBytes(absl::string_view name) PURE; + virtual StoragePtr encode(absl::string_view name) PURE; }; using SharedSymbolTable = std::shared_ptr; diff --git a/source/common/stats/fake_symbol_table_impl.h b/source/common/stats/fake_symbol_table_impl.h index 53e0b3660132f..371d545d86131 100644 --- a/source/common/stats/fake_symbol_table_impl.h +++ b/source/common/stats/fake_symbol_table_impl.h @@ -104,6 +104,7 @@ class FakeSymbolTableImpl : public SymbolTable { } void free(const StatName&) override {} void incRefCount(const StatName&) override {} + StoragePtr encode(absl::string_view name) override { return encodeHelper(name); } SymbolTable::StoragePtr join(const std::vector& names) const override { std::vector strings; for (StatName name : names) { @@ -112,20 +113,13 @@ class FakeSymbolTableImpl : public SymbolTable { strings.push_back(str); } } - return stringToStorage(absl::StrJoin(strings, ".")); + return encodeHelper(absl::StrJoin(strings, ".")); } #ifndef ENVOY_CONFIG_COVERAGE void debugPrint() const override {} #endif - StoragePtr copyToBytes(absl::string_view name) override { - auto bytes = std::make_unique(name.size() + StatNameSizeEncodingBytes); - uint8_t* buffer = SymbolTableImpl::writeLengthReturningNext(name.size(), bytes.get()); - memcpy(buffer, name.data(), name.size()); - return bytes; - } - void callWithStringView(StatName stat_name, const std::function& fn) const override { fn(toStringView(stat_name)); @@ -136,11 +130,11 @@ class FakeSymbolTableImpl : public SymbolTable { return {reinterpret_cast(stat_name.data()), stat_name.dataSize()}; } - StoragePtr stringToStorage(absl::string_view name) const { - auto storage = std::make_unique(name.size() + 2); - uint8_t* p = SymbolTableImpl::writeLengthReturningNext(name.size(), storage.get()); - memcpy(p, name.data(), name.size()); - return storage; + StoragePtr encodeHelper(absl::string_view name) const { + auto bytes = std::make_unique(name.size() + StatNameSizeEncodingBytes); + uint8_t* buffer = SymbolTableImpl::writeLengthReturningNext(name.size(), bytes.get()); + memcpy(buffer, name.data(), name.size()); + return bytes; } }; diff --git a/source/common/stats/symbol_table_impl.cc b/source/common/stats/symbol_table_impl.cc index ce31206f51c82..deb33aee8c1d2 100644 --- a/source/common/stats/symbol_table_impl.cc +++ b/source/common/stats/symbol_table_impl.cc @@ -111,7 +111,7 @@ SymbolTableImpl::~SymbolTableImpl() { // TODO(ambuc): There is a possible performance optimization here for avoiding // the encoding of IPs / numbers if they appear in stat names. We don't want to // waste time symbolizing an integer as an integer, if we can help it. -void SymbolTableImpl::encode(const absl::string_view name, Encoding& encoding) { +void SymbolTableImpl::addTokensToEncoding(const absl::string_view name, Encoding& encoding) { if (name.empty()) { return; } @@ -286,16 +286,16 @@ void SymbolTableImpl::debugPrint() const { } #endif -SymbolTable::StoragePtr SymbolTableImpl::copyToBytes(absl::string_view name) { +SymbolTable::StoragePtr SymbolTableImpl::encode(absl::string_view name) { Encoding encoding; - encode(name, encoding); + addTokensToEncoding(name, encoding); auto bytes = std::make_unique(encoding.bytesRequired()); encoding.moveToStorage(bytes.get()); return bytes; } StatNameStorage::StatNameStorage(absl::string_view name, SymbolTable& table) - : bytes_(table.copyToBytes(name)) {} + : bytes_(table.encode(name)) {} StatNameStorage::StatNameStorage(StatName src, SymbolTable& table) { uint64_t size = src.size(); @@ -342,7 +342,7 @@ void SymbolTableImpl::populateList(absl::string_view* names, int32_t num_names, STACK_ARRAY(encodings, Encoding, num_names); for (int32_t i = 0; i < num_names; ++i) { Encoding& encoding = encodings[i]; - encode(names[i], encoding); + addTokensToEncoding(names[i], encoding); total_size_bytes += encoding.bytesRequired(); } diff --git a/source/common/stats/symbol_table_impl.h b/source/common/stats/symbol_table_impl.h index 9b4a4c7046107..a4ace41be589e 100644 --- a/source/common/stats/symbol_table_impl.h +++ b/source/common/stats/symbol_table_impl.h @@ -134,7 +134,7 @@ class SymbolTableImpl : public SymbolTable { void incRefCount(const StatName& stat_name) override; SymbolTable::StoragePtr join(const std::vector& stat_names) const override; void populateList(absl::string_view* names, int32_t num_names, StatNameList& list) override; - StoragePtr copyToBytes(absl::string_view name) override; + StoragePtr encode(absl::string_view name) override; void callWithStringView(StatName stat_name, const std::function& fn) const override; @@ -142,8 +142,6 @@ class SymbolTableImpl : public SymbolTable { void debugPrint() const override; #endif - void encode(absl::string_view name, Encoding& encoding); - /** * Saves the specified length into the byte array, returning the next byte. * There is no guarantee that bytes will be aligned, so we can't cast to a @@ -201,9 +199,20 @@ class SymbolTableImpl : public SymbolTable { */ absl::string_view fromSymbol(Symbol symbol) const EXCLUSIVE_LOCKS_REQUIRED(lock_); - // Stages a new symbol for use. To be called after a successful insertion. + /** + * Stages a new symbol for use. To be called after a successful insertion. + */ void newSymbol(); + /** + * Tokenizes name, finds or allocates symbols for each token, and adds them + * to encoding. + * + * @param name The name to tokenize. + * @param encoding The encoding to write to. + */ + void addTokensToEncoding(absl::string_view name, Encoding& encoding); + Symbol monotonicCounter() { Thread::LockGuard lock(lock_); return monotonic_counter_; From d5254bfeea4cdb95baff22f98b6a1b1176995a98 Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Wed, 20 Mar 2019 10:22:53 -0400 Subject: [PATCH 7/9] Add 'tokenize' and 'tokenizes' to dictionary. Signed-off-by: Joshua Marantz --- tools/spelling_dictionary.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/spelling_dictionary.txt b/tools/spelling_dictionary.txt index 30b701ae8f1cf..829060a6bd5b4 100644 --- a/tools/spelling_dictionary.txt +++ b/tools/spelling_dictionary.txt @@ -691,6 +691,8 @@ th thru tm tmp +tokenize +tokenizes tokenizing traceid trie From 2acb4d33e64b31f08ab0a91d0e6ba5443a82c66f Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Sat, 30 Mar 2019 16:37:32 -0400 Subject: [PATCH 8/9] hail-mary to try to get past mysterious 'coverage' build failure. Signed-off-by: Joshua Marantz --- test/test_common/BUILD | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/test_common/BUILD b/test/test_common/BUILD index b8f15ec9147ca..6044a39b67715 100644 --- a/test/test_common/BUILD +++ b/test/test_common/BUILD @@ -189,7 +189,9 @@ envoy_cc_test_library( hdrs = ["simulated_time_system.h"], deps = [ ":test_time_system_interface", + "//source/common/event:event_impl_base_lib", "//source/common/event:real_time_system_lib", + "//source/common/event:timer_lib", ], ) From c94e0d158d1a96406be0b214792b2dee4006900a Mon Sep 17 00:00:00 2001 From: Joshua Marantz Date: Mon, 1 Apr 2019 17:09:13 -0400 Subject: [PATCH 9/9] Address review comments. Signed-off-by: Joshua Marantz --- include/envoy/stats/symbol_table.h | 7 ++++--- source/common/stats/fake_symbol_table_impl.h | 7 ++++--- source/common/stats/symbol_table_impl.cc | 4 ++-- source/common/stats/symbol_table_impl.h | 3 ++- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/include/envoy/stats/symbol_table.h b/include/envoy/stats/symbol_table.h index cbe228440d7ab..4b4c8a4c4fe9e 100644 --- a/include/envoy/stats/symbol_table.h +++ b/include/envoy/stats/symbol_table.h @@ -112,11 +112,12 @@ class SymbolTable { * construction time to enable StatNameList to be instantiated directly in * a class that doesn't have a live SymbolTable when it is constructed. * - * @param names A pointer to the first name in an array. + * @param names A pointer to the first name in an array, allocated by the caller. * @param num_names The number of names. * @param symbol_table The symbol table in which to encode the names. */ - virtual void populateList(absl::string_view* names, int32_t num_names, StatNameList& list) PURE; + virtual void populateList(const absl::string_view* names, uint32_t num_names, + StatNameList& list) PURE; #ifndef ENVOY_CONFIG_COVERAGE virtual void debugPrint() const PURE; @@ -154,7 +155,7 @@ class SymbolTable { * Since SymbolTable does manual reference counting, a client of SymbolTable * must manually call free(symbol_vec) when it is freeing the backing store * for a StatName. This way, the symbol table will grow and shrink - * dynamically, instead of being write-only + * dynamically, instead of being write-only. * * @param stat_name the stat name. */ diff --git a/source/common/stats/fake_symbol_table_impl.h b/source/common/stats/fake_symbol_table_impl.h index 371d545d86131..71560908c1574 100644 --- a/source/common/stats/fake_symbol_table_impl.h +++ b/source/common/stats/fake_symbol_table_impl.h @@ -50,7 +50,8 @@ namespace Stats { class FakeSymbolTableImpl : public SymbolTable { public: // SymbolTable - void populateList(absl::string_view* names, int32_t num_names, StatNameList& list) override { + void populateList(const absl::string_view* names, uint32_t num_names, + StatNameList& list) override { // This implementation of populateList is similar to // SymbolTableImpl::populateList. This variant is more efficient for // FakeSymbolTableImpl, because it avoid "encoding" each name in names. The @@ -68,7 +69,7 @@ class FakeSymbolTableImpl : public SymbolTable { // lengths of each name. size_t total_size_bytes = 1 + num_names * StatNameSizeEncodingBytes; - for (int32_t i = 0; i < num_names; ++i) { + for (uint32_t i = 0; i < num_names; ++i) { total_size_bytes += names[i].size(); } @@ -77,7 +78,7 @@ class FakeSymbolTableImpl : public SymbolTable { auto storage = std::make_unique(total_size_bytes); uint8_t* p = &storage[0]; *p++ = num_names; - for (int32_t i = 0; i < num_names; ++i) { + for (uint32_t i = 0; i < num_names; ++i) { auto& name = names[i]; size_t sz = name.size(); p = SymbolTableImpl::writeLengthReturningNext(sz, p); diff --git a/source/common/stats/symbol_table_impl.cc b/source/common/stats/symbol_table_impl.cc index deb33aee8c1d2..17948ee8e90fb 100644 --- a/source/common/stats/symbol_table_impl.cc +++ b/source/common/stats/symbol_table_impl.cc @@ -332,7 +332,7 @@ SymbolTable::StoragePtr SymbolTableImpl::join(const std::vector& stat_ return bytes; } -void SymbolTableImpl::populateList(absl::string_view* names, int32_t num_names, +void SymbolTableImpl::populateList(const absl::string_view* names, uint32_t num_names, StatNameList& list) { RELEASE_ASSERT(num_names < 256, "Maximum number elements in a StatNameList exceeded"); @@ -340,7 +340,7 @@ void SymbolTableImpl::populateList(absl::string_view* names, int32_t num_names, size_t total_size_bytes = 1; /* one byte for holding the number of names */ STACK_ARRAY(encodings, Encoding, num_names); - for (int32_t i = 0; i < num_names; ++i) { + for (uint32_t i = 0; i < num_names; ++i) { Encoding& encoding = encodings[i]; addTokensToEncoding(names[i], encoding); total_size_bytes += encoding.bytesRequired(); diff --git a/source/common/stats/symbol_table_impl.h b/source/common/stats/symbol_table_impl.h index a4ace41be589e..ade67b7a563e1 100644 --- a/source/common/stats/symbol_table_impl.h +++ b/source/common/stats/symbol_table_impl.h @@ -133,7 +133,8 @@ class SymbolTableImpl : public SymbolTable { void free(const StatName& stat_name) override; void incRefCount(const StatName& stat_name) override; SymbolTable::StoragePtr join(const std::vector& stat_names) const override; - void populateList(absl::string_view* names, int32_t num_names, StatNameList& list) override; + void populateList(const absl::string_view* names, uint32_t num_names, + StatNameList& list) override; StoragePtr encode(absl::string_view name) override; void callWithStringView(StatName stat_name, const std::function& fn) const override;