diff --git a/include/envoy/stats/symbol_table.h b/include/envoy/stats/symbol_table.h index 3f8152a6c8593..3bc6ee4069835 100644 --- a/include/envoy/stats/symbol_table.h +++ b/include/envoy/stats/symbol_table.h @@ -1,13 +1,166 @@ #pragma once +#include +#include + +#include "envoy/common/pure.h" + +#include "absl/strings/string_view.h" + namespace Envoy { namespace Stats { -// Interface for referencing a stat name. +/** + * Runtime representation of an encoded stat name. This is predeclared only in + * the interface without abstract methods, because (a) the underlying class + * representation is common to both implementations of SymbolTable, and (b) + * we do not want or need the overhead of a vptr per StatName. The common + * declaration for StatName is in source/common/stats/symbol_table_impl.h + */ class StatName; -// Interface for managing symbol tables. -class SymbolTable; +/** + * Intermediate representation for a stat-name. This helps store multiple names + * in a single packed allocation. First we encode each desired name, then sum + * their sizes for the single packed allocation. This is used to store + * MetricImpl's tags and tagExtractedName. Like StatName, we don't want to pay + * a vptr overhead per object, and the representation is shared between the + * SymbolTable implementations, so this is just a pre-declare. + */ +class SymbolEncoding; + +/** + * SymbolTable manages a namespace optimized for stat names, exploiting their + * typical composition from "."-separated tokens, with a significant overlap + * between the tokens. The interface is designed to balance optimal storage + * at scale with hiding details from users. We seek to provide the most abstract + * interface possible that avoids adding per-stat overhead or taking locks in + * the hot path. + */ +class SymbolTable { +public: + /** + * Efficient byte-encoded storage of an array of tokens. The most common + * tokens are typically < 127, and are represented directly. tokens >= 128 + * spill into the next byte, allowing for tokens of arbitrary numeric value to + * be stored. As long as the most common tokens are low-valued, the + * representation is space-efficient. This scheme is similar to UTF-8. The + * token ordering is dependent on the order in which stat-names are encoded + * into the SymbolTable, which will not be optimal, but in practice appears + * to be pretty good. + * + * This is exposed in the interface for the benefit of join(), which which is + * used in the hot-path to append two stat-names into a temp without taking + * locks. This is used then in thread-local cache lookup, so that once warm, + * no locks are taken when looking up stats. + */ + using Storage = uint8_t[]; + using StoragePtr = std::unique_ptr; + + virtual ~SymbolTable() = default; + + /** + * Encodes a stat name using the symbol table, returning a SymbolEncoding. The + * SymbolEncoding is not intended for long-term storage, but is used to help + * allocate a StatName with the correct amount of storage. + * + * When a name is encoded, it bumps reference counts held in the table for + * each symbol. The caller is responsible for creating a StatName using this + * SymbolEncoding and ultimately disposing of it by calling + * SymbolTable::free(). Users are protected from leaking symbols into the pool + * by ASSERTions in the SymbolTable destructor. + * + * @param name The name to encode. + * @return SymbolEncoding the encoded symbols. + */ + virtual SymbolEncoding encode(absl::string_view name) PURE; + + /** + * @return uint64_t the number of symbols in the symbol table. + */ + virtual uint64_t numSymbols() const PURE; + + /** + * Decodes a vector of symbols back into its period-delimited stat name. If + * decoding fails on any part of the symbol_vec, we release_assert and crash, + * since this should never happen, and we don't want to continue running + * with a corrupt stats set. + * + * @param stat_name the stat name. + * @return std::string stringifiied stat_name. + */ + virtual std::string toString(const StatName& stat_name) const PURE; + + /** + * Deterines whether one StatName lexically precedes another. Note that + * the lexical order may not exactly match the lexical order of the + * elaborated strings. For example, stat-name of "-.-" would lexically + * sort after "---" but when encoded as a StatName would come lexically + * earlier. In practice this is unlikely to matter as those are not + * reasonable names for Envoy stats. + * + * Note that this operation has to be performed with the context of the + * SymbolTable so that the individual Symbol objects can be converted + * into strings for lexical comparison. + * + * @param a the first stat name + * @param b the second stat name + * @return bool true if a lexically precedes b. + */ + virtual bool lessThan(const StatName& a, const StatName& b) const PURE; + + /** + * Joins two or more StatNames. For example if we have StatNames for {"a.b", + * "c.d", "e.f"} then the joined stat-name matches "a.b.c.d.e.f". The + * advantage of using this representation is that it avoids having to + * decode/encode into the elaborated form, and does not require locking the + * SymbolTable. + * + * The caveat is that this representation does not bump reference counts on + * the referenced Symbols in the SymbolTable, so it's only valid as long for + * the lifetime of the joined StatNames. + * + * This is intended for use doing cached name lookups of scoped stats, where + * the scope prefix and the names to combine it with are already in StatName + * form. Using this class, they can be combined without acessingm the + * SymbolTable or, in particular, taking its lock. + * + * @param stat_names the names to join. + * @return Storage allocated for the joined name. + */ + virtual StoragePtr join(const std::vector& stat_names) const PURE; + +#ifndef ENVOY_CONFIG_COVERAGE + virtual void debugPrint() const PURE; +#endif + +private: + friend class StatNameStorage; + friend class StatNameList; + + /** + * Since SymbolTable does manual reference counting, a client of SymbolTable + * must manually call free(symbol_vec) when it is freeing the backing store + * for a StatName. This way, the symbol table will grow and shrink + * dynamically, instead of being write-only. + * + * @param stat_name the stat name. + */ + virtual void free(const StatName& stat_name) PURE; + + /** + * StatName backing-store can be managed by callers in a variety of ways + * to minimize overhead. But any persistent reference to a StatName needs + * to hold onto its own reference-counts for all symbols. This method + * helps callers ensure the symbol-storage is maintained for the lifetime + * of a reference. + * + * @param stat_name the stat name. + */ + virtual void incRefCount(const StatName& stat_name) PURE; +}; + +using SharedSymbolTable = std::shared_ptr; } // namespace Stats } // namespace Envoy diff --git a/source/common/stats/BUILD b/source/common/stats/BUILD index 51b3b84c43177..85052c2361946 100644 --- a/source/common/stats/BUILD +++ b/source/common/stats/BUILD @@ -135,6 +135,12 @@ envoy_cc_library( ], ) +envoy_cc_library( + name = "fake_symbol_table_lib", + hdrs = ["fake_symbol_table_impl.h"], + deps = [":symbol_table_lib"], +) + envoy_cc_library( name = "stats_options_lib", hdrs = ["stats_options_impl.h"], diff --git a/source/common/stats/fake_symbol_table_impl.h b/source/common/stats/fake_symbol_table_impl.h new file mode 100644 index 0000000000000..6c7e2f37bdf86 --- /dev/null +++ b/source/common/stats/fake_symbol_table_impl.h @@ -0,0 +1,98 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "envoy/common/exception.h" +#include "envoy/stats/symbol_table.h" + +#include "common/common/assert.h" +#include "common/common/hash.h" +#include "common/common/lock_guard.h" +#include "common/common/non_copyable.h" +#include "common/common/thread.h" +#include "common/common/utility.h" +#include "common/stats/symbol_table_impl.h" + +#include "absl/strings/str_join.h" +#include "absl/strings/str_split.h" + +namespace Envoy { +namespace Stats { + +/** + * Implements the SymbolTable interface without taking locks or saving memory. + * This implementation is intended as a transient state for the Envoy codebase + * to allow incremental conversion of Envoy stats call-sites to use the + * SymbolTable interface, pre-allocating symbols during construction time for + * all stats tokens. + * + * Once all stat tokens are symbolized at construction time, this + * FakeSymbolTable implementation can be deleted, and real-symbol tables can be + * used, thereby reducing memory and improving stat construction time. + * + * Note that it is not necessary to pre-allocate all elaborated stat names + * because multiple StatNames can be joined together without taking locks, + * even in SymbolTableImpl. + * + * This implementation simply stores the characters directly in the uint8_t[] + * that backs each StatName, so there is no sharing or memory savings, but also + * no state associated with the SymbolTable, and thus no locks needed. + * + * TODO(jmarantz): delete this class once SymbolTable is fully deployed in the + * Envoy codebase. + */ +class FakeSymbolTableImpl : public SymbolTable { +public: + SymbolEncoding encode(absl::string_view name) override { return encodeHelper(name); } + + std::string toString(const StatName& stat_name) const override { + return std::string(toStringView(stat_name)); + } + uint64_t numSymbols() const override { return 0; } + bool lessThan(const StatName& a, const StatName& b) const override { + return toStringView(a) < toStringView(b); + } + void free(const StatName&) override {} + void incRefCount(const StatName&) override {} + SymbolTable::StoragePtr join(const std::vector& names) const override { + std::vector strings; + for (StatName name : names) { + absl::string_view str = toStringView(name); + if (!str.empty()) { + strings.push_back(str); + } + } + return stringToStorage(absl::StrJoin(strings, ".")); + } + +#ifndef ENVOY_CONFIG_COVERAGE + void debugPrint() const override {} +#endif + +private: + SymbolEncoding encodeHelper(absl::string_view name) const { + SymbolEncoding encoding; + encoding.addStringForFakeSymbolTable(name); + return encoding; + } + + absl::string_view toStringView(const StatName& stat_name) const { + return {reinterpret_cast(stat_name.data()), stat_name.dataSize()}; + } + + SymbolTable::StoragePtr stringToStorage(absl::string_view name) const { + SymbolEncoding encoding = encodeHelper(name); + auto bytes = std::make_unique(encoding.bytesRequired()); + encoding.moveToStorage(bytes.get()); + return bytes; + } +}; + +} // namespace Stats +} // namespace Envoy diff --git a/source/common/stats/symbol_table_impl.cc b/source/common/stats/symbol_table_impl.cc index 6ec094ab877e4..82a217ba0a493 100644 --- a/source/common/stats/symbol_table_impl.cc +++ b/source/common/stats/symbol_table_impl.cc @@ -1,5 +1,6 @@ #include "common/stats/symbol_table_impl.h" +#include #include #include #include @@ -15,14 +16,10 @@ namespace Stats { static const uint32_t SpilloverMask = 0x80; static const uint32_t Low7Bits = 0x7f; -StatName::StatName(const StatName& src, SymbolStorage memory) : size_and_data_(memory) { +StatName::StatName(const StatName& src, SymbolTable::Storage memory) : size_and_data_(memory) { memcpy(memory, src.size_and_data_, src.size()); } -std::string StatName::toString(const SymbolTable& table) const { - return table.decode(data(), dataSize()); -} - #ifndef ENVOY_CONFIG_COVERAGE void StatName::debugPrint() { if (size_and_data_ == nullptr) { @@ -63,7 +60,14 @@ void SymbolEncoding::addSymbol(Symbol symbol) { } while (symbol != 0); } -SymbolVec SymbolEncoding::decodeSymbols(const SymbolStorage array, uint64_t size) { +void SymbolEncoding::addStringForFakeSymbolTable(absl::string_view str) { + if (!str.empty()) { + vec_.resize(str.size()); + memcpy(&vec_[0], str.data(), str.size()); + } +} + +SymbolVec SymbolEncoding::decodeSymbols(const SymbolTable::Storage array, uint64_t size) { SymbolVec symbol_vec; Symbol symbol = 0; for (uint32_t shift = 0; size > 0; --size, ++array) { @@ -94,7 +98,7 @@ static inline uint8_t* saveLengthToBytesReturningNext(uint64_t length, uint8_t* return bytes; } -uint64_t SymbolEncoding::moveToStorage(SymbolStorage symbol_array) { +uint64_t SymbolEncoding::moveToStorage(SymbolTable::Storage symbol_array) { uint64_t sz = size(); symbol_array = saveLengthToBytesReturningNext(sz, symbol_array); if (sz != 0) { @@ -104,11 +108,11 @@ uint64_t SymbolEncoding::moveToStorage(SymbolStorage symbol_array) { return sz + StatNameSizeEncodingBytes; } -SymbolTable::SymbolTable() +SymbolTableImpl::SymbolTableImpl() // Have to be explicitly initialized, if we want to use the GUARDED_BY macro. : next_symbol_(0), monotonic_counter_(0) {} -SymbolTable::~SymbolTable() { +SymbolTableImpl::~SymbolTableImpl() { // To avoid leaks into the symbol table, we expect all StatNames to be freed. // Note: this could potentially be short-circuited if we decide a fast exit // is needed in production. But it would be good to ensure clean up during @@ -119,7 +123,7 @@ SymbolTable::~SymbolTable() { // TODO(ambuc): There is a possible performance optimization here for avoiding // the encoding of IPs / numbers if they appear in stat names. We don't want to // waste time symbolizing an integer as an integer, if we can help it. -SymbolEncoding SymbolTable::encode(const absl::string_view name) { +SymbolEncoding SymbolTableImpl::encode(const absl::string_view name) { SymbolEncoding encoding; if (name.empty()) { @@ -148,11 +152,17 @@ SymbolEncoding SymbolTable::encode(const absl::string_view name) { return encoding; } -std::string SymbolTable::decode(const SymbolStorage symbol_array, uint64_t size) const { - return decodeSymbolVec(SymbolEncoding::decodeSymbols(symbol_array, size)); +uint64_t SymbolTableImpl::numSymbols() const { + Thread::LockGuard lock(lock_); + ASSERT(encode_map_.size() == decode_map_.size()); + return encode_map_.size(); +} + +std::string SymbolTableImpl::toString(const StatName& stat_name) const { + return decodeSymbolVec(SymbolEncoding::decodeSymbols(stat_name.data(), stat_name.dataSize())); } -std::string SymbolTable::decodeSymbolVec(const SymbolVec& symbols) const { +std::string SymbolTableImpl::decodeSymbolVec(const SymbolVec& symbols) const { std::vector name_tokens; name_tokens.reserve(symbols.size()); { @@ -165,8 +175,8 @@ std::string SymbolTable::decodeSymbolVec(const SymbolVec& symbols) const { return absl::StrJoin(name_tokens, "."); } -void SymbolTable::incRefCount(const StatName& stat_name) { - // Before taking the lock, decode the array of symbols from the SymbolStorage. +void SymbolTableImpl::incRefCount(const StatName& stat_name) { + // Before taking the lock, decode the array of symbols from the SymbolTable::Storage. SymbolVec symbols = SymbolEncoding::decodeSymbols(stat_name.data(), stat_name.dataSize()); Thread::LockGuard lock(lock_); @@ -181,8 +191,8 @@ void SymbolTable::incRefCount(const StatName& stat_name) { } } -void SymbolTable::free(const StatName& stat_name) { - // Before taking the lock, decode the array of symbols from the SymbolStorage. +void SymbolTableImpl::free(const StatName& stat_name) { + // Before taking the lock, decode the array of symbols from the SymbolTable::Storage. SymbolVec symbols = SymbolEncoding::decodeSymbols(stat_name.data(), stat_name.dataSize()); Thread::LockGuard lock(lock_); @@ -206,8 +216,7 @@ void SymbolTable::free(const StatName& stat_name) { } } } - -Symbol SymbolTable::toSymbol(absl::string_view sv) { +Symbol SymbolTableImpl::toSymbol(absl::string_view sv) { Symbol result; auto encode_find = encode_map_.find(sv); // If the string segment doesn't already exist, @@ -233,13 +242,14 @@ Symbol SymbolTable::toSymbol(absl::string_view sv) { return result; } -absl::string_view SymbolTable::fromSymbol(const Symbol symbol) const SHARED_LOCKS_REQUIRED(lock_) { +absl::string_view SymbolTableImpl::fromSymbol(const Symbol symbol) const + EXCLUSIVE_LOCKS_REQUIRED(lock_) { auto search = decode_map_.find(symbol); RELEASE_ASSERT(search != decode_map_.end(), "no such symbol"); - return absl::string_view(*search->second); + return {*search->second}; } -void SymbolTable::newSymbol() EXCLUSIVE_LOCKS_REQUIRED(lock_) { +void SymbolTableImpl::newSymbol() EXCLUSIVE_LOCKS_REQUIRED(lock_) { if (pool_.empty()) { next_symbol_ = ++monotonic_counter_; } else { @@ -250,16 +260,19 @@ void SymbolTable::newSymbol() EXCLUSIVE_LOCKS_REQUIRED(lock_) { ASSERT(monotonic_counter_ != 0); } -bool SymbolTable::lessThan(const StatName& a, const StatName& b) const { +bool SymbolTableImpl::lessThan(const StatName& a, const StatName& b) const { // Constructing two temp vectors during lessThan is not strictly necessary. // If this becomes a performance bottleneck (e.g. during sorting), we could // provide an iterator-like interface for incrementally decoding the symbols // without allocating memory. SymbolVec av = SymbolEncoding::decodeSymbols(a.data(), a.dataSize()); SymbolVec bv = SymbolEncoding::decodeSymbols(b.data(), b.dataSize()); + + // Calling fromSymbol requires holding the lock, as it needs read-access to + // the maps that are written when adding new symbols. + Thread::LockGuard lock(lock_); for (uint64_t i = 0, n = std::min(av.size(), bv.size()); i < n; ++i) { if (av[i] != bv[i]) { - Thread::LockGuard lock(lock_); bool ret = fromSymbol(av[i]) < fromSymbol(bv[i]); return ret; } @@ -268,7 +281,7 @@ bool SymbolTable::lessThan(const StatName& a, const StatName& b) const { } #ifndef ENVOY_CONFIG_COVERAGE -void SymbolTable::debugPrint() const { +void SymbolTableImpl::debugPrint() const { Thread::LockGuard lock(lock_); std::vector symbols; for (const auto& p : decode_map_) { @@ -309,30 +322,67 @@ void StatNameStorage::free(SymbolTable& table) { bytes_.reset(); } -StatNameJoiner::StatNameJoiner(StatName a, StatName b) { - const uint64_t a_size = a.dataSize(); - const uint64_t b_size = b.dataSize(); - uint8_t* const p = alloc(a_size + b_size); - memcpy(p, a.data(), a_size); - memcpy(p + a_size, b.data(), b_size); -} - -StatNameJoiner::StatNameJoiner(const std::vector& stat_names) { +SymbolTable::StoragePtr SymbolTableImpl::join(const std::vector& stat_names) const { uint64_t num_bytes = 0; for (StatName stat_name : stat_names) { num_bytes += stat_name.dataSize(); } - uint8_t* p = alloc(num_bytes); + auto bytes = std::make_unique(num_bytes + StatNameSizeEncodingBytes); + uint8_t* p = saveLengthToBytesReturningNext(num_bytes, bytes.get()); for (StatName stat_name : stat_names) { num_bytes = stat_name.dataSize(); memcpy(p, stat_name.data(), num_bytes); p += num_bytes; } + return bytes; +} + +StatNameList::~StatNameList() { ASSERT(!populated()); } + +void StatNameList::populate(const std::vector& names, + SymbolTable& symbol_table) { + RELEASE_ASSERT(names.size() < 256, "Maximum number elements in a StatNameList exceeded"); + + // First encode all the names. + size_t total_size_bytes = 1; /* one byte for holding the number of names */ + std::vector encodings; + encodings.resize(names.size()); + int index = 0; + for (auto& name : names) { + SymbolEncoding encoding = symbol_table.encode(name); + total_size_bytes += encoding.bytesRequired(); + encodings[index++].swap(encoding); + } + + // Now allocate the exact number of bytes required and move the encodings + // into storage. + storage_ = std::make_unique(total_size_bytes); + uint8_t* p = &storage_[0]; + *p++ = encodings.size(); + for (auto& encoding : encodings) { + p += encoding.moveToStorage(p); + } + ASSERT(p == &storage_[0] + total_size_bytes); +} + +void StatNameList::iterate(const std::function& f) const { + uint8_t* p = &storage_[0]; + uint32_t num_elements = *p++; + for (uint32_t i = 0; i < num_elements; ++i) { + StatName stat_name(p); + p += stat_name.size(); + if (!f(stat_name)) { + break; + } + } } -uint8_t* StatNameJoiner::alloc(uint64_t num_bytes) { - bytes_ = std::make_unique(num_bytes + StatNameSizeEncodingBytes); - return saveLengthToBytesReturningNext(num_bytes, bytes_.get()); +void StatNameList::clear(SymbolTable& symbol_table) { + iterate([&symbol_table](StatName stat_name) -> bool { + symbol_table.free(stat_name); + return true; + }); + storage_.reset(); } } // namespace Stats diff --git a/source/common/stats/symbol_table_impl.h b/source/common/stats/symbol_table_impl.h index 6004ab22f2c21..f7c33ebd1d93f 100644 --- a/source/common/stats/symbol_table_impl.h +++ b/source/common/stats/symbol_table_impl.h @@ -28,15 +28,6 @@ namespace Stats { /** A Symbol represents a string-token with a small index. */ using Symbol = uint32_t; -/** - * Efficient byte-encoded storage of an array of tokens. The most common tokens - * are typically < 127, and are represented directly. tokens >= 128 spill into - * the next byte, allowing for tokens of arbitrary numeric value to be stored. - * As long as the most common tokens are low-valued, the representation is - * space-efficient. This scheme is similar to UTF-8. - */ -using SymbolStorage = uint8_t[]; - /** * We encode the byte-size of a StatName as its first two bytes. */ @@ -68,10 +59,18 @@ class SymbolEncoding { */ void addSymbol(Symbol symbol); + /** + * Encodes an entire string into the vec, on behalf of FakeSymbolTableImpl. + * TODO(jmarantz): delete this method when FakeSymbolTableImpl is deleted. + * + * @param str The string to encode. + */ + void addStringForFakeSymbolTable(absl::string_view str); + /** * Decodes a uint8_t array into a SymbolVec. */ - static SymbolVec decodeSymbols(const SymbolStorage array, uint64_t size); + static SymbolVec decodeSymbols(const SymbolTable::Storage array, uint64_t size); /** * Returns the number of bytes required to represent StatName as a uint8_t @@ -91,7 +90,7 @@ class SymbolEncoding { * @param array destination memory to receive the encoded bytes. * @return uint64_t the number of bytes transferred. */ - uint64_t moveToStorage(SymbolStorage array); + uint64_t moveToStorage(SymbolTable::Storage array); void swap(SymbolEncoding& src) { vec_.swap(src.vec_); } @@ -100,16 +99,17 @@ class SymbolEncoding { }; /** - * SymbolTable manages a namespace optimized for stats, which are typically + * SymbolTableImpl manages a namespace optimized for stats, which are typically * composed of arrays of "."-separated tokens, with a significant overlap * between the tokens. Each token is mapped to a Symbol (uint32_t) and * reference-counted so that no-longer-used symbols can be reclaimed. * - * We use a uint8_t array to encode arrays of symbols in order to conserve - * space, as in practice the majority of token instances in stat names draw from - * a fairly small set of common names, typically less than 100. The format is - * somewhat similar to UTF-8, with a variable-length array of uint8_t. See the - * implementation for details. + * We use a uint8_t array to encode a "."-deliminated stat-name into arrays of + * integer symbol symbol IDs in order to conserve space, as in practice the + * majority of token instances in stat names draw from a fairly small set of + * common names, typically less than 100. The format is somewhat similar to + * UTF-8, with a variable-length array of uint8_t. See the implementation for + * details. * * StatNameStorage can be used to manage memory for the byte-encoding. Not all * StatNames are backed by StatNameStorage -- the storage may be inlined into @@ -128,95 +128,24 @@ class SymbolEncoding { * same string is re-encoded, it may or may not encode to the same underlying * symbol. */ -class SymbolTable { +class SymbolTableImpl : public SymbolTable { public: - SymbolTable(); - ~SymbolTable(); - - /** - * Encodes a stat name using the symbol table, returning a SymbolEncoding. The - * SymbolEncoding is not intended for long-term storage, but is used to help - * allocate and StatName with the correct amount of storage. - * - * When a name is encoded, it bumps reference counts held in the table for - * each symbol. The caller is responsible for creating a StatName using this - * SymbolEncoding and ultimately disposing of it by calling - * StatName::free(). Otherwise the symbols will leak for the lifetime of the - * table, though they won't show up as a C++ leaks as the memory is still - * reachable from the SymbolTable. - * - * @param name The name to encode. - * @return SymbolEncoding the encoded symbols. - */ - SymbolEncoding encode(absl::string_view name); - - /** - * @return uint64_t the number of symbols in the symbol table. - */ - uint64_t numSymbols() const { - Thread::LockGuard lock(lock_); - ASSERT(encode_map_.size() == decode_map_.size()); - uint64_t sz = encode_map_.size(); - return sz; - } - - /** - * Determines whether one StatName lexically precedes another. Note that - * the lexical order may not exactly match the lexical order of the - * elaborated strings. For example, stat-name of "-.-" would lexically - * sort after "---" but when encoded as a StatName would come lexically - * earlier. In practice this is unlikely to matter as those are not - * reasonable names for Envoy stats. - * - * Note that this operation has to be performed with the context of the - * SymbolTable so that the individual Symbol objects can be converted - * into strings for lexical comparison. - * - * @param a the first stat name - * @param b the second stat name - * @return bool true if a lexically precedes b. - */ - bool lessThan(const StatName& a, const StatName& b) const; - - /** - * Since SymbolTable does manual reference counting, a client of SymbolTable - * must manually call free(stat_name) when it is freeing the backing store - * for a StatName. This way, the symbol table will grow and shrink - * dynamically, instead of being write-only. - * - * @param stat_name the stat name whose symbols should be freed. - */ - void free(const StatName& stat_name); - - /** - * StatName backing-store can be managed by callers in a variety of ways - * to minimize overhead. But any persistent reference to a StatName needs - * to hold onto its own reference-counts for all symbols. This method - * helps callers ensure the symbol-storage is maintained for the lifetime - * of a reference. - * - * @param stat_name the stat name whose symbols should be freed. - */ - void incRefCount(const StatName& stat_name); + SymbolTableImpl(); + ~SymbolTableImpl() override; + + // SymbolTable + std::string toString(const StatName& stat_name) const override; + SymbolEncoding encode(absl::string_view name) override; + uint64_t numSymbols() const override; + bool lessThan(const StatName& a, const StatName& b) const override; + void free(const StatName& stat_name) override; + void incRefCount(const StatName& stat_name) override; + SymbolTable::StoragePtr join(const std::vector& stat_names) const override; #ifndef ENVOY_CONFIG_COVERAGE - // It is convenient when debugging to be able to print the state of the table, - // but this code is not hit during tests ordinarily, and is not needed in - // production code. - void debugPrint() const; + void debugPrint() const override; #endif - /** - * Decodes a vector of symbols back into its period-delimited stat name. If - * decoding fails on any part of the symbol_vec, we release_assert, since this - * should never happen, and we don't want to continue running with a corrupt - * stats set. - * - * @param symbol_vec the vector of symbols to decode. - * @return std::string the retrieved stat name. - */ - std::string decode(const SymbolStorage symbol_vec, uint64_t size) const; - private: friend class StatName; friend class StatNameTest; @@ -256,12 +185,15 @@ class SymbolTable { * @param symbol the individual symbol to be decoded. * @return absl::string_view the decoded string. */ - absl::string_view fromSymbol(Symbol symbol) const; + absl::string_view fromSymbol(Symbol symbol) const EXCLUSIVE_LOCKS_REQUIRED(lock_); // Stages a new symbol for use. To be called after a successful insertion. void newSymbol(); - Symbol monotonicCounter() { return monotonic_counter_; } + Symbol monotonicCounter() { + Thread::LockGuard lock(lock_); + return monotonic_counter_; + } // Stores the symbol to be used at next insertion. This should exist ahead of insertion time so // that if insertion succeeds, the value written is the correct one. @@ -332,7 +264,7 @@ class StatNameStorage { inline StatName statName() const; private: - std::unique_ptr bytes_; + SymbolTable::StoragePtr bytes_; }; /** @@ -349,7 +281,7 @@ class StatName { public: // Constructs a StatName object directly referencing the storage of another // StatName. - explicit StatName(const SymbolStorage size_and_data) : size_and_data_(size_and_data) {} + explicit StatName(const SymbolTable::Storage size_and_data) : size_and_data_(size_and_data) {} // Constructs an empty StatName object. StatName() : size_and_data_(nullptr) {} @@ -357,9 +289,7 @@ class StatName { // Constructs a StatName object with new storage, which must be of size // src.size(). This is used in the a flow where we first construct a StatName // for lookup in a cache, and then on a miss need to store the data directly. - StatName(const StatName& src, SymbolStorage memory); - - std::string toString(const SymbolTable& table) const; + StatName(const StatName& src, SymbolTable::Storage memory); /** * Note that this hash function will return a different hash than that of @@ -392,7 +322,7 @@ class StatName { */ uint64_t size() const { return dataSize() + StatNameSizeEncodingBytes; } - void copyToStorage(SymbolStorage storage) { memcpy(storage, size_and_data_, size()); } + void copyToStorage(SymbolTable::Storage storage) { memcpy(storage, size_and_data_, size()); } #ifndef ENVOY_CONFIG_COVERAGE void debugPrint(); @@ -409,37 +339,6 @@ class StatName { StatName StatNameStorage::statName() const { return StatName(bytes_.get()); } -/** - * Joins two or more StatNames. For example if we have StatNames for {"a.b", - * "c.d", "e.f"} then the joined stat-name matches "a.b.c.d.e.f". The advantage - * of using this representation is that it avoids having to decode/encode - * into the elaborated form, and does not require locking the SymbolTable. - * - * The caveat is that this representation does not bump reference counts on - * for the referenced Symbols in the SymbolTable, so it's only valid as long - * for the lifetime of the joined StatNames. - * - * This is intended for use doing cached name lookups of scoped stats, where - * the scope prefix and the names to combine it with are already in StatName - * form. Using this class, they can be combined without accessing the - * SymbolTable or, in particular, taking its lock. - */ -class StatNameJoiner { -public: - StatNameJoiner(StatName a, StatName b); - StatNameJoiner(const std::vector& stat_names); - - /** - * @return StatName a reference to the joined StatName. - */ - StatName statName() const { return StatName(bytes_.get()); } - -private: - uint8_t* alloc(uint64_t num_bytes); - - std::unique_ptr bytes_; -}; - /** * Contains the backing store for a StatName and enough context so it can * self-delete through RAII. This works by augmenting StatNameStorage with a @@ -465,6 +364,56 @@ class StatNameTempStorage : public StatNameStorage { SymbolTable& symbol_table_; }; +// Represents an ordered container of StatNames. The encoding for each StatName +// is byte-packed together, so this carries less overhead than allocating the +// storage separately. The tradeoff is there is no random access; you can only +// iterate through the StatNames. +// +// The maximum size of the list is 255 elements, so the length can fit in a +// byte. It would not be difficult to increase this, but there does not appear +// to be a current need. +class StatNameList { +public: + ~StatNameList(); + + /** + * Populates the StatNameList from a list of encodings. This is not done at + * construction time to enable StatNameList to be instantiated directly in + * a class that doesn't have a live SymbolTable when it is constructed. + * + * @param encodings The list names to encode. + * @param symbol_table The symbol table in which to encode the names. + */ + void populate(const std::vector& encodings, SymbolTable& symbol_table); + + /** + * @return true if populate() has been called on this list. + */ + bool populated() const { return storage_ != nullptr; } + + /** + * Iterates over each StatName in the list, calling f(StatName). f() should + * return true to keep iterating, or false to end the iteration. + * + * @param f The function to call on each stat. + */ + void iterate(const std::function& f) const; + + /** + * Frees each StatName in the list. Failure to call this before destruction + * results in an ASSERT at destruction of the list and the SymbolTable. + * + * This is not done as part of destruction as the SymbolTable may already + * be destroyed. + * + * @param symbol_table the symbol table. + */ + void clear(SymbolTable& symbol_table); + +private: + std::unique_ptr storage_; +}; + // Helper class for constructing hash-tables with StatName keys. struct StatNameHash { size_t operator()(const StatName& a) const { return a.hash(); } diff --git a/test/common/stats/BUILD b/test/common/stats/BUILD index 2abece37c900e..b3d2020e43890 100644 --- a/test/common/stats/BUILD +++ b/test/common/stats/BUILD @@ -74,6 +74,7 @@ envoy_cc_test( ":stat_test_utility_lib", "//source/common/common:mutex_tracer_lib", "//source/common/memory:stats_lib", + "//source/common/stats:fake_symbol_table_lib", "//source/common/stats:symbol_table_lib", "//test/mocks/stats:stats_mocks", "//test/test_common:logging_lib", diff --git a/test/common/stats/symbol_table_impl_test.cc b/test/common/stats/symbol_table_impl_test.cc index ab4b96761ba7b..6b91a15236f41 100644 --- a/test/common/stats/symbol_table_impl_test.cc +++ b/test/common/stats/symbol_table_impl_test.cc @@ -2,6 +2,7 @@ #include "common/common/mutex_tracer_impl.h" #include "common/memory/stats.h" +#include "common/stats/fake_symbol_table_impl.h" #include "common/stats/symbol_table_impl.h" #include "test/common/stats/stat_test_utility.h" @@ -14,82 +15,116 @@ namespace Envoy { namespace Stats { -class StatNameTest : public testing::Test { +// See comments in fake_symbol_table_impl.h: we need to test two implementations +// of SymbolTable, which we'll do with a test parameterized on this enum. +// +// Note that some of the tests cover behavior that is specific to the real +// SymbolTableImpl, and thus early-exit when the param is Fake. +// +// TODO(jmarantz): un-parameterize this test once SymbolTable is fully deployed +// and FakeSymbolTableImpl can be deleted. +enum class SymbolTableType { + Real, + Fake, +}; + +class StatNameTest : public testing::TestWithParam { protected: - ~StatNameTest() { clearStorage(); } + StatNameTest() { + switch (GetParam()) { + case SymbolTableType::Real: { + auto table = std::make_unique(); + real_symbol_table_ = table.get(); + table_ = std::move(table); + break; + } + case SymbolTableType::Fake: + table_ = std::make_unique(); + break; + } + } + ~StatNameTest() override { clearStorage(); } void clearStorage() { for (auto& stat_name_storage : stat_name_storage_) { - stat_name_storage.free(table_); + stat_name_storage.free(*table_); } stat_name_storage_.clear(); - EXPECT_EQ(0, table_.numSymbols()); + EXPECT_EQ(0, table_->numSymbols()); } SymbolVec getSymbols(StatName stat_name) { return SymbolEncoding::decodeSymbols(stat_name.data(), stat_name.dataSize()); } std::string decodeSymbolVec(const SymbolVec& symbol_vec) { - return table_.decodeSymbolVec(symbol_vec); + return real_symbol_table_->decodeSymbolVec(symbol_vec); } - Symbol monotonicCounter() { return table_.monotonicCounter(); } + Symbol monotonicCounter() { return real_symbol_table_->monotonicCounter(); } std::string encodeDecode(absl::string_view stat_name) { - return makeStat(stat_name).toString(table_); + return table_->toString(makeStat(stat_name)); } - StatNameStorage makeStatStorage(absl::string_view name) { return StatNameStorage(name, table_); } + StatNameStorage makeStatStorage(absl::string_view name) { return StatNameStorage(name, *table_); } StatName makeStat(absl::string_view name) { stat_name_storage_.emplace_back(makeStatStorage(name)); return stat_name_storage_.back().statName(); } - SymbolTable table_; + SymbolTableImpl* real_symbol_table_{nullptr}; + std::unique_ptr table_; std::vector stat_name_storage_; }; -TEST_F(StatNameTest, AllocFree) { encodeDecode("hello.world"); } +INSTANTIATE_TEST_CASE_P(StatNameTest, StatNameTest, + testing::ValuesIn({SymbolTableType::Real, SymbolTableType::Fake})); + +TEST_P(StatNameTest, AllocFree) { encodeDecode("hello.world"); } -TEST_F(StatNameTest, TestArbitrarySymbolRoundtrip) { +TEST_P(StatNameTest, TestArbitrarySymbolRoundtrip) { const std::vector stat_names = {"", " ", " ", ",", "\t", "$", "%", "`", "."}; - for (auto stat_name : stat_names) { + for (auto& stat_name : stat_names) { EXPECT_EQ(stat_name, encodeDecode(stat_name)); } } -TEST_F(StatNameTest, Test100kSymbolsRoundtrip) { +TEST_P(StatNameTest, Test100KSymbolsRoundtrip) { for (int i = 0; i < 100 * 1000; ++i) { const std::string stat_name = absl::StrCat("symbol_", i); EXPECT_EQ(stat_name, encodeDecode(stat_name)); } } -TEST_F(StatNameTest, TestUnusualDelimitersRoundtrip) { +TEST_P(StatNameTest, TestUnusualDelimitersRoundtrip) { const std::vector stat_names = {".", "..", "...", "foo", "foo.", ".foo", ".foo.", ".foo..", "..foo.", "..foo.."}; - for (auto stat_name : stat_names) { + for (auto& stat_name : stat_names) { EXPECT_EQ(stat_name, encodeDecode(stat_name)); } } -TEST_F(StatNameTest, TestSuccessfulDoubleLookup) { +TEST_P(StatNameTest, TestSuccessfulDoubleLookup) { StatName stat_name_1(makeStat("foo.bar.baz")); StatName stat_name_2(makeStat("foo.bar.baz")); EXPECT_EQ(stat_name_1, stat_name_2); } -TEST_F(StatNameTest, TestSuccessfulDecode) { +TEST_P(StatNameTest, TestSuccessfulDecode) { std::string stat_name = "foo.bar.baz"; StatName stat_name_1(makeStat(stat_name)); StatName stat_name_2(makeStat(stat_name)); - EXPECT_EQ(stat_name_1.toString(table_), stat_name_2.toString(table_)); - EXPECT_EQ(stat_name_1.toString(table_), stat_name); + EXPECT_EQ(table_->toString(stat_name_1), table_->toString(stat_name_2)); + EXPECT_EQ(table_->toString(stat_name_1), stat_name); } class StatNameDeathTest : public StatNameTest {}; -TEST_F(StatNameDeathTest, TestBadDecodes) { +TEST_P(StatNameDeathTest, TestBadDecodes) { + if (GetParam() == SymbolTableType::Fake) { + return; + } + { // If a symbol doesn't exist, decoding it should trigger an ASSERT() and crash. SymbolVec bad_symbol_vec = {1}; // symbol 0 is the empty symbol. @@ -107,14 +142,17 @@ TEST_F(StatNameDeathTest, TestBadDecodes) { } } -TEST_F(StatNameTest, TestDifferentStats) { +TEST_P(StatNameTest, TestDifferentStats) { StatName stat_name_1(makeStat("foo.bar")); StatName stat_name_2(makeStat("bar.foo")); - EXPECT_NE(stat_name_1.toString(table_), stat_name_2.toString(table_)); + EXPECT_NE(table_->toString(stat_name_1), table_->toString(stat_name_2)); EXPECT_NE(stat_name_1, stat_name_2); } -TEST_F(StatNameTest, TestSymbolConsistency) { +TEST_P(StatNameTest, TestSymbolConsistency) { + if (GetParam() == SymbolTableType::Fake) { + return; + } StatName stat_name_1(makeStat("foo.bar")); StatName stat_name_2(makeStat("bar.foo")); // We expect the encoding of "foo" in one context to be the same as another. @@ -124,13 +162,13 @@ TEST_F(StatNameTest, TestSymbolConsistency) { EXPECT_EQ(vec_2[0], vec_1[1]); } -TEST_F(StatNameTest, TestSameValueOnPartialFree) { +TEST_P(StatNameTest, TestSameValueOnPartialFree) { // This should hold true for components as well. Since "foo" persists even when "foo.bar" is // freed, we expect both instances of "foo" to have the same symbol. makeStat("foo"); StatNameStorage stat_foobar_1(makeStatStorage("foo.bar")); SymbolVec stat_foobar_1_symbols = getSymbols(stat_foobar_1.statName()); - stat_foobar_1.free(table_); + stat_foobar_1.free(*table_); StatName stat_foobar_2(makeStat("foo.bar")); SymbolVec stat_foobar_2_symbols = getSymbols(stat_foobar_2); @@ -139,7 +177,11 @@ TEST_F(StatNameTest, TestSameValueOnPartialFree) { // And we have no expectation for the "bar" components, because of the free pool. } -TEST_F(StatNameTest, FreePoolTest) { +TEST_P(StatNameTest, FreePoolTest) { + if (GetParam() == SymbolTableType::Fake) { + return; + } + // To ensure that the free pool is being used, we should be able to cycle through a large number // of stats while validating that: // a) the size of the table has not increased, and @@ -153,11 +195,11 @@ TEST_F(StatNameTest, FreePoolTest) { makeStat("4a"); makeStat("5a"); EXPECT_EQ(monotonicCounter(), 5); - EXPECT_EQ(table_.numSymbols(), 5); + EXPECT_EQ(table_->numSymbols(), 5); clearStorage(); } EXPECT_EQ(monotonicCounter(), 5); - EXPECT_EQ(table_.numSymbols(), 0); + EXPECT_EQ(table_->numSymbols(), 0); // These are different strings being encoded, but they should recycle through the same symbols as // the stats above. @@ -167,55 +209,55 @@ TEST_F(StatNameTest, FreePoolTest) { makeStat("4b"); makeStat("5b"); EXPECT_EQ(monotonicCounter(), 5); - EXPECT_EQ(table_.numSymbols(), 5); + EXPECT_EQ(table_->numSymbols(), 5); makeStat("6"); EXPECT_EQ(monotonicCounter(), 6); - EXPECT_EQ(table_.numSymbols(), 6); + EXPECT_EQ(table_->numSymbols(), 6); } -TEST_F(StatNameTest, TestShrinkingExpectation) { +TEST_P(StatNameTest, TestShrinkingExpectation) { // We expect that as we free stat names, the memory used to store those underlying symbols will // be freed. // ::size() is a public function, but should only be used for testing. - size_t table_size_0 = table_.numSymbols(); + size_t table_size_0 = table_->numSymbols(); StatNameStorage stat_a(makeStatStorage("a")); - size_t table_size_1 = table_.numSymbols(); + size_t table_size_1 = table_->numSymbols(); StatNameStorage stat_aa(makeStatStorage("a.a")); - EXPECT_EQ(table_size_1, table_.numSymbols()); + EXPECT_EQ(table_size_1, table_->numSymbols()); StatNameStorage stat_ab(makeStatStorage("a.b")); - size_t table_size_2 = table_.numSymbols(); + size_t table_size_2 = table_->numSymbols(); StatNameStorage stat_ac(makeStatStorage("a.c")); - size_t table_size_3 = table_.numSymbols(); + size_t table_size_3 = table_->numSymbols(); StatNameStorage stat_acd(makeStatStorage("a.c.d")); - size_t table_size_4 = table_.numSymbols(); + size_t table_size_4 = table_->numSymbols(); StatNameStorage stat_ace(makeStatStorage("a.c.e")); - size_t table_size_5 = table_.numSymbols(); + size_t table_size_5 = table_->numSymbols(); EXPECT_GE(table_size_5, table_size_4); - stat_ace.free(table_); - EXPECT_EQ(table_size_4, table_.numSymbols()); + stat_ace.free(*table_); + EXPECT_EQ(table_size_4, table_->numSymbols()); - stat_acd.free(table_); - EXPECT_EQ(table_size_3, table_.numSymbols()); + stat_acd.free(*table_); + EXPECT_EQ(table_size_3, table_->numSymbols()); - stat_ac.free(table_); - EXPECT_EQ(table_size_2, table_.numSymbols()); + stat_ac.free(*table_); + EXPECT_EQ(table_size_2, table_->numSymbols()); - stat_ab.free(table_); - EXPECT_EQ(table_size_1, table_.numSymbols()); + stat_ab.free(*table_); + EXPECT_EQ(table_size_1, table_->numSymbols()); - stat_aa.free(table_); - EXPECT_EQ(table_size_1, table_.numSymbols()); + stat_aa.free(*table_); + EXPECT_EQ(table_size_1, table_->numSymbols()); - stat_a.free(table_); - EXPECT_EQ(table_size_0, table_.numSymbols()); + stat_a.free(*table_); + EXPECT_EQ(table_size_0, table_->numSymbols()); } // In the tests above we use the StatNameStorage abstraction which is not the @@ -223,29 +265,35 @@ TEST_F(StatNameTest, TestShrinkingExpectation) { // you may want to store bytes in a larger structure. For example, you might // want to allocate two different StatName objects in contiguous memory. The // safety-net here in terms of leaks is that SymbolTable will assert-fail if -// you don't free all the StatNames you've allocated bytes for. -TEST_F(StatNameTest, StoringWithoutStatNameStorage) { - SymbolEncoding hello_encoding = table_.encode("hello.world"); - SymbolEncoding goodbye_encoding = table_.encode("goodbye.world"); - size_t size = hello_encoding.bytesRequired() + goodbye_encoding.bytesRequired(); - size_t goodbye_offset = hello_encoding.bytesRequired(); - std::unique_ptr storage(new uint8_t[size]); - hello_encoding.moveToStorage(storage.get()); - goodbye_encoding.moveToStorage(storage.get() + goodbye_offset); - - StatName hello(storage.get()); - StatName goodbye(storage.get() + goodbye_offset); - - EXPECT_EQ("hello.world", hello.toString(table_)); - EXPECT_EQ("goodbye.world", goodbye.toString(table_)); - - // If we don't explicitly call free() on the the StatName objects the - // SymbolTable will assert on destruction. - table_.free(hello); - table_.free(goodbye); +// you don't free all the StatNames you've allocated bytes for. StatNameList +// provides this capability. +TEST_P(StatNameTest, List) { + std::vector names{"hello.world", "goodbye.world"}; + StatNameList name_list; + EXPECT_FALSE(name_list.populated()); + name_list.populate(names, *table_); + EXPECT_TRUE(name_list.populated()); + + // First, decode only the first name. + name_list.iterate([this](StatName stat_name) -> bool { + EXPECT_EQ("hello.world", table_->toString(stat_name)); + return false; + }); + + // Decode all the names. + std::vector decoded_strings; + name_list.iterate([this, &decoded_strings](StatName stat_name) -> bool { + decoded_strings.push_back(table_->toString(stat_name)); + return true; + }); + ASSERT_EQ(2, decoded_strings.size()); + EXPECT_EQ("hello.world", decoded_strings[0]); + EXPECT_EQ("goodbye.world", decoded_strings[1]); + name_list.clear(*table_); + EXPECT_FALSE(name_list.populated()); } -TEST_F(StatNameTest, HashTable) { +TEST_P(StatNameTest, HashTable) { StatName ac = makeStat("a.c"); StatName ab = makeStat("a.b"); StatName de = makeStat("d.e"); @@ -263,62 +311,141 @@ TEST_F(StatNameTest, HashTable) { EXPECT_EQ(3, name_int_map[de]); } -TEST_F(StatNameTest, Sort) { +TEST_P(StatNameTest, Sort) { std::vector names{makeStat("a.c"), makeStat("a.b"), makeStat("d.e"), makeStat("d.a.a"), makeStat("d.a"), makeStat("a.c")}; const std::vector sorted_names{makeStat("a.b"), makeStat("a.c"), makeStat("a.c"), makeStat("d.a"), makeStat("d.a.a"), makeStat("d.e")}; EXPECT_NE(names, sorted_names); - std::sort(names.begin(), names.end(), StatNameLessThan(table_)); + std::sort(names.begin(), names.end(), StatNameLessThan(*table_)); EXPECT_EQ(names, sorted_names); } -TEST_F(StatNameTest, Concat2) { - StatNameJoiner joiner(makeStat("a.b"), makeStat("c.d")); - EXPECT_EQ("a.b.c.d", joiner.statName().toString(table_)); +TEST_P(StatNameTest, Concat2) { + SymbolTable::StoragePtr joined = table_->join({makeStat("a.b"), makeStat("c.d")}); + EXPECT_EQ("a.b.c.d", table_->toString(StatName(joined.get()))); +} + +TEST_P(StatNameTest, ConcatFirstEmpty) { + SymbolTable::StoragePtr joined = table_->join({makeStat(""), makeStat("c.d")}); + EXPECT_EQ("c.d", table_->toString(StatName(joined.get()))); } -TEST_F(StatNameTest, ConcatFirstEmpty) { - StatNameJoiner joiner(makeStat(""), makeStat("c.d")); - EXPECT_EQ("c.d", joiner.statName().toString(table_)); +TEST_P(StatNameTest, ConcatSecondEmpty) { + SymbolTable::StoragePtr joined = table_->join({makeStat("a.b"), makeStat("")}); + EXPECT_EQ("a.b", table_->toString(StatName(joined.get()))); } -TEST_F(StatNameTest, ConcatSecondEmpty) { - StatNameJoiner joiner(makeStat("a.b"), makeStat("")); - EXPECT_EQ("a.b", joiner.statName().toString(table_)); +TEST_P(StatNameTest, ConcatAllEmpty) { + SymbolTable::StoragePtr joined = table_->join({makeStat(""), makeStat("")}); + EXPECT_EQ("", table_->toString(StatName(joined.get()))); } -TEST_F(StatNameTest, ConcatAllEmpty) { - StatNameJoiner joiner(makeStat(""), makeStat("")); - EXPECT_EQ("", joiner.statName().toString(table_)); +TEST_P(StatNameTest, Join3) { + SymbolTable::StoragePtr joined = + table_->join({makeStat("a.b"), makeStat("c.d"), makeStat("e.f")}); + EXPECT_EQ("a.b.c.d.e.f", table_->toString(StatName(joined.get()))); } -TEST_F(StatNameTest, Join3) { - StatNameJoiner joiner({makeStat("a.b"), makeStat("c.d"), makeStat("e.f")}); - EXPECT_EQ("a.b.c.d.e.f", joiner.statName().toString(table_)); +TEST_P(StatNameTest, Join3FirstEmpty) { + SymbolTable::StoragePtr joined = table_->join({makeStat(""), makeStat("c.d"), makeStat("e.f")}); + EXPECT_EQ("c.d.e.f", table_->toString(StatName(joined.get()))); } -TEST_F(StatNameTest, Join3FirstEmpty) { - StatNameJoiner joiner({makeStat(""), makeStat("c.d"), makeStat("e.f")}); - EXPECT_EQ("c.d.e.f", joiner.statName().toString(table_)); +TEST_P(StatNameTest, Join3SecondEmpty) { + SymbolTable::StoragePtr joined = table_->join({makeStat("a.b"), makeStat(""), makeStat("e.f")}); + EXPECT_EQ("a.b.e.f", table_->toString(StatName(joined.get()))); } -TEST_F(StatNameTest, Join3SecondEmpty) { - StatNameJoiner joiner({makeStat("a.b"), makeStat(""), makeStat("e.f")}); - EXPECT_EQ("a.b.e.f", joiner.statName().toString(table_)); +TEST_P(StatNameTest, Join3ThirdEmpty) { + SymbolTable::StoragePtr joined = table_->join({makeStat("a.b"), makeStat("c.d"), makeStat("")}); + EXPECT_EQ("a.b.c.d", table_->toString(StatName(joined.get()))); } -TEST_F(StatNameTest, Join3ThirdEmpty) { - StatNameJoiner joiner({makeStat("a.b"), makeStat("c.d"), makeStat("")}); - EXPECT_EQ("a.b.c.d", joiner.statName().toString(table_)); +TEST_P(StatNameTest, JoinAllEmpty) { + SymbolTable::StoragePtr joined = table_->join({makeStat(""), makeStat(""), makeStat("")}); + EXPECT_EQ("", table_->toString(StatName(joined.get()))); } -TEST_F(StatNameTest, JoinAllEmpty) { - StatNameJoiner joiner({makeStat(""), makeStat(""), makeStat("")}); - EXPECT_EQ("", joiner.statName().toString(table_)); +TEST_P(StatNameTest, RacingSymbolCreation) { + Thread::ThreadFactory& thread_factory = Thread::threadFactoryForTest(); + MutexTracerImpl& mutex_tracer = MutexTracerImpl::getOrCreateTracer(); + + // Make 100 threads, each of which will race to encode an overlapping set of + // symbols, triggering corner-cases in SymbolTable::toSymbol. + constexpr int num_threads = 100; + std::vector threads; + threads.reserve(num_threads); + ConditionalInitializer creation, access, wait; + absl::BlockingCounter creates(num_threads), accesses(num_threads); + for (int i = 0; i < num_threads; ++i) { + threads.push_back( + thread_factory.createThread([this, i, &creation, &access, &wait, &creates, &accesses]() { + // Rotate between 20 different symbols to try to get some + // contention. Based on a logging print statement in + // SymbolTable::toSymbol(), this appears to trigger creation-races, + // even when compiled with optimization. + std::string stat_name_string = absl::StrCat("symbol", i % 20); + + // Block each thread on waking up a common condition variable, + // so we make it likely to race on creation. + creation.wait(); + StatNameTempStorage initial(stat_name_string, *table_); + creates.DecrementCount(); + + access.wait(); + StatNameTempStorage second(stat_name_string, *table_); + accesses.DecrementCount(); + + wait.wait(); + })); + } + creation.setReady(); + creates.Wait(); + + int64_t create_contentions = mutex_tracer.numContentions(); + ENVOY_LOG_MISC(info, "Number of contentions: {}", create_contentions); + + // But when we access the already-existing symbols, we guarantee that no + // further mutex contentions occur. + int64_t start_contentions = mutex_tracer.numContentions(); + access.setReady(); + accesses.Wait(); + + // With fake symbol tables, there are no contentions as there are is no state + // in the symbol table to lock. This is why fake symbol tables are a safe way + // to transition the codebase to use the SymbolTable API without impacting + // hot-path performance. + if (GetParam() == SymbolTableType::Fake) { + EXPECT_EQ(create_contentions, mutex_tracer.numContentions()); + EXPECT_EQ(start_contentions, create_contentions); + } + + // In a perfect world, we could use reader-locks in the SymbolTable + // implementation, and there should be zero additional contentions + // after latching 'create_contentions' above. And we can definitely + // have this world, but this slows down BM_CreateRace in + // symbol_table_speed_test.cc, even on a 72-core machine. + // + // Thus it is better to avoid symbol-table contention by refactoring + // all stat-creation code to symbolize all stat string elements at + // construction, as composition does not require a lock. + // + // See this commit + // https://github.com/envoyproxy/envoy/pull/5321/commits/ef712d0f5a11ff49831c1935e8a2ef8a0a935bc9 + // for a working reader-lock implementation, which would pass this EXPECT: + // EXPECT_EQ(create_contentions, mutex_tracer.numContentions()); + // + // Note also that we cannot guarantee there *will* be contentions + // as a machine or OS is free to run all threads serially. + + wait.setReady(); + for (auto& thread : threads) { + thread->join(); + } } -TEST_F(StatNameTest, MutexContentionOnExistingSymbols) { +TEST_P(StatNameTest, MutexContentionOnExistingSymbols) { Thread::ThreadFactory& thread_factory = Thread::threadFactoryForTest(); MutexTracerImpl& mutex_tracer = MutexTracerImpl::getOrCreateTracer(); @@ -341,11 +468,11 @@ TEST_F(StatNameTest, MutexContentionOnExistingSymbols) { // Block each thread on waking up a common condition variable, // so we make it likely to race on creation. creation.wait(); - StatNameTempStorage initial(stat_name_string, table_); + StatNameTempStorage initial(stat_name_string, *table_); creates.DecrementCount(); access.wait(); - StatNameTempStorage second(stat_name_string, table_); + StatNameTempStorage second(stat_name_string, *table_); accesses.DecrementCount(); wait.wait(); @@ -386,8 +513,10 @@ TEST_F(StatNameTest, MutexContentionOnExistingSymbols) { } } -// Tests the memory savings realized from using symbol tables with 1k clusters. This -// test shows the memory drops from almost 8M to less than 2M. +// Tests the memory savings realized from using symbol tables with 1k +// clusters. This test shows the memory drops from almost 8M to less than +// 2M. Note that only SymbolTableImpl is tested for memory consumption, +// and not FakeSymbolTableImpl. TEST(SymbolTableTest, Memory) { if (!TestUtil::hasDeterministicMallocStats()) { return; @@ -411,7 +540,7 @@ TEST(SymbolTableTest, Memory) { string_mem_used = test_memory_usage(record_stat); } { - SymbolTable table; + SymbolTableImpl table; std::vector names; auto record_stat = [&names, &table](absl::string_view stat) { names.emplace_back(StatNameStorage(stat, table)); diff --git a/test/common/stats/symbol_table_speed_test.cc b/test/common/stats/symbol_table_speed_test.cc index 3d310f8e4a9bb..7273ce427e215 100644 --- a/test/common/stats/symbol_table_speed_test.cc +++ b/test/common/stats/symbol_table_speed_test.cc @@ -22,7 +22,7 @@ static void BM_CreateRace(benchmark::State& state) { threads.reserve(num_threads); Envoy::ConditionalInitializer access, wait; absl::BlockingCounter accesses(num_threads); - Envoy::Stats::SymbolTable table; + Envoy::Stats::SymbolTableImpl table; const absl::string_view stat_name_string = "here.is.a.stat.name"; Envoy::Stats::StatNameStorage initial(stat_name_string, table);