diff --git a/envoy/stats/BUILD b/envoy/stats/BUILD index ae9b06419d568..612ef108ad59e 100644 --- a/envoy/stats/BUILD +++ b/envoy/stats/BUILD @@ -52,10 +52,6 @@ envoy_cc_library( envoy_cc_library( name = "symbol_table_interface", hdrs = ["symbol_table.h"], - external_deps = ["abseil_inlined_vector"], - deps = [ - "//source/common/common:hash_lib", - ], ) envoy_cc_library( diff --git a/envoy/stats/symbol_table.h b/envoy/stats/symbol_table.h index 1d642b3fe8a08..39ac815d0e32c 100644 --- a/envoy/stats/symbol_table.h +++ b/envoy/stats/symbol_table.h @@ -1,249 +1,22 @@ #pragma once -#include -#include -#include -#include - -#include "envoy/common/pure.h" - -#include "absl/container/inlined_vector.h" -#include "absl/strings/string_view.h" - namespace Envoy { namespace Stats { -/** - * Runtime representation of an encoded stat name. This is predeclared only in - * the interface without abstract methods, because (a) the underlying class - * representation is common to both implementations of SymbolTable, and (b) - * we do not want or need the overhead of a vptr per StatName. The common - * declaration for StatName is in source/common/stats/symbol_table_impl.h - */ -class StatName; -using StatNameVec = absl::InlinedVector; - -class StatNameList; -class StatNameSet; - -using StatNameSetPtr = std::unique_ptr; +// Forward declarations for the symbol table classes. See +// source/common/stats/symbol_table_impl.h for the class definitions. +// +// TODO(jmarantz): remove this file and put the forward declarations into stats.h. /** - * Holds a range of indexes indicating which parts of a stat-name are - * dynamic. This is used to transfer stats from hot-restart parent to child, - * retaining the same name structure. + * Runtime representation of an encoded stat name. 
*/ -using DynamicSpan = std::pair; -using DynamicSpans = std::vector; +class StatName; /** - * SymbolTable manages a namespace optimized for stat names, exploiting their - * typical composition from "."-separated tokens, with a significant overlap - * between the tokens. The interface is designed to balance optimal storage - * at scale with hiding details from users. We seek to provide the most abstract - * interface possible that avoids adding per-stat overhead or taking locks in - * the hot path. + * Holds a set of symbols used to compose hierarchical names. */ -class SymbolTable { -public: - /** - * Efficient byte-encoded storage of an array of tokens. The most common - * tokens are typically < 127, and are represented directly. tokens >= 128 - * spill into the next byte, allowing for tokens of arbitrary numeric value to - * be stored. As long as the most common tokens are low-valued, the - * representation is space-efficient. This scheme is similar to UTF-8. The - * token ordering is dependent on the order in which stat-names are encoded - * into the SymbolTable, which will not be optimal, but in practice appears - * to be pretty good. - * - * This is exposed in the interface for the benefit of join(), which is - * used in the hot-path to append two stat-names into a temp without taking - * locks. This is used then in thread-local cache lookup, so that once warm, - * no locks are taken when looking up stats. - */ - using Storage = uint8_t[]; - using StoragePtr = std::unique_ptr; - - virtual ~SymbolTable() = default; - - /** - * @return uint64_t the number of symbols in the symbol table. - */ - virtual uint64_t numSymbols() const PURE; - - /** - * Decodes a vector of symbols back into its period-delimited stat name. If - * decoding fails on any part of the symbol_vec, we release_assert and crash, - * since this should never happen, and we don't want to continue running - * with a corrupt stats set. - * - * @param stat_name the stat name. 
- * @return std::string stringified stat_name. - */ - virtual std::string toString(const StatName& stat_name) const PURE; - - /** - * Determines whether one StatName lexically precedes another. Note that - * the lexical order may not exactly match the lexical order of the - * elaborated strings. For example, stat-name of "-.-" would lexically - * sort after "---" but when encoded as a StatName would come lexically - * earlier. In practice this is unlikely to matter as those are not - * reasonable names for Envoy stats. - * - * Note that this operation has to be performed with the context of the - * SymbolTable so that the individual Symbol objects can be converted - * into strings for lexical comparison. - * - * @param a the first stat name - * @param b the second stat name - * @return bool true if a lexically precedes b. - */ - virtual bool lessThan(const StatName& a, const StatName& b) const PURE; - virtual bool lessThanLockHeld(const StatName& a, const StatName& b) const PURE; - - /** - * Joins two or more StatNames. For example if we have StatNames for {"a.b", - * "c.d", "e.f"} then the joined stat-name matches "a.b.c.d.e.f". The - * advantage of using this representation is that it avoids having to - * decode/encode into the elaborated form, and does not require locking the - * SymbolTable. - * - * Note that this method does not bump reference counts on the referenced - * Symbols in the SymbolTable, so it's only valid as long for the lifetime of - * the joined StatNames. - * - * This is intended for use doing cached name lookups of scoped stats, where - * the scope prefix and the names to combine it with are already in StatName - * form. Using this class, they can be combined without accessing the - * SymbolTable or, in particular, taking its lock. - * - * @param stat_names the names to join. - * @return Storage allocated for the joined name. 
- */ - virtual StoragePtr join(const StatNameVec& stat_names) const PURE; - - /** - * Populates a StatNameList from a list of encodings. This is not done at - * construction time to enable StatNameList to be instantiated directly in - * a class that doesn't have a live SymbolTable when it is constructed. - * - * @param names A pointer to the first name in an array, allocated by the caller. - * @param num_names The number of names. - * @param list The StatNameList representing the stat names. - */ - virtual void populateList(const StatName* names, uint32_t num_names, StatNameList& list) PURE; - -#ifndef ENVOY_CONFIG_COVERAGE - virtual void debugPrint() const PURE; -#endif - - using RecentLookupsFn = std::function; - - /** - * Calls the provided function with the name of the most recently looked-up - * symbols, including lookups on any StatNameSets, and with a count of - * the recent lookups on that symbol. - * - * @param iter the function to call for every recent item. - */ - virtual uint64_t getRecentLookups(const RecentLookupsFn& iter) const PURE; - - /** - * Clears the recent-lookups structures. - */ - virtual void clearRecentLookups() PURE; - - /** - * Sets the recent-lookup capacity. - */ - virtual void setRecentLookupCapacity(uint64_t capacity) PURE; - - /** - * @return The configured recent-lookup tracking capacity. - */ - virtual uint64_t recentLookupCapacity() const PURE; - - /** - * Creates a StatNameSet. - * - * @param name the name of the set. - * @return the set. - */ - virtual StatNameSetPtr makeSet(absl::string_view name) PURE; - - /** - * Identifies the dynamic components of a stat_name into an array of integer - * pairs, indicating the begin/end of spans of tokens in the stat-name that - * are created from StatNameDynamicStore or StatNameDynamicPool. - * - * This can be used to reconstruct the same exact StatNames in - * StatNames::mergeStats(), to enable stat continuity across hot-restart. - * - * @param stat_name the input stat name. 
- * @return the array of pairs indicating the bounds. - */ - virtual DynamicSpans getDynamicSpans(StatName stat_name) const PURE; - - /** - * Calls a function with the symbol table's lock held. This is needed - * for sortByStatName to avoid taking a lock on each comparison. - * - * TODO(jmarantz): This indirection can likely be removed once SymbolTable - * is changed from an interface to a concrete implementation. The interface - * was only longer needed during construction to allow for a fake symbol - * table implementation to be used by default and controlled by flag. - * - * @param fn a function to be called once the lock ahs been acquired. - */ - virtual void withLockHeld(std::function fn) const PURE; - -private: - friend struct HeapStatData; - friend class StatNameDynamicStorage; - friend class StatNameStorage; - friend class StatNameList; - friend class StatNameSet; - - // The following methods are private, but are called by friend classes - // StatNameStorage and StatNameList, which must be friendly with SymbolTable - // in order to manage the reference-counted symbols they own. - - /** - * Since SymbolTable does manual reference counting, a client of SymbolTable - * must manually call free(symbol_vec) when it is freeing the backing store - * for a StatName. This way, the symbol table will grow and shrink - * dynamically, instead of being write-only. - * - * @param stat_name the stat name. - */ - virtual void free(const StatName& stat_name) PURE; - - /** - * StatName backing-store can be managed by callers in a variety of ways - * to minimize overhead. But any persistent reference to a StatName needs - * to hold onto its own reference-counts for all symbols. This method - * helps callers ensure the symbol-storage is maintained for the lifetime - * of a reference. - * - * @param stat_name the stat name. - */ - virtual void incRefCount(const StatName& stat_name) PURE; - - /** - * Encodes 'name' into the symbol table. 
Bumps reference counts for referenced - * symbols. The caller must manage the storage, and is responsible for calling - * SymbolTable::free() to release the reference counts. - * - * @param name The name to encode. - * @return The encoded name, transferring ownership to the caller. - * - */ - virtual StoragePtr encode(absl::string_view name) PURE; - - virtual StoragePtr makeDynamicStorage(absl::string_view name) PURE; -}; - -using SymbolTablePtr = std::unique_ptr; +class SymbolTable; } // namespace Stats } // namespace Envoy diff --git a/source/common/stats/BUILD b/source/common/stats/BUILD index 122d5d66ca4a2..f4670d18109e7 100644 --- a/source/common/stats/BUILD +++ b/source/common/stats/BUILD @@ -89,7 +89,6 @@ envoy_cc_library( deps = [ ":symbol_table_lib", "//envoy/stats:stats_interface", - "//envoy/stats:symbol_table_interface", "//source/common/config:well_known_names", ], ) @@ -190,7 +189,10 @@ envoy_cc_library( name = "symbol_table_lib", srcs = ["symbol_table_impl.cc"], hdrs = ["symbol_table_impl.h"], - external_deps = ["abseil_base"], + external_deps = [ + "abseil_base", + "abseil_inlined_vector", + ], deps = [ ":recent_lookups_lib", "//envoy/stats:symbol_table_interface", diff --git a/source/common/stats/allocator_impl.cc b/source/common/stats/allocator_impl.cc index 7caf6c1f2e353..b61e079c7c7c9 100644 --- a/source/common/stats/allocator_impl.cc +++ b/source/common/stats/allocator_impl.cc @@ -5,7 +5,6 @@ #include "envoy/stats/sink.h" #include "envoy/stats/stats.h" -#include "envoy/stats/symbol_table.h" #include "source/common/common/hash.h" #include "source/common/common/lock_guard.h" diff --git a/source/common/stats/symbol_table_impl.cc b/source/common/stats/symbol_table_impl.cc index 4184b10274763..a2b2d00fd3a60 100644 --- a/source/common/stats/symbol_table_impl.cc +++ b/source/common/stats/symbol_table_impl.cc @@ -35,7 +35,7 @@ size_t StatName::dataSize() const { if (size_and_data_ == nullptr) { return 0; } - return 
SymbolTableImpl::Encoding::decodeNumber(size_and_data_).first; + return SymbolTable::Encoding::decodeNumber(size_and_data_).first; } #ifndef ENVOY_CONFIG_COVERAGE @@ -51,7 +51,7 @@ void StatName::debugPrint() { for (size_t i = 0; i < nbytes; ++i) { std::cerr << " " << static_cast(data()[i]); } - const SymbolVec encoding = SymbolTableImpl::Encoding::decodeSymbols(*this); + const SymbolVec encoding = SymbolTable::Encoding::decodeSymbols(*this); std::cerr << ", numSymbols=" << encoding.size() << ":"; for (Symbol symbol : encoding) { std::cerr << " " << symbol; @@ -61,13 +61,13 @@ void StatName::debugPrint() { } #endif -SymbolTableImpl::Encoding::~Encoding() { +SymbolTable::Encoding::~Encoding() { // Verifies that moveToMemBlock() was called on this encoding. Failure // to call moveToMemBlock() will result in leaks symbols. ASSERT(mem_block_.capacity() == 0); } -size_t SymbolTableImpl::Encoding::encodingSizeBytes(uint64_t number) { +size_t SymbolTable::Encoding::encodingSizeBytes(uint64_t number) { size_t num_bytes = 0; do { ++num_bytes; @@ -76,8 +76,7 @@ size_t SymbolTableImpl::Encoding::encodingSizeBytes(uint64_t number) { return num_bytes; } -void SymbolTableImpl::Encoding::appendEncoding(uint64_t number, - MemBlockBuilder& mem_block) { +void SymbolTable::Encoding::appendEncoding(uint64_t number, MemBlockBuilder& mem_block) { // UTF-8-like encoding where a value 127 or less gets written as a single // byte. For higher values we write the low-order 7 bits with a 1 in // the high-order bit. 
Then we right-shift 7 bits and keep adding more bytes @@ -95,7 +94,7 @@ void SymbolTableImpl::Encoding::appendEncoding(uint64_t number, } while (number != 0); } -void SymbolTableImpl::Encoding::addSymbols(const std::vector& symbols) { +void SymbolTable::Encoding::addSymbols(const std::vector& symbols) { ASSERT(data_bytes_required_ == 0); for (Symbol symbol : symbols) { data_bytes_required_ += encodingSizeBytes(symbol); @@ -106,7 +105,7 @@ void SymbolTableImpl::Encoding::addSymbols(const std::vector& symbols) { } } -std::pair SymbolTableImpl::Encoding::decodeNumber(const uint8_t* encoding) { +std::pair SymbolTable::Encoding::decodeNumber(const uint8_t* encoding) { uint64_t number = 0; uint64_t uc = SpilloverMask; const uint8_t* start = encoding; @@ -117,7 +116,7 @@ std::pair SymbolTableImpl::Encoding::decodeNumber(const uint8_ return std::make_pair(number, encoding - start); } -SymbolVec SymbolTableImpl::Encoding::decodeSymbols(StatName stat_name) { +SymbolVec SymbolTable::Encoding::decodeSymbols(StatName stat_name) { SymbolVec symbol_vec; symbol_vec.reserve(stat_name.dataSize()); decodeTokens( @@ -137,7 +136,7 @@ SymbolVec SymbolTableImpl::Encoding::decodeSymbols(StatName stat_name) { // The array-version of decodeSymbols is still a good approach for converting a // StatName to a string as the intermediate array of string_view is needed to // allocate the right size for the joined string. -class SymbolTableImpl::Encoding::TokenIter { +class SymbolTable::Encoding::TokenIter { public: // The type of token reached. 
enum class TokenType { StringView, Symbol, End }; @@ -208,7 +207,7 @@ class SymbolTableImpl::Encoding::TokenIter { #endif }; -void SymbolTableImpl::Encoding::decodeTokens( +void SymbolTable::Encoding::decodeTokens( StatName stat_name, const std::function& symbol_token_fn, const std::function& string_view_token_fn) { TokenIter iter(stat_name); @@ -224,7 +223,7 @@ void SymbolTableImpl::Encoding::decodeTokens( } bool StatName::startsWith(StatName prefix) const { - using TokenIter = SymbolTableImpl::Encoding::TokenIter; + using TokenIter = SymbolTable::Encoding::TokenIter; TokenIter prefix_iter(prefix); TokenIter this_iter(*this); while (true) { @@ -250,7 +249,7 @@ bool StatName::startsWith(StatName prefix) const { return true; // not reached } -std::vector SymbolTableImpl::decodeStrings(StatName stat_name) const { +std::vector SymbolTable::decodeStrings(StatName stat_name) const { std::vector strings; Thread::LockGuard lock(lock_); Encoding::decodeTokens( @@ -261,14 +260,14 @@ std::vector SymbolTableImpl::decodeStrings(StatName stat_name return strings; } -void SymbolTableImpl::Encoding::moveToMemBlock(MemBlockBuilder& mem_block) { +void SymbolTable::Encoding::moveToMemBlock(MemBlockBuilder& mem_block) { appendEncoding(data_bytes_required_, mem_block); mem_block.appendBlock(mem_block_); mem_block_.reset(); // Logically transfer ownership, enabling empty assert on destruct. } -void SymbolTableImpl::Encoding::appendToMemBlock(StatName stat_name, - MemBlockBuilder& mem_block) { +void SymbolTable::Encoding::appendToMemBlock(StatName stat_name, + MemBlockBuilder& mem_block) { const uint8_t* data = stat_name.dataIncludingSize(); if (data == nullptr) { mem_block.appendOne(0); @@ -277,11 +276,11 @@ void SymbolTableImpl::Encoding::appendToMemBlock(StatName stat_name, } } -SymbolTableImpl::SymbolTableImpl() +SymbolTable::SymbolTable() // Have to be explicitly initialized, if we want to use the ABSL_GUARDED_BY macro. 
: next_symbol_(FirstValidSymbol), monotonic_counter_(FirstValidSymbol) {} -SymbolTableImpl::~SymbolTableImpl() { +SymbolTable::~SymbolTable() { // To avoid leaks into the symbol table, we expect all StatNames to be freed. // Note: this could potentially be short-circuited if we decide a fast exit // is needed in production. But it would be good to ensure clean up during @@ -292,7 +291,7 @@ SymbolTableImpl::~SymbolTableImpl() { // TODO(ambuc): There is a possible performance optimization here for avoiding // the encoding of IPs / numbers if they appear in stat names. We don't want to // waste time symbolizing an integer as an integer, if we can help it. -void SymbolTableImpl::addTokensToEncoding(const absl::string_view name, Encoding& encoding) { +void SymbolTable::addTokensToEncoding(const absl::string_view name, Encoding& encoding) { if (name.empty()) { return; } @@ -321,17 +320,17 @@ void SymbolTableImpl::addTokensToEncoding(const absl::string_view name, Encoding encoding.addSymbols(symbols); } -uint64_t SymbolTableImpl::numSymbols() const { +uint64_t SymbolTable::numSymbols() const { Thread::LockGuard lock(lock_); ASSERT(encode_map_.size() == decode_map_.size()); return encode_map_.size(); } -std::string SymbolTableImpl::toString(const StatName& stat_name) const { +std::string SymbolTable::toString(const StatName& stat_name) const { return absl::StrJoin(decodeStrings(stat_name), "."); } -void SymbolTableImpl::incRefCount(const StatName& stat_name) { +void SymbolTable::incRefCount(const StatName& stat_name) { // Before taking the lock, decode the array of symbols from the SymbolTable::Storage. const SymbolVec symbols = Encoding::decodeSymbols(stat_name); @@ -353,7 +352,7 @@ void SymbolTableImpl::incRefCount(const StatName& stat_name) { } } -void SymbolTableImpl::free(const StatName& stat_name) { +void SymbolTable::free(const StatName& stat_name) { // Before taking the lock, decode the array of symbols from the SymbolTable::Storage. 
const SymbolVec symbols = Encoding::decodeSymbols(stat_name); @@ -379,7 +378,7 @@ void SymbolTableImpl::free(const StatName& stat_name) { } } -uint64_t SymbolTableImpl::getRecentLookups(const RecentLookupsFn& iter) const { +uint64_t SymbolTable::getRecentLookups(const RecentLookupsFn& iter) const { uint64_t total = 0; absl::flat_hash_map name_count_map; @@ -409,7 +408,7 @@ uint64_t SymbolTableImpl::getRecentLookups(const RecentLookupsFn& iter) const { return total; } -DynamicSpans SymbolTableImpl::getDynamicSpans(StatName stat_name) const { +DynamicSpans SymbolTable::getDynamicSpans(StatName stat_name) const { DynamicSpans dynamic_spans; uint32_t index = 0; @@ -435,28 +434,28 @@ DynamicSpans SymbolTableImpl::getDynamicSpans(StatName stat_name) const { return dynamic_spans; } -void SymbolTableImpl::setRecentLookupCapacity(uint64_t capacity) { +void SymbolTable::setRecentLookupCapacity(uint64_t capacity) { Thread::LockGuard lock(lock_); recent_lookups_.setCapacity(capacity); } -void SymbolTableImpl::clearRecentLookups() { +void SymbolTable::clearRecentLookups() { Thread::LockGuard lock(lock_); recent_lookups_.clear(); } -uint64_t SymbolTableImpl::recentLookupCapacity() const { +uint64_t SymbolTable::recentLookupCapacity() const { Thread::LockGuard lock(lock_); return recent_lookups_.capacity(); } -StatNameSetPtr SymbolTableImpl::makeSet(absl::string_view name) { - // make_unique does not work with private ctor, even though SymbolTableImpl is a friend. +StatNameSetPtr SymbolTable::makeSet(absl::string_view name) { + // make_unique does not work with private ctor, even though SymbolTable is a friend. 
StatNameSetPtr stat_name_set(new StatNameSet(*this, name)); return stat_name_set; } -Symbol SymbolTableImpl::toSymbol(absl::string_view sv) { +Symbol SymbolTable::toSymbol(absl::string_view sv) { Symbol result; auto encode_find = encode_map_.find(sv); // If the string segment doesn't already exist, @@ -482,14 +481,14 @@ Symbol SymbolTableImpl::toSymbol(absl::string_view sv) { return result; } -absl::string_view SymbolTableImpl::fromSymbol(const Symbol symbol) const +absl::string_view SymbolTable::fromSymbol(const Symbol symbol) const ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { auto search = decode_map_.find(symbol); RELEASE_ASSERT(search != decode_map_.end(), "no such symbol"); return search->second->toStringView(); } -void SymbolTableImpl::newSymbol() ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { +void SymbolTable::newSymbol() ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { if (pool_.empty()) { next_symbol_ = ++monotonic_counter_; } else { @@ -500,7 +499,7 @@ void SymbolTableImpl::newSymbol() ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { ASSERT(monotonic_counter_ != 0); } -bool SymbolTableImpl::lessThan(const StatName& a, const StatName& b) const { +bool SymbolTable::lessThan(const StatName& a, const StatName& b) const { // Proactively take the table lock in anticipation that we'll need to // convert at least one symbol to a string_view, and it's easier not to // bother to lazily take the lock. 
@@ -508,7 +507,7 @@ bool SymbolTableImpl::lessThan(const StatName& a, const StatName& b) const { return lessThanLockHeld(a, b); } -bool SymbolTableImpl::lessThanLockHeld(const StatName& a, const StatName& b) const +bool SymbolTable::lessThanLockHeld(const StatName& a, const StatName& b) const ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { Encoding::TokenIter a_iter(a), b_iter(b); while (true) { @@ -538,7 +537,7 @@ bool SymbolTableImpl::lessThanLockHeld(const StatName& a, const StatName& b) con } #ifndef ENVOY_CONFIG_COVERAGE -void SymbolTableImpl::debugPrint() const { +void SymbolTable::debugPrint() const { Thread::LockGuard lock(lock_); std::vector symbols; for (const auto& p : decode_map_) { @@ -553,7 +552,7 @@ void SymbolTableImpl::debugPrint() const { } #endif -SymbolTable::StoragePtr SymbolTableImpl::encode(absl::string_view name) { +SymbolTable::StoragePtr SymbolTable::encode(absl::string_view name) { name = StringUtil::removeTrailingCharacters(name, '.'); Encoding encoding; addTokensToEncoding(name, encoding); @@ -573,7 +572,7 @@ StatNameStorage::StatNameStorage(StatName src, SymbolTable& table) { table.incRefCount(statName()); } -SymbolTable::StoragePtr SymbolTableImpl::makeDynamicStorage(absl::string_view name) { +SymbolTable::StoragePtr SymbolTable::makeDynamicStorage(absl::string_view name) { name = StringUtil::removeTrailingCharacters(name, '.'); // For all StatName objects, we first have the total number of bytes in the @@ -587,16 +586,16 @@ SymbolTable::StoragePtr SymbolTableImpl::makeDynamicStorage(absl::string_view na // payload_bytes is the total number of bytes needed to represent the // characters in name, plus their encoded size, plus the literal indicator. - const size_t payload_bytes = SymbolTableImpl::Encoding::totalSizeBytes(name.size()) + 1; + const size_t payload_bytes = Encoding::totalSizeBytes(name.size()) + 1; // total_bytes includes the payload_bytes, plus the LiteralStringIndicator, and // the length of those. 
- const size_t total_bytes = SymbolTableImpl::Encoding::totalSizeBytes(payload_bytes); + const size_t total_bytes = Encoding::totalSizeBytes(payload_bytes); MemBlockBuilder mem_block(total_bytes); - SymbolTableImpl::Encoding::appendEncoding(payload_bytes, mem_block); + Encoding::appendEncoding(payload_bytes, mem_block); mem_block.appendOne(LiteralStringIndicator); - SymbolTableImpl::Encoding::appendEncoding(name.size(), mem_block); + Encoding::appendEncoding(name.size(), mem_block); mem_block.appendData(absl::MakeSpan(reinterpret_cast(name.data()), name.size())); ASSERT(mem_block.capacityRemaining() == 0); return mem_block.release(); @@ -664,7 +663,7 @@ void StatNameStorageSet::free(SymbolTable& symbol_table) { } } -SymbolTable::StoragePtr SymbolTableImpl::join(const StatNameVec& stat_names) const { +SymbolTable::StoragePtr SymbolTable::join(const StatNameVec& stat_names) const { size_t num_bytes = 0; for (StatName stat_name : stat_names) { if (!stat_name.empty()) { @@ -680,7 +679,7 @@ SymbolTable::StoragePtr SymbolTableImpl::join(const StatNameVec& stat_names) con return mem_block.release(); } -void SymbolTableImpl::populateList(const StatName* names, uint32_t num_names, StatNameList& list) { +void SymbolTable::populateList(const StatName* names, uint32_t num_names, StatNameList& list) { RELEASE_ASSERT(num_names < 256, "Maximum number elements in a StatNameList exceeded"); // First encode all the names. 
diff --git a/source/common/stats/symbol_table_impl.h b/source/common/stats/symbol_table_impl.h index 6e6bef24175b5..71baef0b74acf 100644 --- a/source/common/stats/symbol_table_impl.h +++ b/source/common/stats/symbol_table_impl.h @@ -1,15 +1,11 @@ #pragma once #include -#include #include #include #include #include -#include "envoy/common/exception.h" -#include "envoy/stats/symbol_table.h" - #include "source/common/common/assert.h" #include "source/common/common/hash.h" #include "source/common/common/lock_guard.h" @@ -21,12 +17,27 @@ #include "absl/container/fixed_array.h" #include "absl/container/flat_hash_map.h" +#include "absl/container/inlined_vector.h" #include "absl/strings/str_join.h" #include "absl/strings/str_split.h" namespace Envoy { namespace Stats { +class StatName; +using StatNameVec = absl::InlinedVector; +class StatNameList; +class StatNameSet; +using StatNameSetPtr = std::unique_ptr; + +/** + * Holds a range of indexes indicating which parts of a stat-name are + * dynamic. This is used to transfer stats from hot-restart parent to child, + * retaining the same name structure. + */ +using DynamicSpan = std::pair; +using DynamicSpans = std::vector; + /** A Symbol represents a string-token with a small index. */ using Symbol = uint32_t; @@ -63,8 +74,26 @@ using SymbolVec = std::vector; * same string is re-encoded, it may or may not encode to the same underlying * symbol. */ -class SymbolTableImpl : public SymbolTable { +class SymbolTable final { public: + /** + * Efficient byte-encoded storage of an array of tokens. The most common + * tokens are typically < 127, and are represented directly. tokens >= 128 + * spill into the next byte, allowing for tokens of arbitrary numeric value to + * be stored. As long as the most common tokens are low-valued, the + * representation is space-efficient. This scheme is similar to UTF-8. 
The + * token ordering is dependent on the order in which stat-names are encoded + * into the SymbolTable, which will not be optimal, but in practice appears + * to be pretty good. + * + * This is exposed in the interface for the benefit of join(), which is + * used in the hot-path to append two stat-names into a temp without taking + * locks. This is used then in thread-local cache lookup, so that once warm, + * no locks are taken when looking up stats. + */ + using Storage = uint8_t[]; + using StoragePtr = std::unique_ptr; + /** * Intermediate representation for a stat-name. This helps store multiple * names in a single packed allocation. First we encode each desired name, @@ -169,52 +198,203 @@ class SymbolTableImpl : public SymbolTable { private: friend class StatName; - friend class SymbolTableImpl; + friend class SymbolTable; class TokenIter; size_t data_bytes_required_{0}; MemBlockBuilder mem_block_; }; - SymbolTableImpl(); - ~SymbolTableImpl() override; + SymbolTable(); + ~SymbolTable(); + + /** + * Decodes a vector of symbols back into its period-delimited stat name. If + * decoding fails on any part of the symbol_vec, we release_assert and crash, + * since this should never happen, and we don't want to continue running + * with a corrupt stats set. + * + * @param stat_name the stat name. + * @return std::string stringified stat_name. + */ + std::string toString(const StatName& stat_name) const; + + /** + * @return uint64_t the number of symbols in the symbol table. 
+ */ + uint64_t numSymbols() const; - // SymbolTable - std::string toString(const StatName& stat_name) const override; - uint64_t numSymbols() const override; - bool lessThan(const StatName& a, const StatName& b) const override; - void free(const StatName& stat_name) override; - void incRefCount(const StatName& stat_name) override; - StoragePtr join(const StatNameVec& stat_names) const override; - void populateList(const StatName* names, uint32_t num_names, StatNameList& list) override; - StoragePtr encode(absl::string_view name) override; - StoragePtr makeDynamicStorage(absl::string_view name) override; + /** + * Determines whether one StatName lexically precedes another. Note that + * the lexical order may not exactly match the lexical order of the + * elaborated strings. For example, stat-name of "-.-" would lexically + * sort after "---" but when encoded as a StatName would come lexically + * earlier. In practice this is unlikely to matter as those are not + * reasonable names for Envoy stats. + * + * Note that this operation has to be performed with the context of the + * SymbolTable so that the individual Symbol objects can be converted + * into strings for lexical comparison. + * + * @param a the first stat name + * @param b the second stat name + * @return bool true if a lexically precedes b. + */ + bool lessThan(const StatName& a, const StatName& b) const; + + /** + * Joins two or more StatNames. For example if we have StatNames for {"a.b", + * "c.d", "e.f"} then the joined stat-name matches "a.b.c.d.e.f". The + * advantage of using this representation is that it avoids having to + * decode/encode into the elaborated form, and does not require locking the + * SymbolTable. + * + * Note that this method does not bump reference counts on the referenced + * Symbols in the SymbolTable, so it's only valid for the lifetime of + * the joined StatNames. 
+ * + * This is intended for use doing cached name lookups of scoped stats, where + * the scope prefix and the names to combine it with are already in StatName + * form. Using this class, they can be combined without accessing the + * SymbolTable or, in particular, taking its lock. + * + * @param stat_names the names to join. + * @return Storage allocated for the joined name. + */ + StoragePtr join(const StatNameVec& stat_names) const; + + /** + * Populates a StatNameList from a list of encodings. This is not done at + * construction time to enable StatNameList to be instantiated directly in + * a class that doesn't have a live SymbolTable when it is constructed. + * + * @param names A pointer to the first name in an array, allocated by the caller. + * @param num_names The number of names. + * @param list The StatNameList representing the stat names. + */ + void populateList(const StatName* names, uint32_t num_names, StatNameList& list); #ifndef ENVOY_CONFIG_COVERAGE - void debugPrint() const override; + void debugPrint() const; #endif - StatNameSetPtr makeSet(absl::string_view name) override; - uint64_t getRecentLookups(const RecentLookupsFn&) const override; - void clearRecentLookups() override; - void setRecentLookupCapacity(uint64_t capacity) override; - uint64_t recentLookupCapacity() const override; - DynamicSpans getDynamicSpans(StatName stat_name) const override; - void withLockHeld(std::function fn) const override { - Thread::LockGuard lock(lock_); - fn(); - } + /** + * Creates a StatNameSet. + * + * @param name the name of the set. + * @return the set. + */ + StatNameSetPtr makeSet(absl::string_view name); + + using RecentLookupsFn = std::function; /** - * See doc for lessThan(). This variant requires the lock be taken - * before calling. It is used to help sort() speed. + * Calls the provided function with the name of the most recently looked-up + * symbols, including lookups on any StatNameSets, and with a count of + * the recent lookups on that symbol. 
+ * + * @param iter the function to call for every recent item. + */ + uint64_t getRecentLookups(const RecentLookupsFn&) const; + + /** + * Clears the recent-lookups structures. */ - bool lessThanLockHeld(const StatName& a, const StatName& b) const override; + void clearRecentLookups(); + + /** + * Sets the recent-lookup capacity. + */ + void setRecentLookupCapacity(uint64_t capacity); + + /** + * @return The configured recent-lookup tracking capacity. + */ + uint64_t recentLookupCapacity() const; + + /** + * Identifies the dynamic components of a stat_name into an array of integer + * pairs, indicating the begin/end of spans of tokens in the stat-name that + * are created from StatNameDynamicStore or StatNameDynamicPool. + * + * This can be used to reconstruct the same exact StatNames in + * StatNames::mergeStats(), to enable stat continuity across hot-restart. + * + * @param stat_name the input stat name. + * @return the array of pairs indicating the bounds. + */ + DynamicSpans getDynamicSpans(StatName stat_name) const; + + bool lessThanLockHeld(const StatName& a, const StatName& b) const; + + template struct StatNameCompare { + StatNameCompare(const SymbolTable& symbol_table, GetStatName getter) + : symbol_table_(symbol_table), getter_(getter) {} + + bool operator()(const Obj& a, const Obj& b) const; + + const SymbolTable& symbol_table_; + GetStatName getter_; + }; + + /** + * Sorts a range by StatName. This API is more efficient than + * calling std::sort directly as it takes a single lock for the + * entire sort, rather than locking on each comparison. + * + * @param begin the beginning of the range to sort + * @param end the end of the range to sort + * @param get_stat_name a functor that takes an Obj and returns a StatName. + */ + template + void sortByStatNames(Iter begin, Iter end, GetStatName get_stat_name) const { + // Grab the lock once before sorting begins, so we don't have to re-take + // it on every comparison. 
+ Thread::LockGuard lock(lock_); + StatNameCompare compare(*this, get_stat_name); + std::sort(begin, end, compare); + } private: friend class StatName; friend class StatNameTest; friend class StatNameDeathTest; + friend class StatNameDynamicStorage; + friend class StatNameList; + friend class StatNameStorage; + + /** + * Encodes 'name' into the symbol table. Bumps reference counts for referenced + * symbols. The caller must manage the storage, and is responsible for calling + * SymbolTable::free() to release the reference counts. + * + * @param name The name to encode. + * @return The encoded name, transferring ownership to the caller. + * + */ + StoragePtr encode(absl::string_view name); + StoragePtr makeDynamicStorage(absl::string_view name); + + /** + * Since SymbolTable does manual reference counting, a client of SymbolTable + * must manually call free(symbol_vec) when it is freeing the backing store + * for a StatName. This way, the symbol table will grow and shrink + * dynamically, instead of being write-only. + * + * @param stat_name the stat name. + */ + void free(const StatName& stat_name); + + /** + * StatName backing-store can be managed by callers in a variety of ways + * to minimize overhead. But any persistent reference to a StatName needs + * to hold onto its own reference-counts for all symbols. This method + * helps callers ensure the symbol-storage is maintained for the lifetime + * of a reference. + * + * @param stat_name the stat name. + */ + void incRefCount(const StatName& stat_name); struct SharedSymbol { SharedSymbol(Symbol symbol) : symbol_(symbol), ref_count_(1) {} @@ -422,7 +602,7 @@ class StatName { * @return size_t the number of bytes in the symbol array, including the * overhead for the size itself. 
*/ - size_t size() const { return SymbolTableImpl::Encoding::totalSizeBytes(dataSize()); } + size_t size() const { return SymbolTable::Encoding::totalSizeBytes(dataSize()); } /** * Copies the entire StatName representation into a MemBlockBuilder, including @@ -459,7 +639,7 @@ class StatName { if (size_and_data_ == nullptr) { return nullptr; } - return size_and_data_ + SymbolTableImpl::Encoding::encodingSizeBytes(dataSize()); + return size_and_data_ + SymbolTable::Encoding::encodingSizeBytes(dataSize()); } const uint8_t* dataIncludingSize() const { return size_and_data_; } @@ -536,9 +716,9 @@ class StatNameManagedStorage : public StatNameStorage { */ class StatNameDynamicStorage : public StatNameStorageBase { public: - // Basic constructor based on a name. Note the table is used for - // a virtual-function call to encode the name, but no locks are taken - // in either implementation of the SymbolTable api. + // Basic constructor based on a name. Note the table is used for a call to + // encode the name, but no locks are taken in either implementation of the + // SymbolTable api. StatNameDynamicStorage(absl::string_view name, SymbolTable& table) : StatNameStorageBase(table.makeDynamicStorage(name)) {} // Move constructor. @@ -675,7 +855,7 @@ class StatNameList { void clear(SymbolTable& symbol_table); private: - friend class SymbolTableImpl; + friend class SymbolTable; /** * Moves the specified storage into the list. The storage format is an @@ -688,8 +868,8 @@ class StatNameList { * ... * * - * For SymbolTableImpl, each symbol is 1 or more bytes, in a variable-length - * encoding. See SymbolTableImpl::Encoding::addSymbol for details. + * For SymbolTable, each symbol is 1 or more bytes, in a variable-length + * encoding. See SymbolTable::Encoding::addSymbol for details. 
*/ void moveStorageIntoList(SymbolTable::StoragePtr&& storage) { storage_ = std::move(storage); } @@ -861,7 +1041,7 @@ class StatNameSet { } private: - friend class SymbolTableImpl; + friend class SymbolTable; StatNameSet(SymbolTable& symbol_table, absl::string_view name); @@ -872,49 +1052,18 @@ class StatNameSet { StringStatNameMap builtin_stat_names_; }; -/** - * Sorts a range by StatName. This API is more efficient than - * calling std::sort directly as it takes a single lock for the - * entire sort, rather than locking on each comparison. - * - * This is a free function rather than a method of SymbolTable because - * SymbolTable is an abstract class and it's hard to make a virtual template - * function. - * - * @param symbol_table the symbol table that owns the StatNames. - * @param begin the beginning of the range to sort - * @param end the end of the range to sort - * @param get_stat_name a functor that takes an Obj and returns a StatName. - */ -template -void sortByStatNames(const SymbolTable& symbol_table, Iter begin, Iter end, - GetStatName get_stat_name) { - - struct Compare { - Compare(const SymbolTable& symbol_table, GetStatName getter) - : symbol_table_(symbol_table), getter_(getter) {} +template +bool SymbolTable::StatNameCompare::operator()(const Obj& a, const Obj& b) const { + StatName a_stat_name = getter_(a); + StatName b_stat_name = getter_(b); + return symbol_table_.lessThanLockHeld(a_stat_name, b_stat_name); +} - bool operator()(const Obj& a, const Obj& b) const { - StatName a_stat_name = getter_(a); - StatName b_stat_name = getter_(b); - return symbol_table_.lessThanLockHeld(a_stat_name, b_stat_name); - } +using SymbolTablePtr = std::unique_ptr; - const SymbolTable& symbol_table_; - GetStatName getter_; - }; - - // Grab the lock once before sorting begins, so we don't have to re-take - // it on every comparison. 
- // - // TODO(jmarantz): Once SymbolTable is changed to a concrete class from an - // interface, we'll be able to change this free function into a method, - // take the lock directly, and remove the withLockHeld method. - symbol_table.withLockHeld([begin, end, get_stat_name, &symbol_table]() { - Compare compare(symbol_table, get_stat_name); - std::sort(begin, end, compare); - }); -} +// TODO(jmarantz): rename all remaining ~47 occurrences of SymbolTableImpl in +// the codebase to SymbolTable, and drop this alias. +using SymbolTableImpl = SymbolTable; } // namespace Stats } // namespace Envoy diff --git a/source/docs/stats.md b/source/docs/stats.md index de9a92048f696..54d8b13053184 100644 --- a/source/docs/stats.md +++ b/source/docs/stats.md @@ -204,9 +204,7 @@ occurring during via an admin endpoint that shows 20 recent lookups by name, at Class | Superclass | Description -----| ---------- | --------- -SymbolTable | | Abstract class providing an interface for symbol tables -SymbolTableImpl | SymbolTable | Implementation of SymbolTable API where StatName share symbols held in a table -SymbolTableImpl::Encoding | | Helper class for incrementally encoding strings into symbols +SymbolTable | | Holds a table of dot-separated names with shared tokens StatName | | Provides an API and a view into a StatName (dynamic or symbolized). Like absl::string_view, the backing store must be separately maintained. StatNameStorageBase | | Holds storage (an array of bytes) for a dynamic or symbolized StatName StatNameStorage | StatNameStorageBase | Holds storage for a symbolized StatName. Must be explicitly freed (not just destructed). 
diff --git a/test/common/stats/symbol_table_impl_test.cc b/test/common/stats/symbol_table_impl_test.cc index f5af9675bc06d..ee76b37a3c373 100644 --- a/test/common/stats/symbol_table_impl_test.cc +++ b/test/common/stats/symbol_table_impl_test.cc @@ -435,10 +435,10 @@ TEST_F(StatNameTest, Sort) { const StatNameVec sorted_names{makeStat("a.b"), makeStat("a.c"), makeStat("a.c"), makeStat("d.a"), makeStat("d.a.a"), makeStat("d.e")}; EXPECT_NE(names, sorted_names); - struct Compare { + struct GetStatName { StatName operator()(const StatName& stat_name) const { return stat_name; } }; - sortByStatNames(table_, names.begin(), names.end(), Compare()); + table_.sortByStatNames(names.begin(), names.end(), GetStatName()); EXPECT_EQ(names, sorted_names); } diff --git a/test/common/stats/symbol_table_speed_test.cc b/test/common/stats/symbol_table_speed_test.cc index e62ff55da1082..40cc5d11cb55b 100644 --- a/test/common/stats/symbol_table_speed_test.cc +++ b/test/common/stats/symbol_table_speed_test.cc @@ -153,8 +153,7 @@ static void bmSortByStatNames(benchmark::State& state) { for (auto _ : state) { UNREFERENCED_PARAMETER(_); std::vector sort = names; - Envoy::Stats::sortByStatNames(symbol_table, sort.begin(), sort.end(), - getter); + symbol_table.sortByStatNames(sort.begin(), sort.end(), getter); } } BENCHMARK(bmSortByStatNames);