diff --git a/include/envoy/stats/symbol_table.h b/include/envoy/stats/symbol_table.h index 1eeb9778f1103..e5531e46da33b 100644 --- a/include/envoy/stats/symbol_table.h +++ b/include/envoy/stats/symbol_table.h @@ -182,6 +182,7 @@ class SymbolTable { private: friend struct HeapStatData; + friend class StatNameDynamicStorage; friend class StatNameStorage; friend class StatNameList; friend class StatNameSet; @@ -222,12 +223,7 @@ class SymbolTable { */ virtual StoragePtr encode(absl::string_view name) PURE; - /** - * Called by StatNameSet's destructor. - * - * @param stat_name_set the set. - */ - virtual void forgetSet(StatNameSet& stat_name_set) PURE; + virtual StoragePtr makeDynamicStorage(absl::string_view name) PURE; }; using SymbolTablePtr = std::unique_ptr; diff --git a/source/common/http/codes.cc b/source/common/http/codes.cc index 9fdf0028b4bfe..cf291af32dfba 100644 --- a/source/common/http/codes.cc +++ b/source/common/http/codes.cc @@ -158,13 +158,6 @@ void CodeStatsImpl::chargeResponseTiming(const ResponseTimingInfo& info) const { } } -absl::string_view CodeStatsImpl::stripTrailingDot(absl::string_view str) { - if (absl::EndsWith(str, ".")) { - str.remove_suffix(1); - } - return str; -} - Stats::StatName CodeStatsImpl::upstreamRqGroup(Code response_code) const { switch (enumToInt(response_code) / 100) { case 1: @@ -187,7 +180,7 @@ Stats::StatName CodeStatsImpl::upstreamRqStatName(Code response_code) const { if (rc_index >= NumHttpCodes) { return upstream_rq_unknown_; } - std::atomic& atomic_ref = rc_stat_names_[rc_index]; + std::atomic& atomic_ref = rc_stat_names_[rc_index]; if (atomic_ref.load() == nullptr) { absl::MutexLock lock(&mutex_); diff --git a/source/common/http/codes.h b/source/common/http/codes.h index 3b8a2d6fb4eff..dcfa4e37df507 100644 --- a/source/common/http/codes.h +++ b/source/common/http/codes.h @@ -59,7 +59,6 @@ class CodeStatsImpl : public CodeStats { void recordHistogram(Stats::Scope& scope, const Stats::StatNameVec& names, Stats::Histogram::Unit unit, uint64_t count) const; - static absl::string_view stripTrailingDot(absl::string_view prefix); Stats::StatName upstreamRqGroup(Code response_code) const; Stats::StatName upstreamRqStatName(Code response_code) const; @@ -109,7 +108,7 @@ class CodeStatsImpl : public CodeStats { static constexpr uint32_t NumHttpCodes = 500; static constexpr uint32_t HttpCodeOffset = 100; // code 100 is at index 0. - mutable std::atomic rc_stat_names_[NumHttpCodes]; + mutable std::atomic rc_stat_names_[NumHttpCodes]; }; /** diff --git a/source/common/router/router.cc b/source/common/router/router.cc index 721eaa0b25660..a5c5ee7689785 100644 --- a/source/common/router/router.cc +++ b/source/common/router/router.cc @@ -493,11 +493,7 @@ Http::FilterHeadersStatus Filter::decodeHeaders(Http::HeaderMap& headers, bool e const Http::HeaderEntry* request_alt_name = headers.EnvoyUpstreamAltStatName(); if (request_alt_name) { - // TODO(#7003): converting this header value into a StatName requires - // taking a global symbol-table lock. This is not a frequently used feature, - // but may not be the only occurrence of this pattern, where it's difficult - // or impossible to pre-compute a StatName for a component of a stat name. - alt_stat_prefix_ = std::make_unique( + alt_stat_prefix_ = std::make_unique( request_alt_name->value().getStringView(), config_.scope_.symbolTable()); headers.removeEnvoyUpstreamAltStatName(); } diff --git a/source/common/router/router.h b/source/common/router/router.h index 81dbf37a0b1e5..956e365229b81 100644 --- a/source/common/router/router.h +++ b/source/common/router/router.h @@ -565,7 +565,7 @@ class Filter : Logger::Loggable, RouteConstSharedPtr route_; const RouteEntry* route_entry_{}; Upstream::ClusterInfoConstSharedPtr cluster_; - std::unique_ptr alt_stat_prefix_; + std::unique_ptr alt_stat_prefix_; const VirtualCluster* request_vcluster_; Event::TimerPtr response_timeout_; FilterUtility::TimeoutData timeout_; diff --git a/source/common/stats/fake_symbol_table_impl.h b/source/common/stats/fake_symbol_table_impl.h index a64325cf1177f..ddfad21d64906 100644 --- a/source/common/stats/fake_symbol_table_impl.h +++ b/source/common/stats/fake_symbol_table_impl.h @@ -75,12 +75,11 @@ class FakeSymbolTableImpl : public SymbolTable { MemBlockBuilder mem_block(total_size_bytes); mem_block.appendOne(num_names); for (uint32_t i = 0; i < num_names; ++i) { - auto& name = names[i]; - size_t sz = name.size(); + absl::string_view name = names[i]; + const size_t sz = name.size(); SymbolTableImpl::Encoding::appendEncoding(sz, mem_block); if (!name.empty()) { - mem_block.appendData( - absl::MakeConstSpan(reinterpret_cast(name.data()), sz)); + mem_block.appendData(absl::MakeSpan(reinterpret_cast(name.data()), sz)); } } @@ -102,6 +101,7 @@ class FakeSymbolTableImpl : public SymbolTable { void free(const StatName&) override {} void incRefCount(const StatName&) override {} StoragePtr encode(absl::string_view name) override { return encodeHelper(name); } + StoragePtr makeDynamicStorage(absl::string_view name) override { return encodeHelper(name); } SymbolTable::StoragePtr join(const std::vector& names) const override { std::vector strings; for (StatName name : names) { @@ -125,7 +125,6 @@ class FakeSymbolTableImpl : public SymbolTable { // make_unique does not work with private ctor, even though FakeSymbolTableImpl is a friend. return StatNameSetPtr(new StatNameSet(*this, name)); } - void forgetSet(StatNameSet&) override {} uint64_t getRecentLookups(const RecentLookupsFn&) const override { return 0; } void clearRecentLookups() override {} void setRecentLookupCapacity(uint64_t) override {} @@ -142,7 +141,8 @@ class FakeSymbolTableImpl : public SymbolTable { MemBlockBuilder mem_block(SymbolTableImpl::Encoding::totalSizeBytes(name.size())); SymbolTableImpl::Encoding::appendEncoding(name.size(), mem_block); mem_block.appendData( - absl::MakeConstSpan(reinterpret_cast(name.data()), name.size())); + absl::MakeSpan(reinterpret_cast(name.data()), name.size())); + ASSERT(mem_block.capacityRemaining() == 0); return mem_block.release(); } }; diff --git a/source/common/stats/symbol_table_impl.cc b/source/common/stats/symbol_table_impl.cc index 9ce19a85bedb6..164ff9ec2999c 100644 --- a/source/common/stats/symbol_table_impl.cc +++ b/source/common/stats/symbol_table_impl.cc @@ -14,8 +14,22 @@ namespace Envoy { namespace Stats { -static const uint32_t SpilloverMask = 0x80; -static const uint32_t Low7Bits = 0x7f; +// Masks used for variable-length encoding of arbitrary-sized integers into a +// uint8-array. The integers are typically small, so we try to store them in as +// few bytes as possible. The bottom 7 bits hold values, and the top bit is used +// to determine whether another byte is needed for more data. +static constexpr uint32_t SpilloverMask = 0x80; +static constexpr uint32_t Low7Bits = 0x7f; + +// When storing Symbol arrays, we disallow Symbol 0, which is the only Symbol +// that will decode into uint8_t array starting (and ending) with {0}. Thus we +// can use a leading 0 in the first byte to indicate that what follows is +// literal char data, rather than symbol-table entries. Literal char data is +// used for dynamically discovered stat-name tokens where you don't want to take +// a symbol table lock, and would rather pay extra memory overhead to store the +// tokens as fully elaborated strings. +static constexpr Symbol FirstValidSymbol = 1; +static constexpr uint8_t LiteralStringIndicator = 0; uint64_t StatName::dataSize() const { if (size_and_data_ == nullptr) { @@ -104,13 +118,50 @@ SymbolVec SymbolTableImpl::Encoding::decodeSymbols(const SymbolTable::Storage ar uint64_t size) { SymbolVec symbol_vec; symbol_vec.reserve(size); + decodeTokens( + array, size, [&symbol_vec](Symbol symbol) { symbol_vec.push_back(symbol); }, + [](absl::string_view) {}); + return symbol_vec; +} + +void SymbolTableImpl::Encoding::decodeTokens( + const SymbolTable::Storage array, uint64_t size, + const std::function& symbolTokenFn, + const std::function& stringViewTokenFn) { while (size > 0) { - std::pair symbol_consumed = decodeNumber(array); - symbol_vec.push_back(symbol_consumed.first); - size -= symbol_consumed.second; - array += symbol_consumed.second; + if (*array == LiteralStringIndicator) { + // To avoid scanning memory to find the literal size during decode, we + // var-length encode the size of the literal string prior to the data. + ASSERT(size > 1); + ++array; + --size; + std::pair length_consumed = decodeNumber(array); + uint64_t length = length_consumed.first; + array += length_consumed.second; + size -= length_consumed.second; + ASSERT(size >= length); + stringViewTokenFn(absl::string_view(reinterpret_cast(array), length)); + size -= length; + array += length; + } else { + std::pair symbol_consumed = decodeNumber(array); + symbolTokenFn(symbol_consumed.first); + size -= symbol_consumed.second; + array += symbol_consumed.second; + } } - return symbol_vec; +} + +std::vector SymbolTableImpl::decodeStrings(const SymbolTable::Storage array, + uint64_t size) const { + std::vector strings; + Thread::LockGuard lock(lock_); + Encoding::decodeTokens( + array, size, + [this, &strings](Symbol symbol) + NO_THREAD_SAFETY_ANALYSIS { strings.push_back(fromSymbol(symbol)); }, + [&strings](absl::string_view str) { strings.push_back(str); }); + return strings; } void SymbolTableImpl::Encoding::moveToMemBlock(MemBlockBuilder& mem_block) { @@ -121,7 +172,7 @@ void SymbolTableImpl::Encoding::moveToMemBlock(MemBlockBuilder& mem_blo SymbolTableImpl::SymbolTableImpl() // Have to be explicitly initialized, if we want to use the GUARDED_BY macro. - : next_symbol_(0), monotonic_counter_(0) {} + : next_symbol_(FirstValidSymbol), monotonic_counter_(FirstValidSymbol) {} SymbolTableImpl::~SymbolTableImpl() { // To avoid leaks into the symbol table, we expect all StatNames to be freed. @@ -151,6 +202,10 @@ void SymbolTableImpl::addTokensToEncoding(const absl::string_view name, Encoding Thread::LockGuard lock(lock_); recent_lookups_.lookup(name); for (auto& token : tokens) { + // TODO(jmarantz): consider using StatNameDynamicStorage for tokens with + // length below some threshold, say 4 bytes. It might be preferable not to + // reserve Symbols for every 3 digit number found (for example) in ipv4 + // addresses. symbols.push_back(toSymbol(token)); } } @@ -166,7 +221,7 @@ uint64_t SymbolTableImpl::numSymbols() const { } std::string SymbolTableImpl::toString(const StatName& stat_name) const { - return decodeSymbolVec(Encoding::decodeSymbols(stat_name.data(), stat_name.dataSize())); + return absl::StrJoin(decodeStrings(stat_name.data(), stat_name.dataSize()), "."); } void SymbolTableImpl::callWithStringView(StatName stat_name, @@ -174,19 +229,6 @@ void SymbolTableImpl::callWithStringView(StatName stat_name, fn(toString(stat_name)); } -std::string SymbolTableImpl::decodeSymbolVec(const SymbolVec& symbols) const { - std::vector name_tokens; - name_tokens.reserve(symbols.size()); - { - // Hold the lock only while decoding symbols. - Thread::LockGuard lock(lock_); - for (Symbol symbol : symbols) { - name_tokens.push_back(fromSymbol(symbol)); - } - } - return absl::StrJoin(name_tokens, "."); -} - void SymbolTableImpl::incRefCount(const StatName& stat_name) { // Before taking the lock, decode the array of symbols from the SymbolTable::Storage. const SymbolVec symbols = Encoding::decodeSymbols(stat_name.data(), stat_name.dataSize()); @@ -233,20 +275,8 @@ uint64_t SymbolTableImpl::getRecentLookups(const RecentLookupsFn& iter) const { uint64_t total = 0; absl::flat_hash_map name_count_map; - // We don't want to hold stat_name_set_mutex while calling the iterator, so - // buffer lookup_data. - { - Thread::LockGuard lock(stat_name_set_mutex_); - for (StatNameSet* stat_name_set : stat_name_sets_) { - total += - stat_name_set->getRecentLookups([&name_count_map](absl::string_view str, uint64_t count) { - name_count_map[std::string(str)] += count; - }); - } - } - - // We also don't want to hold lock_ while calling the iterator, but we need it - // to access recent_lookups_. + // We don't want to hold lock_ while calling the iterator, but we need it to + // access recent_lookups_, so we buffer in name_count_map. { Thread::LockGuard lock(lock_); recent_lookups_.forEach( @@ -272,30 +302,13 @@ uint64_t SymbolTableImpl::getRecentLookups(const RecentLookupsFn& iter) const { } void SymbolTableImpl::setRecentLookupCapacity(uint64_t capacity) { - { - Thread::LockGuard lock(stat_name_set_mutex_); - for (StatNameSet* stat_name_set : stat_name_sets_) { - stat_name_set->setRecentLookupCapacity(capacity); - } - } - - { - Thread::LockGuard lock(lock_); - recent_lookups_.setCapacity(capacity); - } + Thread::LockGuard lock(lock_); + recent_lookups_.setCapacity(capacity); } void SymbolTableImpl::clearRecentLookups() { - { - Thread::LockGuard lock(stat_name_set_mutex_); - for (StatNameSet* stat_name_set : stat_name_sets_) { - stat_name_set->clearRecentLookups(); - } - } - { - Thread::LockGuard lock(lock_); - recent_lookups_.clear(); - } + Thread::LockGuard lock(lock_); + recent_lookups_.clear(); } uint64_t SymbolTableImpl::recentLookupCapacity() const { @@ -304,22 +317,11 @@ uint64_t SymbolTableImpl::recentLookupCapacity() const { } StatNameSetPtr SymbolTableImpl::makeSet(absl::string_view name) { - const uint64_t capacity = recentLookupCapacity(); // make_unique does not work with private ctor, even though SymbolTableImpl is a friend. StatNameSetPtr stat_name_set(new StatNameSet(*this, name)); - stat_name_set->setRecentLookupCapacity(capacity); - { - Thread::LockGuard lock(stat_name_set_mutex_); - stat_name_sets_.insert(stat_name_set.get()); - } return stat_name_set; } -void SymbolTableImpl::forgetSet(StatNameSet& stat_name_set) { - Thread::LockGuard lock(stat_name_set_mutex_); - stat_name_sets_.erase(&stat_name_set); -} - Symbol SymbolTableImpl::toSymbol(absl::string_view sv) { Symbol result; auto encode_find = encode_map_.find(sv); @@ -367,17 +369,14 @@ void SymbolTableImpl::newSymbol() EXCLUSIVE_LOCKS_REQUIRED(lock_) { bool SymbolTableImpl::lessThan(const StatName& a, const StatName& b) const { // Constructing two temp vectors during lessThan is not strictly necessary. // If this becomes a performance bottleneck (e.g. during sorting), we could - // provide an iterator-like interface for incrementally decoding the symbols + // provide an iterator-like interface for incrementally comparing the tokens // without allocating memory. - const SymbolVec av = Encoding::decodeSymbols(a.data(), a.dataSize()); - const SymbolVec bv = Encoding::decodeSymbols(b.data(), b.dataSize()); + const std::vector av = decodeStrings(a.data(), a.dataSize()); + const std::vector bv = decodeStrings(b.data(), b.dataSize()); - // Calling fromSymbol requires holding the lock, as it needs read-access to - // the maps that are written when adding new symbols. - Thread::LockGuard lock(lock_); for (uint64_t i = 0, n = std::min(av.size(), bv.size()); i < n; ++i) { if (av[i] != bv[i]) { - bool ret = fromSymbol(av[i]) < fromSymbol(bv[i]); + const bool ret = av[i] < bv[i]; return ret; } } @@ -410,27 +409,56 @@ SymbolTable::StoragePtr SymbolTableImpl::encode(absl::string_view name) { } StatNameStorage::StatNameStorage(absl::string_view name, SymbolTable& table) - : bytes_(table.encode(name)) {} + : StatNameStorageBase(table.encode(name)) {} StatNameStorage::StatNameStorage(StatName src, SymbolTable& table) { const uint64_t size = src.size(); MemBlockBuilder storage(size); src.copyToMemBlock(storage); - bytes_ = storage.release(); + setBytes(storage.release()); table.incRefCount(statName()); } +SymbolTable::StoragePtr SymbolTableImpl::makeDynamicStorage(absl::string_view name) { + name = StringUtil::removeTrailingCharacters(name, '.'); + + // For all StatName objects, we first have the total number of bytes in the + // representation. But for inlined dynamic string StatName variants, we must + // store the length of the payload separately, so that if this token gets + // joined with others, we'll know much space it consumes until the next token. + // So the layout is + // [ length-of-whole-StatName, LiteralStringIndicator, length-of-name, name ] + // So we need to figure out how many bytes we need to represent length-of-name + // and name. + + // payload_bytes is the total number of bytes needed to represent the + // characters in name, plus their encoded size, plus the literal indicator. + const uint64_t payload_bytes = SymbolTableImpl::Encoding::totalSizeBytes(name.size()) + 1; + + // total_bytes includes the payload_bytes, plus the LiteralStringIndicator, and + // the length of those. + const uint64_t total_bytes = SymbolTableImpl::Encoding::totalSizeBytes(payload_bytes); + MemBlockBuilder mem_block(total_bytes); + + SymbolTableImpl::Encoding::appendEncoding(payload_bytes, mem_block); + mem_block.appendOne(LiteralStringIndicator); + SymbolTableImpl::Encoding::appendEncoding(name.size(), mem_block); + mem_block.appendData(absl::MakeSpan(reinterpret_cast(name.data()), name.size())); + ASSERT(mem_block.capacityRemaining() == 0); + return mem_block.release(); +} + StatNameStorage::~StatNameStorage() { // StatNameStorage is not fully RAII: you must call free(SymbolTable&) to // decrement the reference counts held by the SymbolTable on behalf of // this StatName. This saves 8 bytes of storage per stat, relative to // holding a SymbolTable& in each StatNameStorage object. - ASSERT(bytes_ == nullptr); + ASSERT(bytes() == nullptr); } void StatNameStorage::free(SymbolTable& table) { table.free(statName()); - bytes_.reset(); + clear(); } void StatNamePool::clear() { @@ -440,13 +468,18 @@ void StatNamePool::clear() { storage_vector_.clear(); } -uint8_t* StatNamePool::addReturningStorage(absl::string_view str) { +const uint8_t* StatNamePool::addReturningStorage(absl::string_view str) { storage_vector_.push_back(Stats::StatNameStorage(str, symbol_table_)); return storage_vector_.back().bytes(); } StatName StatNamePool::add(absl::string_view str) { return StatName(addReturningStorage(str)); } +StatName StatNameDynamicPool::add(absl::string_view str) { + storage_vector_.push_back(Stats::StatNameDynamicStorage(str, symbol_table_)); + return StatName(storage_vector_.back().bytes()); +} + StatNameStorageSet::~StatNameStorageSet() { // free() must be called before destructing StatNameStorageSet to decrement // references to all symbols. @@ -487,6 +520,7 @@ SymbolTable::StoragePtr SymbolTableImpl::join(const StatNameVec& stat_names) con for (StatName stat_name : stat_names) { stat_name.appendDataToMemBlock(mem_block); } + ASSERT(mem_block.capacityRemaining() == 0); return mem_block.release(); } @@ -547,8 +581,6 @@ StatNameSet::StatNameSet(SymbolTable& symbol_table, absl::string_view name) builtin_stat_names_[""] = StatName(); } -StatNameSet::~StatNameSet() { symbol_table_.forgetSet(*this); } - void StatNameSet::rememberBuiltin(absl::string_view str) { StatName stat_name; { @@ -568,41 +600,5 @@ StatName StatNameSet::getBuiltin(absl::string_view token, StatName fallback) { return fallback; } -StatName StatNameSet::getDynamic(absl::string_view token) { - // We duplicate most of the getBuiltin implementation so that we can detect - // the difference between "not found" and "found empty stat name". - const auto iter = builtin_stat_names_.find(token); - if (iter != builtin_stat_names_.end()) { - return iter->second; - } - - { - // Other tokens require holding a lock for our local cache. - absl::MutexLock lock(&mutex_); - Stats::StatName& stat_name_ref = dynamic_stat_names_[token]; - if (stat_name_ref.empty()) { // Note that builtin_stat_names_ already has one for "". - stat_name_ref = pool_.add(token); - recent_lookups_.lookup(token); - } - return stat_name_ref; - } -} - -uint64_t StatNameSet::getRecentLookups(const RecentLookups::IterFn& iter) const { - absl::MutexLock lock(&mutex_); - recent_lookups_.forEach(iter); - return recent_lookups_.total(); -} - -void StatNameSet::clearRecentLookups() { - absl::MutexLock lock(&mutex_); - recent_lookups_.clear(); -} - -void StatNameSet::setRecentLookupCapacity(uint64_t capacity) { - absl::MutexLock lock(&mutex_); - recent_lookups_.setCapacity(capacity); -} - } // namespace Stats } // namespace Envoy diff --git a/source/common/stats/symbol_table_impl.h b/source/common/stats/symbol_table_impl.h index f5c3a9d81c470..181c5f41b905a 100644 --- a/source/common/stats/symbol_table_impl.h +++ b/source/common/stats/symbol_table_impl.h @@ -88,13 +88,28 @@ class SymbolTableImpl : public SymbolTable { * * @param symbol the symbol to encode. */ - void addSymbols(const std::vector& symbols); + void addSymbols(const SymbolVec& symbols); /** * Decodes a uint8_t array into a SymbolVec. */ static SymbolVec decodeSymbols(const SymbolTable::Storage array, uint64_t size); + /** + * Decodes a uint8_t array into a sequence of symbols and literal strings. + * There are distinct lambdas for these two options. Calls to these lambdas + * will be interleaved based on the sequencing of literal strings and + * symbols held in the data. + * + * @param array the StatName encoded as a uint8_t array. + * @param size the size of the array in bytes. + * @param symbolTokenFn a function to be called whenever a symbol is encountered in the array. + * @param stringVIewTokeNFn a function to be called whenever a string literal is encountered. + */ + static void decodeTokens(const SymbolTable::Storage array, uint64_t size, + const std::function& symbolTokenFn, + const std::function& stringViewTokenFn); + /** * Returns the number of bytes required to represent StatName as a uint8_t * array, including the encoded size. @@ -114,7 +129,7 @@ class SymbolTableImpl : public SymbolTable { * * @param mem_block_builder memory block to receive the encoded bytes. */ - void moveToMemBlock(MemBlockBuilder& array); + void moveToMemBlock(MemBlockBuilder& mem_block_builder); /** * @param number A number to encode in a variable length byte-array. @@ -170,6 +185,7 @@ class SymbolTableImpl : public SymbolTable { void populateList(const absl::string_view* names, uint32_t num_names, StatNameList& list) override; StoragePtr encode(absl::string_view name) override; + StoragePtr makeDynamicStorage(absl::string_view name) override; void callWithStringView(StatName stat_name, const std::function& fn) const override; @@ -178,7 +194,6 @@ class SymbolTableImpl : public SymbolTable { #endif StatNameSetPtr makeSet(absl::string_view name) override; - void forgetSet(StatNameSet& stat_name_set) override; uint64_t getRecentLookups(const RecentLookupsFn&) const override; void clearRecentLookups() override; void setRecentLookupCapacity(uint64_t capacity) override; @@ -187,6 +202,7 @@ class SymbolTableImpl : public SymbolTable { private: friend class StatName; friend class StatNameTest; + friend class StatNameDeathTest; struct SharedSymbol { SharedSymbol(Symbol symbol) : symbol_(symbol), ref_count_(1) {} @@ -198,19 +214,20 @@ class SymbolTableImpl : public SymbolTable { // This must be held during both encode() and free(). mutable Thread::MutexBasicLockable lock_; - // This must be held while updating stat_name_sets_. - mutable Thread::MutexBasicLockable stat_name_set_mutex_; - /** - * Decodes a vector of symbols back into its period-delimited stat name. If - * decoding fails on any part of the symbol_vec, we release_assert and crash - * hard, since this should never happen, and we don't want to continue running - * with a corrupt stats set. + * Decodes a uint8_t array into an array of period-delimited strings. Note + * that some of the strings may have periods in them, in the case where + * StatNameDynamicStorage was used. * - * @param symbols the vector of symbols to decode. + * If decoding fails on any part of the encoding, we RELEASE_ASSERT and crash, + * since this should never happen, and we don't want to continue running with + * corrupt stat names. + * + * @param array the uint8_t array of encoded symbols and dynamic strings. + * @param size the size of the array in bytes. * @return std::string the retrieved stat name. */ - std::string decodeSymbolVec(const SymbolVec& symbols) const; + std::vector decodeStrings(const Storage array, uint64_t size) const; /** * Convenience function for encode(), symbolizing one string segment at a time. @@ -267,8 +284,32 @@ class SymbolTableImpl : public SymbolTable { // using an Envoy::IntervalSet. std::stack pool_ GUARDED_BY(lock_); RecentLookups recent_lookups_ GUARDED_BY(lock_); +}; + +// Base class for holding the backing-storing for a StatName. The two derived +// classes, StatNameStorage and StatNameDynamicStore, share a need to hold an +// array of bytes, but use different representations. +class StatNameStorageBase { +public: + StatNameStorageBase(SymbolTable::StoragePtr&& bytes) : bytes_(std::move(bytes)) {} + StatNameStorageBase() {} - absl::flat_hash_set stat_name_sets_ GUARDED_BY(stat_name_set_mutex_); + /** + * @return a reference to the owned storage. + */ + inline StatName statName() const; + + /** + * @return the encoded data as a const pointer. + */ + const uint8_t* bytes() const { return bytes_.get(); } + +protected: + void setBytes(SymbolTable::StoragePtr&& bytes) { bytes_ = std::move(bytes); } + void clear() { bytes_.reset(); } + +private: + SymbolTable::StoragePtr bytes_; }; /** @@ -284,7 +325,7 @@ class SymbolTableImpl : public SymbolTable { * Thus this class is inconvenient to directly use as temp storage for building * a StatName from a string. Instead it should be used via StatNameManagedStorage. */ -class StatNameStorage { +class StatNameStorage : public StatNameStorageBase { public: // Basic constructor for when you have a name as a string, and need to // generate symbols for it. @@ -292,7 +333,7 @@ class StatNameStorage { // Move constructor; needed for using StatNameStorage as an // absl::flat_hash_map value. - StatNameStorage(StatNameStorage&& src) noexcept : bytes_(std::move(src.bytes_)) {} + StatNameStorage(StatNameStorage&& src) noexcept : StatNameStorageBase(std::move(src)) {} // Obtains new backing storage for an already existing StatName. Used to // record a computed StatName held in a temp into a more persistent data @@ -312,16 +353,6 @@ class StatNameStorage { * @param table the symbol table. */ void free(SymbolTable& table); - - /** - * @return StatName a reference to the owned storage. - */ - inline StatName statName() const; - - uint8_t* bytes() { return bytes_.get(); } - -private: - SymbolTable::StoragePtr bytes_; }; /** @@ -376,13 +407,13 @@ class StatName { bool operator!=(const StatName& rhs) const { return !(*this == rhs); } /** - * @return uint64_t the number of bytes in the symbol array, excluding the two-byte + * @return uint64_t the number of bytes in the symbol array, excluding the * overhead for the size itself. */ uint64_t dataSize() const; /** - * @return uint64_t the number of bytes in the symbol array, including the two-byte + * @return uint64_t the number of bytes in the symbol array, including the * overhead for the size itself. */ uint64_t size() const { return SymbolTableImpl::Encoding::totalSizeBytes(dataSize()); } @@ -396,7 +427,7 @@ class StatName { */ void copyToMemBlock(MemBlockBuilder& mem_block_builder) { ASSERT(mem_block_builder.size() == 0); - mem_block_builder.appendData(absl::MakeConstSpan(size_and_data_, size())); + mem_block_builder.appendData(absl::MakeSpan(size_and_data_, size())); } /** @@ -408,7 +439,7 @@ class StatName { * @param mem_block_builder the builder to receive the storage. */ void appendDataToMemBlock(MemBlockBuilder& storage) { - storage.appendData(absl::MakeConstSpan(data(), dataSize())); + storage.appendData(absl::MakeSpan(data(), dataSize())); } #ifndef ENVOY_CONFIG_COVERAGE @@ -431,7 +462,7 @@ class StatName { const uint8_t* size_and_data_{nullptr}; }; -StatName StatNameStorage::statName() const { return StatName(bytes_.get()); } +StatName StatNameStorageBase::statName() const { return StatName(bytes_.get()); } /** * Contains the backing store for a StatName and enough context so it can @@ -463,6 +494,23 @@ class StatNameManagedStorage : public StatNameStorage { SymbolTable& symbol_table_; }; +/** + * Holds backing-store for a dynamic stat, where are no global locks needed + * to create a StatName from a string, but there is no sharing of token data + * between names, so there may be more memory consumed. + */ +class StatNameDynamicStorage : public StatNameStorageBase { +public: + // Basic constructor based on a name. Note the table is used for + // a virtual-function call to encode the name, but no locks are taken + // in either implementation of the SymbolTable api. + StatNameDynamicStorage(absl::string_view name, SymbolTable& table) + : StatNameStorageBase(table.makeDynamicStorage(name)) {} + // Move constructor. + StatNameDynamicStorage(StatNameDynamicStorage&& src) noexcept + : StatNameStorageBase(std::move(src)) {} +}; + /** * Maintains storage for a collection of StatName objects. Like * StatNameManagedStorage, this has an RAII usage model, taking @@ -473,7 +521,7 @@ class StatNameManagedStorage : public StatNameStorage { * StatNamePool pool(symbol_table); * StatName name1 = pool.add("name1"); * StatName name2 = pool.add("name2"); - * uint8_t* storage = pool.addReturningStorage("name3"); + * const uint8_t* storage = pool.addReturningStorage("name3"); * StatName name3(storage); */ class StatNamePool { @@ -508,7 +556,7 @@ class StatNamePool { * @return a pointer to the bytes held in the container for this name, suitable for * using to construct a StatName. */ - uint8_t* addReturningStorage(absl::string_view name); + const uint8_t* addReturningStorage(absl::string_view name); private: // We keep the stat names in a vector of StatNameStorage, storing the @@ -518,6 +566,43 @@ class StatNamePool { std::vector storage_vector_; }; +/** + * Maintains storage for a collection of StatName objects constructed from + * dynamically discovered strings. Like StatNameDynamicStorage, this has an RAII + * usage model. Creating StatNames with this interface do not incur a + * SymbolTable lock, but tokens are not shared across StatNames. + * + * The SymbolTable is required as a constructor argument to assist in encoding + * the stat-names, which differs between FakeSymbolTableImpl and SymbolTableImpl. + * + * Example usage: + * StatNameDynamicPool pool(symbol_table); + * StatName name1 = pool.add("name1"); + * StatName name2 = pool.add("name2"); + */ +class StatNameDynamicPool { +public: + explicit StatNameDynamicPool(SymbolTable& symbol_table) : symbol_table_(symbol_table) {} + + /** + * Removes all StatNames from the pool. + */ + void clear() { storage_vector_.clear(); } + + /** + * @param name the name to add the container. + * @return the StatName held in the container for this name. + */ + StatName add(absl::string_view name); + +private: + // We keep the stat names in a vector of StatNameStorage, storing the + // SymbolTable reference separately. This saves 8 bytes per StatName, + // at the cost of having a destructor that calls clear(). + SymbolTable& symbol_table_; + std::vector storage_vector_; +}; + // Represents an ordered container of StatNames. The encoding for each StatName // is byte-packed together, so this carries less overhead than allocating the // storage separately. The trade-off is there is no random access; you can only @@ -682,8 +767,6 @@ class StatNameSet { public: // This object must be instantiated via SymbolTable::makeSet(), thus constructor is private. - ~StatNameSet(); - /** * Adds a string to the builtin map, which is not mutex protected. This map is * always consulted first as a hit there means no lock is required. @@ -705,23 +788,6 @@ class StatNameSet { rememberBuiltins>(container); } - /** - * Finds a StatName by name. If 'token' has been remembered as a built-in, - * then no lock is required. Otherwise we must consult dynamic_stat_names_ - * under a lock that's private to the StatNameSet. If that's empty, we need to - * create the StatName in the pool, which requires taking a global lock, and - * then remember the new StatName in the dynamic_stat_names_. This allows - * subsequent lookups of the same string to take only the set's lock, and not - * the whole symbol-table lock. - * - * @return a StatName corresponding to the passed-in token, owned by the set. - * - * TODO(jmarantz): Potential perf issue here with contention, both on this - * set's mutex and also the SymbolTable mutex which must be taken during - * StatNamePool::add(). - */ - StatName getDynamic(absl::string_view token); - /** * Finds a builtin StatName by name. If the builtin has not been registered, * then the fallback is returned. @@ -738,24 +804,11 @@ class StatNameSet { return pool_.add(str); } - /** - * Clears recent lookups. - */ - void clearRecentLookups(); - - /** - * Sets the number of names recorded in the recent-lookups set. - * - * @param capacity the capacity to configure. - */ - void setRecentLookupCapacity(uint64_t capacity); - private: friend class FakeSymbolTableImpl; friend class SymbolTableImpl; StatNameSet(SymbolTable& symbol_table, absl::string_view name); - uint64_t getRecentLookups(const RecentLookups::IterFn& iter) const; const std::string name_; Stats::SymbolTable& symbol_table_; @@ -763,8 +816,6 @@ class StatNameSet { mutable absl::Mutex mutex_; using StringStatNameMap = absl::flat_hash_map; StringStatNameMap builtin_stat_names_; - StringStatNameMap dynamic_stat_names_ GUARDED_BY(mutex_); - RecentLookups recent_lookups_ GUARDED_BY(mutex_); }; } // namespace Stats diff --git a/source/docs/stats.md b/source/docs/stats.md index 3101e77c0be52..11783ae47def4 100644 --- a/source/docs/stats.md +++ b/source/docs/stats.md @@ -40,8 +40,8 @@ This implementation is complicated so here is a rough overview of the threading shared across all worker threads. * Per thread caches are checked, and if empty, they are populated from the central cache. * Scopes are entirely owned by the caller. The store only keeps weak pointers. - * When a scope is destroyed, a cache flush operation is run on all threads to flush any cached - data owned by the destroyed scope. + * When a scope is destroyed, a cache flush operation is posted on all threads to flush any + cached data owned by the destroyed scope. * Scopes use a unique incrementing ID for the cache key. This ensures that if a new scope is created at the same address as a recently deleted scope, cache references will not accidentally reference the old scope which may be about to be cache flushed. @@ -130,8 +130,41 @@ across the codebase. `const StatName` member variables. Most names should be established during process initializion or in response to xDS updates. -`StatNameSet` provides some associative lookups at runtime, using two maps: a -static map and a dynamic map. +`StatNameSet` provides some associative lookups at runtime. The associations +should be created before the set is used for requests, via +`StatNameSet::rememberBuiltin`. This is useful in scenarios where stat-names are +derived from data in a request, but there are limited set of known tokens, such +as SSL ciphers or Redis commands. + +### Dynamic stat tokens + +While stats are usually composed of tokens that are known at compile-time, there +are scenarios where the names are newly discovered from data in requests. To +avoid taking locks in this case, tokens can be formed dynamically using +`StatNameDynamicStorage` or `StatNameDynamicPool`. In this case we lose +substring sharing but we avoid taking locks. Dynamically generaeted tokens can +be combined with symbolized tokens from `StatNameSet` or `StatNamePool` using +`SymbolTable::join()`. + +Relative to using symbolized tokens, The cost of using dynamic tokens is: + + * the StatName must be allocated and populated from the string data every time + `StatNameDynamicPool::add()` is called or `StatNameDynamicStorage` is constructed. + * the resulting `StatName`s are as long as the string, rather than benefiting from + a symbolized representation, which is typically 4 bytes or less per token. + +However, the cost of using dynamic tokens is on par with the cost of not using +a StatName system at all, only adding one re-encoding. And it is hard to quantify +the benefit of avoiding mutex contention when there are large numbers of threads. + +### Symbol Table Memory Layout + +Below is a diagram +[(source)](https://docs.google.com/drawings/d/1eG6CHSUFQ5zkk-j-kcFCUay2-D_ktF39Tbzql5ypUDc/edit) +showing the memory layout for a few scenarios of constructing and joining symbolized +`StatName` and dynamic `StatName`. + +![Symbol Table Memory Diagram](symtab.png) ### Current State and Strategy To Deploy Symbol Tables @@ -164,6 +197,25 @@ Once we are confident we've removed all hot-path symbol-table lookups, ideally through usage of real symbol tables in production, examining that endpoint, we can enable real symbol tables by default. +### Symbol Table Class Overview + +Class | Superclass | Description +-----| ---------- | --------- +SymbolTable | | Abstract class providing an interface for symbol tables +FakeSymbolTableImpl | SymbolTable | Implementation of SymbolTable API where StatName is represented as a flat string +SymbolTableImpl | SymbolTable | Implementation of SymbolTable API where StatName share symbols held in a table +SymbolTableImpl::Encoding | | Helper class for incrementally encoding strings into symbols +StatName | | Provides an API and a view into a StatName (dynamic, symbolized, or fake). Like absl::string_view, the backing store must be separately maintained. +StatNameStorageBase | | Holds storage (an array of bytes) for a dynamic or symbolized StatName +StatNameStorage | StatNameStorageBase | Holds storage for a symbolized StatName. Must be explicitly freed (not just destructed). +StatNameManagedStorage | StatNameStorage | Like StatNameStorage, but is 8 bytes larger, and can be destructed without free(). +StatNameDynamicStorage | StatNameStorageBase | Holds StatName storage for a dynamic (not symbolized) StatName. +StatNamePool | | Holds backing store for any number of symbolized StatNames. +StatNameDynamicPool | | Holds backing store for any number of dynamic StatNames. +StatNameList | | Provides packed backing store for an ordered collection of StatNames, that are only accessed sequentially. Used for MetricImpl. +StatNameStorageSet | | Implements a set of StatName with lookup via StatName. Used for rejected stats. +StatNameSet | | Implements a set of StatName with lookup via string_view. Used to remember well-known names during startup, e.g. Redis commands. + ## Tags and Tag Extraction TBD diff --git a/source/docs/symtab.png b/source/docs/symtab.png new file mode 100644 index 0000000000000..7d78ea6ed7391 Binary files /dev/null and b/source/docs/symtab.png differ diff --git a/source/extensions/filters/http/dynamo/dynamo_filter.cc b/source/extensions/filters/http/dynamo/dynamo_filter.cc index 966f42837c150..f91296b9d8def 100644 --- a/source/extensions/filters/http/dynamo/dynamo_filter.cc +++ b/source/extensions/filters/http/dynamo/dynamo_filter.cc @@ -176,11 +176,16 @@ void DynamoFilter::chargeStatsPerEntity(const std::string& entity, const std::st time_source_.monotonicTime() - start_decode_); size_t group_index = DynamoStats::groupIndex(status); + Stats::StatNameDynamicPool dynamic(stats_->symbolTable()); + const Stats::StatName entity_type_name = stats_->getBuiltin(entity_type, stats_->unknown_entity_type_); - const Stats::StatName entity_name = stats_->getDynamic(entity); - const Stats::StatName total_name = stats_->getDynamic(absl::StrCat("upstream_rq_total_", status)); - const Stats::StatName time_name = stats_->getDynamic(absl::StrCat("upstream_rq_time_", status)); + const Stats::StatName entity_name = dynamic.add(entity); + + // TODO(jmarantz): Consider using a similar mechanism to common/http/codes.cc + // to avoid creating dynamic stat-names for common statuses. + const Stats::StatName total_name = dynamic.add(absl::StrCat("upstream_rq_total_", status)); + const Stats::StatName time_name = dynamic.add(absl::StrCat("upstream_rq_time_", status)); stats_->counter({entity_type_name, entity_name, stats_->upstream_rq_total_}).inc(); const Stats::StatName total_group = stats_->upstream_rq_total_groups_[group_index]; @@ -205,9 +210,8 @@ void DynamoFilter::chargeUnProcessedKeysStats(const Json::Object& json_body) { // complete apart of the batch operation. Only the table names will be logged for errors. std::vector unprocessed_tables = RequestParser::parseBatchUnProcessedKeys(json_body); for (const std::string& unprocessed_table : unprocessed_tables) { - stats_ - ->counter({stats_->error_, stats_->getDynamic(unprocessed_table), - stats_->batch_failure_unprocessed_keys_}) + Stats::StatNameDynamicStorage storage(unprocessed_table, stats_->symbolTable()); + stats_->counter({stats_->error_, storage.statName(), stats_->batch_failure_unprocessed_keys_}) .inc(); } } @@ -216,12 +220,13 @@ void DynamoFilter::chargeFailureSpecificStats(const Json::Object& json_body) { std::string error_type = RequestParser::parseErrorType(json_body); if (!error_type.empty()) { + Stats::StatNameDynamicPool dynamic(stats_->symbolTable()); if (table_descriptor_.table_name.empty()) { - stats_->counter({stats_->error_, stats_->no_table_, stats_->getDynamic(error_type)}).inc(); + stats_->counter({stats_->error_, stats_->no_table_, dynamic.add(error_type)}).inc(); } else { stats_ - ->counter({stats_->error_, stats_->getDynamic(table_descriptor_.table_name), - stats_->getDynamic(error_type)}) + ->counter( + {stats_->error_, dynamic.add(table_descriptor_.table_name), dynamic.add(error_type)}) .inc(); } } else { diff --git a/source/extensions/filters/http/dynamo/dynamo_stats.cc b/source/extensions/filters/http/dynamo/dynamo_stats.cc index bf30a27a0f22d..3b0eb2a484741 100644 --- a/source/extensions/filters/http/dynamo/dynamo_stats.cc +++ b/source/extensions/filters/http/dynamo/dynamo_stats.cc @@ -70,9 +70,11 @@ Stats::Counter& DynamoStats::buildPartitionStatCounter(const std::string& table_ const std::string& partition_id) { // Use the last 7 characters of the partition id. absl::string_view id_last_7 = absl::string_view(partition_id).substr(partition_id.size() - 7); - const Stats::SymbolTable::StoragePtr stat_name_storage = addPrefix( - {table_, getDynamic(table_name), capacity_, getBuiltin(operation, unknown_operation_), - getDynamic(absl::StrCat("__partition_id=", id_last_7))}); + Stats::StatNameDynamicPool dynamic(scope_.symbolTable()); + const Stats::StatName partition = dynamic.add(absl::StrCat("__partition_id=", id_last_7)); + const Stats::SymbolTable::StoragePtr stat_name_storage = + addPrefix({table_, dynamic.add(table_name), capacity_, + getBuiltin(operation, unknown_operation_), partition}); return scope_.counterFromStatName(Stats::StatName(stat_name_storage.get())); } diff --git a/source/extensions/filters/http/dynamo/dynamo_stats.h b/source/extensions/filters/http/dynamo/dynamo_stats.h index 628c0c476fbad..6d61fa532391c 100644 --- a/source/extensions/filters/http/dynamo/dynamo_stats.h +++ b/source/extensions/filters/http/dynamo/dynamo_stats.h @@ -32,16 +32,14 @@ class DynamoStats { static size_t groupIndex(uint64_t status); /** - * Finds or creates a StatName by string, taking a global lock if needed. - * - * TODO(jmarantz): Potential perf issue here with mutex contention for names - * that have not been remembered as builtins in the constructor. + * Finds a StatName by string. */ - Stats::StatName getDynamic(const std::string& str) { return stat_name_set_->getDynamic(str); } Stats::StatName getBuiltin(const std::string& str, Stats::StatName fallback) { return stat_name_set_->getBuiltin(str, fallback); } + Stats::SymbolTable& symbolTable() { return scope_.symbolTable(); } + private: Stats::SymbolTable::StoragePtr addPrefix(const Stats::StatNameVec& names); diff --git a/source/extensions/filters/http/fault/fault_filter.cc b/source/extensions/filters/http/fault/fault_filter.cc index 98209074643f0..5fa6cb7c39192 100644 --- a/source/extensions/filters/http/fault/fault_filter.cc +++ b/source/extensions/filters/http/fault/fault_filter.cc @@ -87,10 +87,9 @@ FaultFilterConfig::FaultFilterConfig( delays_injected_(stat_name_set_->add("delays_injected")), stats_prefix_(stat_name_set_->add(absl::StrCat(stats_prefix, "fault"))) {} -void FaultFilterConfig::incCounter(absl::string_view downstream_cluster, - Stats::StatName stat_name) { - Stats::SymbolTable::StoragePtr storage = scope_.symbolTable().join( - {stats_prefix_, stat_name_set_->getDynamic(downstream_cluster), stat_name}); +void FaultFilterConfig::incCounter(Stats::StatName downstream_cluster, Stats::StatName stat_name) { + Stats::SymbolTable::StoragePtr storage = + scope_.symbolTable().join({stats_prefix_, downstream_cluster, stat_name}); scope_.counterFromStatName(Stats::StatName(storage.get())).inc(); } @@ -140,6 +139,10 @@ Http::FilterHeadersStatus FaultFilter::decodeHeaders(Http::HeaderMap& headers, b if (headers.EnvoyDownstreamServiceCluster()) { downstream_cluster_ = std::string(headers.EnvoyDownstreamServiceCluster()->value().getStringView()); + if (!downstream_cluster_.empty()) { + downstream_cluster_storage_ = std::make_unique( + downstream_cluster_, config_->scope().symbolTable()); + } downstream_cluster_delay_percent_key_ = fmt::format("fault.http.{}.delay.fixed_delay_percent", downstream_cluster_); @@ -292,7 +295,7 @@ uint64_t FaultFilter::abortHttpStatus() { void FaultFilter::recordDelaysInjectedStats() { // Downstream specific stats. if (!downstream_cluster_.empty()) { - config_->incDelays(downstream_cluster_); + config_->incDelays(downstream_cluster_storage_->statName()); } // General stats. All injected faults are considered a single aggregate active fault. @@ -303,7 +306,7 @@ void FaultFilter::recordDelaysInjectedStats() { void FaultFilter::recordAbortsInjectedStats() { // Downstream specific stats. if (!downstream_cluster_.empty()) { - config_->incAborts(downstream_cluster_); + config_->incAborts(downstream_cluster_storage_->statName()); } // General stats. All injected faults are considered a single aggregate active fault. diff --git a/source/extensions/filters/http/fault/fault_filter.h b/source/extensions/filters/http/fault/fault_filter.h index ab8f2ee12d42a..a6baf1f8fadbc 100644 --- a/source/extensions/filters/http/fault/fault_filter.h +++ b/source/extensions/filters/http/fault/fault_filter.h @@ -116,17 +116,17 @@ class FaultFilterConfig { const FaultSettings* settings() { return &settings_; } TimeSource& timeSource() { return time_source_; } - void incDelays(absl::string_view downstream_cluster) { + void incDelays(Stats::StatName downstream_cluster) { incCounter(downstream_cluster, delays_injected_); } - void incAborts(absl::string_view downstream_cluster) { + void incAborts(Stats::StatName downstream_cluster) { incCounter(downstream_cluster, aborts_injected_); } private: static FaultFilterStats generateStats(const std::string& prefix, Stats::Scope& scope); - void incCounter(absl::string_view downstream_cluster, Stats::StatName stat_name); + void incCounter(Stats::StatName downstream_cluster, Stats::StatName stat_name); const FaultSettings settings_; Runtime::Loader& runtime_; @@ -258,6 +258,7 @@ class FaultFilter : public Http::StreamFilter, Logger::Loggable downstream_cluster_storage_; const FaultSettings* fault_settings_; bool fault_active_{}; std::unique_ptr response_limiter_; diff --git a/source/extensions/filters/http/ip_tagging/ip_tagging_filter.cc b/source/extensions/filters/http/ip_tagging/ip_tagging_filter.cc index 2b169317944a6..7bb0abd225bae 100644 --- a/source/extensions/filters/http/ip_tagging/ip_tagging_filter.cc +++ b/source/extensions/filters/http/ip_tagging/ip_tagging_filter.cc @@ -51,11 +51,17 @@ IpTaggingFilterConfig::IpTaggingFilterConfig( tag_data.emplace_back(ip_tag.ip_tag_name(), cidr_set); } trie_ = std::make_unique>(tag_data); + // TODO(jmarantz): save stat-names for each tag as stat_name_set builtins. } void IpTaggingFilterConfig::incCounter(Stats::StatName name, absl::string_view tag) { - Stats::SymbolTable::StoragePtr storage = - scope_.symbolTable().join({stats_prefix_, stat_name_set_->getDynamic(tag), name}); + Stats::SymbolTable::StoragePtr storage; + if (tag.empty()) { + storage = scope_.symbolTable().join({stats_prefix_, name}); + } else { + Stats::StatNameDynamicStorage tag_storage(tag, scope_.symbolTable()); + storage = scope_.symbolTable().join({stats_prefix_, tag_storage.statName(), name}); + } scope_.counterFromStatName(Stats::StatName(storage.get())).inc(); } diff --git a/source/extensions/filters/network/mongo_proxy/mongo_stats.h b/source/extensions/filters/network/mongo_proxy/mongo_stats.h index a1a53e80067fc..f49d4d34e7bfa 100644 --- a/source/extensions/filters/network/mongo_proxy/mongo_stats.h +++ b/source/extensions/filters/network/mongo_proxy/mongo_stats.h @@ -31,7 +31,7 @@ class MongoStats { return stat_name_set_->getBuiltin(str, fallback); } - Stats::StatName getDynamic(const std::string& str) { return stat_name_set_->getDynamic(str); } + Stats::SymbolTable& symbolTable() { return scope_.symbolTable(); } private: Stats::SymbolTable::StoragePtr addPrefix(const std::vector& names); diff --git a/source/extensions/filters/network/mongo_proxy/proxy.cc b/source/extensions/filters/network/mongo_proxy/proxy.cc index f8ae0c5f747f1..0ec6c178291b1 100644 --- a/source/extensions/filters/network/mongo_proxy/proxy.cc +++ b/source/extensions/filters/network/mongo_proxy/proxy.cc @@ -155,13 +155,14 @@ void ProxyFilter::decodeQuery(QueryMessagePtr&& message) { Stats::StatNameVec names; names.reserve(6); // 2 entries are added by chargeQueryStats(). names.push_back(mongo_stats_->collection_); - names.push_back(mongo_stats_->getDynamic(active_query->query_info_.collection())); + Stats::StatNameDynamicPool dynamic(mongo_stats_->symbolTable()); + names.push_back(dynamic.add(active_query->query_info_.collection())); chargeQueryStats(names, query_type); // Callsite stats if we have it. if (!active_query->query_info_.callsite().empty()) { names.push_back(mongo_stats_->callsite_); - names.push_back(mongo_stats_->getDynamic(active_query->query_info_.callsite())); + names.push_back(dynamic.add(active_query->query_info_.callsite())); chargeQueryStats(names, query_type); } @@ -229,8 +230,9 @@ void ProxyFilter::decodeReply(ReplyMessagePtr&& message) { chargeReplyStats(active_query, names, *message); } else { // Collection stats first. + Stats::StatNameDynamicPool dynamic(mongo_stats_->symbolTable()); Stats::StatNameVec names{mongo_stats_->collection_, - mongo_stats_->getDynamic(active_query.query_info_.collection()), + dynamic.add(active_query.query_info_.collection()), mongo_stats_->query_}; chargeReplyStats(active_query, names, *message); @@ -240,7 +242,7 @@ void ProxyFilter::decodeReply(ReplyMessagePtr&& message) { // to mutate the array to {"collection", collection, "callsite", callsite, "query"}. ASSERT(names.size() == 3); names.back() = mongo_stats_->callsite_; // Replaces "query". - names.push_back(mongo_stats_->getDynamic(active_query.query_info_.callsite())); + names.push_back(dynamic.add(active_query.query_info_.callsite())); names.push_back(mongo_stats_->query_); chargeReplyStats(active_query, names, *message); } diff --git a/test/common/http/codes_test.cc b/test/common/http/codes_test.cc index ff501bfe26f53..2d615cc7974b0 100644 --- a/test/common/http/codes_test.cc +++ b/test/common/http/codes_test.cc @@ -279,25 +279,5 @@ TEST_F(CodeUtilityTest, ResponseTimingTest) { code_stats.chargeResponseTiming(info); } -class CodeStatsTest : public testing::Test { -protected: - CodeStatsTest() - : symbol_table_(Stats::SymbolTableCreator::makeSymbolTable()), code_stats_(*symbol_table_) {} - - absl::string_view stripTrailingDot(absl::string_view prefix) { - return CodeStatsImpl::stripTrailingDot(prefix); - } - - Stats::SymbolTablePtr symbol_table_; - CodeStatsImpl code_stats_; -}; - -TEST_F(CodeStatsTest, StripTrailingDot) { - EXPECT_EQ("", stripTrailingDot("")); - EXPECT_EQ("foo", stripTrailingDot("foo.")); - EXPECT_EQ(".foo", stripTrailingDot(".foo")); // no change - EXPECT_EQ("foo.", stripTrailingDot("foo..")); // only one dot gets stripped. -} - } // namespace Http } // namespace Envoy diff --git a/test/common/stats/stat_test_utility.cc b/test/common/stats/stat_test_utility.cc index adcd6d7a3fa5e..7245cddf388d0 100644 --- a/test/common/stats/stat_test_utility.cc +++ b/test/common/stats/stat_test_utility.cc @@ -135,7 +135,7 @@ MemoryTest::Mode MemoryTest::mode() { #endif } -// TODO(jmarantz): this utility is intended to be costs both for unit tests +// TODO(jmarantz): this utility is intended to be used both for unit tests // and fuzz tests. But those have different checking macros, e.g. EXPECT_EQ vs // FUZZ_ASSERT. std::vector serializeDeserializeNumber(uint64_t number) { diff --git a/test/common/stats/symbol_table_fuzz_test.cc b/test/common/stats/symbol_table_fuzz_test.cc index e1a08a03e7a57..ee17abcec27c8 100644 --- a/test/common/stats/symbol_table_fuzz_test.cc +++ b/test/common/stats/symbol_table_fuzz_test.cc @@ -19,11 +19,15 @@ DEFINE_FUZZER(const uint8_t* buf, size_t len) { SymbolTableImpl symbol_table; StatNamePool pool(symbol_table); StatNamePool fake_pool(fake_symbol_table); + StatNameDynamicPool dynamic_pool(symbol_table); + StatNameDynamicPool fake_dynamic_pool(fake_symbol_table); while (provider.remaining_bytes() != 0) { std::string next_data = provider.ConsumeRandomLengthString(provider.remaining_bytes()); StatName stat_name = pool.add(next_data); StatName fake_stat_name = fake_pool.add(next_data); + StatName dynamic_stat_name = dynamic_pool.add(next_data); + StatName fake_dynamic_stat_name = fake_dynamic_pool.add(next_data); // We can add stat-names with trailing dots, but note that they will be // trimmed by the Symbol Table implementation, so we must trim the input @@ -31,6 +35,8 @@ DEFINE_FUZZER(const uint8_t* buf, size_t len) { absl::string_view trimmed_fuzz_data = StringUtil::removeTrailingCharacters(next_data, '.'); FUZZ_ASSERT(trimmed_fuzz_data == symbol_table.toString(stat_name)); FUZZ_ASSERT(trimmed_fuzz_data == fake_symbol_table.toString(fake_stat_name)); + FUZZ_ASSERT(trimmed_fuzz_data == symbol_table.toString(dynamic_stat_name)); + FUZZ_ASSERT(trimmed_fuzz_data == fake_symbol_table.toString(fake_dynamic_stat_name)); // Test all combinations of joins within each symbol table. std::string joined = absl::StrCat(trimmed_fuzz_data, ".", trimmed_fuzz_data); @@ -39,9 +45,14 @@ DEFINE_FUZZER(const uint8_t* buf, size_t len) { StatName stat_name(storage.get()); return table.toString(stat_name); }; - FUZZ_ASSERT(joined == join(symbol_table, stat_name, stat_name)); + FUZZ_ASSERT(joined == join(symbol_table, stat_name, dynamic_stat_name)); + FUZZ_ASSERT(joined == join(symbol_table, dynamic_stat_name, dynamic_stat_name)); + FUZZ_ASSERT(joined == join(symbol_table, dynamic_stat_name, stat_name)); FUZZ_ASSERT(joined == join(fake_symbol_table, fake_stat_name, fake_stat_name)); + FUZZ_ASSERT(joined == join(fake_symbol_table, fake_stat_name, fake_dynamic_stat_name)); + FUZZ_ASSERT(joined == join(fake_symbol_table, fake_dynamic_stat_name, fake_dynamic_stat_name)); + FUZZ_ASSERT(joined == join(fake_symbol_table, fake_dynamic_stat_name, fake_stat_name)); // Also encode the string directly, without symbolizing it. TestUtil::serializeDeserializeString(next_data); diff --git a/test/common/stats/symbol_table_impl_test.cc b/test/common/stats/symbol_table_impl_test.cc index 9bd050dbf6df5..a6ca10bba3bbc 100644 --- a/test/common/stats/symbol_table_impl_test.cc +++ b/test/common/stats/symbol_table_impl_test.cc @@ -60,9 +60,6 @@ class StatNameTest : public testing::TestWithParam { SymbolVec getSymbols(StatName stat_name) { return SymbolTableImpl::Encoding::decodeSymbols(stat_name.data(), stat_name.dataSize()); } - std::string decodeSymbolVec(const SymbolVec& symbol_vec) { - return real_symbol_table_->decodeSymbolVec(symbol_vec); - } Symbol monotonicCounter() { return real_symbol_table_->monotonicCounter(); } std::string encodeDecode(absl::string_view stat_name) { return table_->toString(makeStat(stat_name)); @@ -89,6 +86,10 @@ TEST_P(StatNameTest, SerializeBytes) { EXPECT_EQ((std::vector{128, 1}), serializeDeserialize(128)); EXPECT_EQ((std::vector{129, 1}), serializeDeserialize(129)); EXPECT_EQ((std::vector{255, 1}), serializeDeserialize(255)); + + // This is the example from the image in stats.md. + EXPECT_EQ((std::vector{0x80 + 5, 2}), serializeDeserialize(261)); + EXPECT_EQ((std::vector{255, 127}), serializeDeserialize(16383)); EXPECT_EQ((std::vector{128, 128, 1}), serializeDeserialize(16384)); EXPECT_EQ((std::vector{129, 128, 1}), serializeDeserialize(16385)); @@ -140,6 +141,55 @@ TEST_P(StatNameTest, TestEmpty) { EXPECT_TRUE(StatName().empty()); } +TEST_P(StatNameTest, TestDynamic100k) { + // Tests 100k different sizes of dynamic stat, covering all kinds of + // corner cases of spilling over into multi-byte lengths. + + std::string stat_str("dynamic_stat.x"); + for (int i = 0; i < 100 * 1000; ++i) { + char ch = i % 256; + if (ch == '.') { + ch = 'x'; + } + stat_str += ch; + StatNameDynamicStorage storage(stat_str, *table_); + StatName dynamic = storage.statName(); + EXPECT_EQ(stat_str, table_->toString(dynamic)); + SymbolTable::StoragePtr joined = table_->join({makeStat("a.b"), dynamic, makeStat("c.d")}); + EXPECT_EQ(absl::StrCat("a.b.", stat_str, ".c.d"), table_->toString(StatName(joined.get()))); + } +} + +TEST_P(StatNameTest, TestDynamicPools) { + // Same test for a dynamically allocated name. The only difference between + // the behavior with a remembered vs dynamic name is that when looking + // up a remembered name, a mutex is not taken. But we have no easy way + // to test for that. So we'll at least cover the code. + StatNameDynamicPool d1(*table_); + const StatName dynamic = d1.add("dynamic"); + EXPECT_EQ("dynamic", table_->toString(dynamic)); + + // The nature of the StatNameDynamicPool is that there is no sharing (and also no locks). + EXPECT_NE(dynamic.data(), d1.add("dynamic").data()); + + // Make sure blanks are always the same. + const StatName blank = d1.add(""); + EXPECT_EQ("", table_->toString(blank)); + EXPECT_NE(blank.data(), d1.add("").data()); + EXPECT_NE(blank.data(), d1.add("").data()); + EXPECT_NE(blank.data(), d1.add(absl::string_view()).data()); + + // There's another corner case for the same "dynamic" name from a + // different set. Here we will get a different StatName object + // out of the second set, though it will share the same underlying + // symbol-table symbol. + StatNameDynamicPool d2(*table_); + const StatName dynamic2 = d2.add("dynamic"); + EXPECT_EQ("dynamic", table_->toString(dynamic2)); + EXPECT_NE(dynamic2.data(), d2.add("dynamic").data()); // No storage sharing. + EXPECT_NE(dynamic2.data(), dynamic.data()); +} + TEST_P(StatNameTest, Test100KSymbolsRoundtrip) { for (int i = 0; i < 100 * 1000; ++i) { const std::string stat_name = absl::StrCat("symbol_", i); @@ -191,7 +241,17 @@ TEST_P(StatNameTest, TestSuccessfulDecode) { EXPECT_EQ(table_->toString(stat_name_1), stat_name); } -class StatNameDeathTest : public StatNameTest {}; +class StatNameDeathTest : public StatNameTest { +public: + void decodeSymbolVec(const SymbolVec& symbol_vec) { + Thread::LockGuard lock(real_symbol_table_->lock_); + for (Symbol symbol : symbol_vec) { + real_symbol_table_->fromSymbol(symbol); + } + } +}; +INSTANTIATE_TEST_SUITE_P(StatNameDeathTest, StatNameDeathTest, + testing::ValuesIn({SymbolTableType::Real})); TEST_P(StatNameDeathTest, TestBadDecodes) { if (GetParam() == SymbolTableType::Fake) { @@ -274,11 +334,11 @@ TEST_P(StatNameTest, FreePoolTest) { makeStat("3a"); makeStat("4a"); makeStat("5a"); - EXPECT_EQ(monotonicCounter(), 5); + EXPECT_EQ(monotonicCounter(), 6); EXPECT_EQ(table_->numSymbols(), 5); clearStorage(); } - EXPECT_EQ(monotonicCounter(), 5); + EXPECT_EQ(monotonicCounter(), 6); EXPECT_EQ(table_->numSymbols(), 0); // These are different strings being encoded, but they should recycle through the same symbols as @@ -288,11 +348,11 @@ TEST_P(StatNameTest, FreePoolTest) { makeStat("3b"); makeStat("4b"); makeStat("5b"); - EXPECT_EQ(monotonicCounter(), 5); + EXPECT_EQ(monotonicCounter(), 6); EXPECT_EQ(table_->numSymbols(), 5); makeStat("6"); - EXPECT_EQ(monotonicCounter(), 6); + EXPECT_EQ(monotonicCounter(), 7); EXPECT_EQ(table_->numSymbols(), 6); } @@ -613,31 +673,6 @@ TEST_P(StatNameTest, StatNameSet) { EXPECT_EQ("remembered", table_->toString(remembered)); EXPECT_EQ(remembered.data(), set->getBuiltin("remembered", fallback).data()); EXPECT_EQ(fallback.data(), set->getBuiltin("not_remembered", fallback).data()); - - // Same test for a dynamically allocated name. The only difference between - // the behavior with a remembered vs dynamic name is that when looking - // up a remembered name, a mutex is not taken. But we have no easy way - // to test for that. So we'll at least cover the code. - const Stats::StatName dynamic = set->getDynamic("dynamic"); - EXPECT_EQ("dynamic", table_->toString(dynamic)); - EXPECT_EQ(dynamic.data(), set->getDynamic("dynamic").data()); - - // Make sure blanks are always the same. - const Stats::StatName blank = set->getDynamic(""); - EXPECT_EQ("", table_->toString(blank)); - EXPECT_EQ(blank.data(), set->getDynamic("").data()); - EXPECT_EQ(blank.data(), set->getDynamic("").data()); - EXPECT_EQ(blank.data(), set->getDynamic(absl::string_view()).data()); - - // There's another corner case for the same "dynamic" name from a - // different set. Here we will get a different StatName object - // out of the second set, though it will share the same underlying - // symbol-table symbol. - StatNameSetPtr set2(table_->makeSet("set2")); - const Stats::StatName dynamic2 = set2->getDynamic("dynamic"); - EXPECT_EQ("dynamic", table_->toString(dynamic2)); - EXPECT_EQ(dynamic2.data(), set2->getDynamic("dynamic").data()); - EXPECT_NE(dynamic2.data(), dynamic.data()); } TEST_P(StatNameTest, StorageCopy) { @@ -651,7 +686,7 @@ TEST_P(StatNameTest, StorageCopy) { TEST_P(StatNameTest, RecentLookups) { if (GetParam() == SymbolTableType::Fake) { - // touch these cover coverage for fake symbol tables, but they'll have no effect. + // Touch these for coverage of fake symbol tables, but they'll have no effect. table_->clearRecentLookups(); table_->setRecentLookupCapacity(0); return; @@ -660,21 +695,20 @@ TEST_P(StatNameTest, RecentLookups) { StatNameSetPtr set1(table_->makeSet("set1")); table_->setRecentLookupCapacity(10); StatNameSetPtr set2(table_->makeSet("set2")); - set1->getDynamic("dynamic.stat1"); - set2->getDynamic("dynamic.stat2"); + StatNameDynamicPool d1(*table_); + d1.add("dynamic.stat1"); + StatNameDynamicPool d2(*table_); + d2.add("dynamic.stat2"); encodeDecode("direct.stat"); std::vector accum; uint64_t total = table_->getRecentLookups([&accum](absl::string_view name, uint64_t count) { accum.emplace_back(absl::StrCat(count, ": ", name)); }); - EXPECT_EQ(5, total); + EXPECT_EQ(1, total); // Dynamic pool adds don't count as recent lookups. std::string recent_lookups_str = absl::StrJoin(accum, " "); - EXPECT_EQ("1: direct.stat " - "2: dynamic.stat1 " // Combines entries from set and symbol-table. - "2: dynamic.stat2", - recent_lookups_str); + EXPECT_EQ("1: direct.stat", recent_lookups_str); // No dynamic-pool lookups take locks. table_->clearRecentLookups(); uint32_t num_calls = 0; diff --git a/test/server/server_test.cc b/test/server/server_test.cc index 82d685be9cea1..80256fd67aeab 100644 --- a/test/server/server_test.cc +++ b/test/server/server_test.cc @@ -497,12 +497,10 @@ TEST_P(ServerStatsTest, FlushStats) { EXPECT_EQ(1, recent_lookups.value() - strobed_recent_lookups); strobed_recent_lookups = recent_lookups.value(); - // When we use a StatNameSet dynamic, we charge for the SymbolTable lookup and - // for the lookup in the StatNameSet as well, as that requires a lock for its - // dynamic hash_map. - test_set->getDynamic("c.d"); + // When we create a dynamic stat, there are no locks taken. + Stats::StatNameDynamicStorage dynamic_stat("c.d", stats_store_.symbolTable()); flushStats(); - EXPECT_EQ(2, recent_lookups.value() - strobed_recent_lookups); + EXPECT_EQ(recent_lookups.value(), strobed_recent_lookups); } INSTANTIATE_TEST_SUITE_P(IpVersions, ServerStatsTest,