From d4a9bf20f7378a59339a3059ae03ea718302a02b Mon Sep 17 00:00:00 2001 From: Kuat Yessenov Date: Wed, 23 Jul 2025 23:27:45 +0000 Subject: [PATCH 1/6] metric expiry Change-Id: If3a45283b13cfda7d4f9a7bb661a1573f552ed7e Signed-off-by: Kuat Yessenov --- envoy/stats/scope.h | 13 ++++ envoy/stats/stats.h | 5 ++ source/common/stats/allocator_impl.cc | 1 + source/common/stats/histogram_impl.h | 2 + source/common/stats/isolated_store_impl.h | 4 + source/common/stats/null_counter.h | 1 + source/common/stats/null_gauge.h | 1 + source/common/stats/null_text_readout.h | 1 + source/common/stats/thread_local_store.cc | 82 ++++++++++++++++++++ source/common/stats/thread_local_store.h | 4 + test/common/stats/thread_local_store_test.cc | 51 ++++++++++++ test/integration/server.h | 3 + test/mocks/stats/mocks.h | 4 + 13 files changed, 172 insertions(+) diff --git a/envoy/stats/scope.h b/envoy/stats/scope.h index 1116281d93026..614b59df11be2 100644 --- a/envoy/stats/scope.h +++ b/envoy/stats/scope.h @@ -218,6 +218,19 @@ class Scope : public std::enable_shared_from_this { */ virtual TextReadoutOptConstRef findTextReadout(StatName name) const PURE; + // evictAndMarkUsed removes unused stats and marks all the remaining stats + // unused. This should be called from a timer to reduce the cardinality + // of the time series. + // + // NOTE: Do not use this function when scope stats are stored on the + // workers by reference, since deletion on the worker might invalidate the + // reference. + // + // NOTE: Removal of stats is not deferred until the sink flush. This function + // should not be called more frequently than the stats flush interval. + // See https://github.com/envoyproxy/envoy/issues/23619. + virtual void evictAndMarkUnused() PURE; + /** * @return a reference to the symbol table. */ diff --git a/envoy/stats/stats.h b/envoy/stats/stats.h index 5d5b1f58c80cb..a8307baf836dd 100644 --- a/envoy/stats/stats.h +++ b/envoy/stats/stats.h @@ -93,6 +93,11 @@ class Metric : public RefcountInterface { */ virtual bool used() const PURE; + /** + * Clear any indicator on whether this metric has been updated. + */ + virtual void markUnused() PURE; + /** * Indicates whether this metric is hidden. */ diff --git a/source/common/stats/allocator_impl.cc b/source/common/stats/allocator_impl.cc index c8c6905f4f481..963279a96ca9e 100644 --- a/source/common/stats/allocator_impl.cc +++ b/source/common/stats/allocator_impl.cc @@ -85,6 +85,7 @@ template class StatsSharedImpl : public MetricImpl // Metric SymbolTable& symbolTable() final { return alloc_.symbolTable(); } bool used() const override { return flags_ & Metric::Flags::Used; } + void markUnused() override { flags_ &= ~Metric::Flags::Used; } bool hidden() const override { return flags_ & Metric::Flags::Hidden; } // RefcountInterface diff --git a/source/common/stats/histogram_impl.h b/source/common/stats/histogram_impl.h index 3b30f1b289df7..80718294b7856 100644 --- a/source/common/stats/histogram_impl.h +++ b/source/common/stats/histogram_impl.h @@ -111,6 +111,7 @@ class HistogramImpl : public HistogramImplHelper { void recordValue(uint64_t value) override { parent_.deliverHistogramToSinks(*this, value); } bool used() const override { return true; } + void markUnused() override {} bool hidden() const override { return false; } SymbolTable& symbolTable() final { return parent_.symbolTable(); } @@ -132,6 +133,7 @@ class NullHistogramImpl : public HistogramImplHelper { ~NullHistogramImpl() override { MetricImpl::clear(symbol_table_); } bool used() const override { return false; } + void markUnused() override {} bool hidden() const override { return false; } SymbolTable& symbolTable() override { return symbol_table_; } diff --git a/source/common/stats/isolated_store_impl.h b/source/common/stats/isolated_store_impl.h index 84ea935102b98..130bbac08e30f 100644 --- a/source/common/stats/isolated_store_impl.h +++ b/source/common/stats/isolated_store_impl.h @@ -353,6 +353,10 @@ class IsolatedScopeImpl : public Scope { return textReadoutFromStatName(storage.statName()); } + void evictAndMarkUnused() override { + // Do nothing. Eviction is only supported on thread local stores. + } + StatName prefix() const override { return prefix_.statName(); } IsolatedStoreImpl& store() override { return store_; } const IsolatedStoreImpl& constStore() const override { return store_; } diff --git a/source/common/stats/null_counter.h b/source/common/stats/null_counter.h index ca576310f0d79..f8b755c58c57d 100644 --- a/source/common/stats/null_counter.h +++ b/source/common/stats/null_counter.h @@ -31,6 +31,7 @@ class NullCounterImpl : public MetricImpl { // Metric bool used() const override { return false; } + void markUnused() override {} bool hidden() const override { return false; } SymbolTable& symbolTable() override { return symbol_table_; } diff --git a/source/common/stats/null_gauge.h b/source/common/stats/null_gauge.h index 5af09aa5999ee..642950de18d88 100644 --- a/source/common/stats/null_gauge.h +++ b/source/common/stats/null_gauge.h @@ -35,6 +35,7 @@ class NullGaugeImpl : public MetricImpl { // Metric bool used() const override { return false; } + void markUnused() override {} bool hidden() const override { return false; } SymbolTable& symbolTable() override { return symbol_table_; } diff --git a/source/common/stats/null_text_readout.h b/source/common/stats/null_text_readout.h index 3073fa8182b48..40a155b7ee66c 100644 --- a/source/common/stats/null_text_readout.h +++ b/source/common/stats/null_text_readout.h @@ -28,6 +28,7 @@ class NullTextReadoutImpl : public MetricImpl { // Metric bool used() const override { return false; } + void markUnused() override {} bool hidden() const override { return false; } SymbolTable& symbolTable() override { return symbol_table_; } diff --git a/source/common/stats/thread_local_store.cc b/source/common/stats/thread_local_store.cc index 175d7d473e3cb..cd09a19789b90 100644 --- a/source/common/stats/thread_local_store.cc +++ b/source/common/stats/thread_local_store.cc @@ -772,6 +772,80 @@ TextReadoutOptConstRef ThreadLocalStoreImpl::ScopeImpl::findTextReadout(StatName return findStatLockHeld(name, central_cache_->text_readouts_); } +void ThreadLocalStoreImpl::ScopeImpl::evictAndMarkUnused() { + ASSERT_IS_MAIN_OR_TEST_THREAD(); + + // If we are shutting down, we no longer perform eviction as workers may be shutting down + // and not able to complete their work. + if (!parent_.shutting_down_ && parent_.tls_cache_) { + auto stale_counters = std::make_shared>(); + auto stale_gauges = std::make_shared>(); + auto stale_text_readouts = std::make_shared>(); + auto stale_histograms = std::make_shared>(); + + auto collect_stale = [](std::shared_ptr>& stale_metrics) { + return [stale_metrics](std::pair kv) { + const auto& [name, metric] = kv; + if constexpr (std::same_as) { + // Histograms usage can only be detected after merge (which happens + // only during flush). Since eviction is less frequent than flushing, + // we can merge histograms here as well. + metric->merge(); + } + if (metric->used()) { + metric->markUnused(); + return false; + } else { + stale_metrics->try_emplace(name, metric); + return true; + } + }; + }; + + { + Thread::LockGuard lock(parent_.lock_); + absl::erase_if(central_cache_->counters_, collect_stale(stale_counters)); + absl::erase_if(central_cache_->gauges_, collect_stale(stale_gauges)); + absl::erase_if(central_cache_->text_readouts_, collect_stale(stale_text_readouts)); + absl::erase_if(central_cache_->histograms_, collect_stale(stale_histograms)); + } + + // At this point, central cache no longer returns the evicted stats, but we + // need to keep the storage for the evicted stats until after the thread + // local caches are cleared. + parent_.tls_cache_->runOnAllThreads( + [scope_id = scope_id_, stale_counters, stale_gauges, stale_text_readouts, + stale_histograms](OptRef tls_cache) { + TlsCacheEntry& entry = tls_cache->insertScope(scope_id); + absl::erase_if(entry.counters_, + [=](std::pair> kv) { + return stale_counters->contains(kv.first); + }); + absl::erase_if(entry.gauges_, [=](std::pair> kv) { + return stale_gauges->contains(kv.first); + }); + absl::erase_if(entry.text_readouts_, + [=](std::pair> kv) { + return stale_text_readouts->contains(kv.first); + }); + absl::erase_if(entry.parent_histograms_, + [=](std::pair kv) { + return stale_histograms->contains(kv.first); + }); + }, + [stale_counters, stale_gauges, stale_text_readouts, stale_histograms]() { + // We want to delete stale stats on the main thread since stat destructors lock the stats + // allocator. Eventually, we might also want to defer the deletion until the values are + // flushes to the sinks. This could happen if an unused stat is updated immediately after + // eviction (e.g. on thread local cache), before we get a chance to delete it from the + // cache. + ENVOY_LOG(info, "deleting stale {} counters, {} gauges, {} text readouts, {} histograms", + stale_counters->size(), stale_gauges->size(), stale_text_readouts->size(), + stale_histograms->size()); + }); + } +} + Histogram& ThreadLocalStoreImpl::tlsHistogram(ParentHistogramImpl& parent, uint64_t id) { // tlsHistogram() is generally not called for a histogram that is rejected by // the matcher, so no further rejection-checking is needed at this level. @@ -910,6 +984,14 @@ bool ParentHistogramImpl::used() const { return merged_; } +void ParentHistogramImpl::markUnused() { + merged_ = false; + Thread::ReleasableLockGuard lock(merge_lock_); + for (const TlsHistogramSharedPtr& tls_histogram : tls_histograms_) { + tls_histogram->markUnused(); + } +} + bool ParentHistogramImpl::hidden() const { return false; } void ParentHistogramImpl::merge() { diff --git a/source/common/stats/thread_local_store.h b/source/common/stats/thread_local_store.h index b2d7f54990ea7..127214b5c2b59 100644 --- a/source/common/stats/thread_local_store.h +++ b/source/common/stats/thread_local_store.h @@ -60,6 +60,7 @@ class ThreadLocalHistogramImpl : public HistogramImplHelper { // Stats::Metric SymbolTable& symbolTable() final { return symbol_table_; } bool used() const override { return used_; } + void markUnused() override { used_ = false; } bool hidden() const override { return false; } private: @@ -116,6 +117,7 @@ class ParentHistogramImpl : public MetricImpl { // Stats::Metric SymbolTable& symbolTable() override; bool used() const override; + void markUnused() override; bool hidden() const override; // RefcountInterface @@ -439,6 +441,8 @@ class ThreadLocalStoreImpl : Logger::Loggable, public StoreRo return central_cache_; } + void evictAndMarkUnused() override; + const uint64_t scope_id_; ThreadLocalStoreImpl& parent_; diff --git a/test/common/stats/thread_local_store_test.cc b/test/common/stats/thread_local_store_test.cc index e21ad17d3b4f0..d4fab2a4b43d2 100644 --- a/test/common/stats/thread_local_store_test.cc +++ b/test/common/stats/thread_local_store_test.cc @@ -606,6 +606,57 @@ TEST_F(StatsThreadLocalStoreTest, ScopeDelete) { tls_.shutdownThread(); } +TEST_F(StatsThreadLocalStoreTest, EvictAndMarkUnused) { + InSequence s; + store_->initializeThreading(main_thread_dispatcher_, tls_); + + ScopeSharedPtr scope = store_->createScope("scope."); + { + // References will become invalid. + Counter& c1 = scope->counterFromString("c1"); + c1.add(1); + EXPECT_TRUE(c1.used()); + + Gauge& g1 = scope->gaugeFromString("g1", Gauge::ImportMode::Accumulate); + g1.set(5); + EXPECT_TRUE(g1.used()); + + TextReadout& t1 = scope->textReadoutFromString("t1"); + t1.set("hello"); + EXPECT_TRUE(t1.used()); + + // Mark unused. + EXPECT_CALL(tls_, runOnAllThreads(_, _)).Times(testing::AtLeast(1)); + scope->evictAndMarkUnused(); + + EXPECT_EQ(&c1, &scope->counterFromString("c1")); + EXPECT_FALSE(c1.used()); + EXPECT_EQ(1, c1.value()); + EXPECT_EQ(1UL, store_->counters().size()); + + EXPECT_EQ(&g1, &scope->gaugeFromString("g1", Gauge::ImportMode::Accumulate)); + EXPECT_FALSE(g1.used()); + EXPECT_EQ(5, g1.value()); + EXPECT_EQ(1UL, store_->gauges().size()); + + EXPECT_EQ(&t1, &scope->textReadoutFromString("t1")); + EXPECT_FALSE(t1.used()); + EXPECT_EQ("hello", t1.value()); + EXPECT_EQ(1UL, store_->textReadouts().size()); + } + + // Remove. + EXPECT_CALL(tls_, runOnAllThreads(_, _)).Times(testing::AtLeast(1)); + scope->evictAndMarkUnused(); + EXPECT_EQ(0UL, store_->counters().size()); + EXPECT_EQ(0UL, store_->gauges().size()); + EXPECT_EQ(0UL, store_->textReadouts().size()); + + tls_.shutdownGlobalThreading(); + store_->shutdownThreading(); + tls_.shutdownThread(); +} + TEST_F(StatsThreadLocalStoreTest, NestedScopes) { InSequence s; store_->initializeThreading(main_thread_dispatcher_, tls_); diff --git a/test/integration/server.h b/test/integration/server.h index 335d55fa4431b..34637480ae36b 100644 --- a/test/integration/server.h +++ b/test/integration/server.h @@ -163,6 +163,8 @@ class TestScopeWrapper : public Scope { Store& store() override { return store_; } const Store& constStore() const override { return store_; } + void evictAndMarkUnused() override {} + private: Thread::MutexBasicLockable& lock_; ScopeSharedPtr wrapped_scope_; @@ -196,6 +198,7 @@ class NotifyingCounter : public Stats::Counter { uint32_t use_count() const override { return counter_->use_count(); } StatName tagExtractedStatName() const override { return counter_->tagExtractedStatName(); } bool used() const override { return counter_->used(); } + void markUnused() override { counter_->markUnused(); } bool hidden() const override { return counter_->hidden(); } SymbolTable& symbolTable() override { return counter_->symbolTable(); } const SymbolTable& constSymbolTable() const override { return counter_->constSymbolTable(); } diff --git a/test/mocks/stats/mocks.h b/test/mocks/stats/mocks.h index 2b0d25435ca9c..31ccbdf481024 100644 --- a/test/mocks/stats/mocks.h +++ b/test/mocks/stats/mocks.h @@ -142,6 +142,7 @@ class MockCounter : public MockStatWithRefcount { MOCK_METHOD(uint64_t, latch, ()); MOCK_METHOD(void, reset, ()); MOCK_METHOD(bool, used, (), (const)); + MOCK_METHOD(void, markUnused, ()); MOCK_METHOD(bool, hidden, (), (const)); MOCK_METHOD(uint64_t, value, (), (const)); @@ -164,6 +165,7 @@ class MockGauge : public MockStatWithRefcount { MOCK_METHOD(void, sub, (uint64_t amount)); MOCK_METHOD(void, mergeImportMode, (ImportMode)); MOCK_METHOD(bool, used, (), (const)); + MOCK_METHOD(void, markUnused, ()); MOCK_METHOD(bool, hidden, (), (const)); MOCK_METHOD(uint64_t, value, (), (const)); MOCK_METHOD(absl::optional, cachedShouldImport, (), (const)); @@ -181,6 +183,7 @@ class MockHistogram : public MockMetric { ~MockHistogram() override; MOCK_METHOD(bool, used, (), (const)); + MOCK_METHOD(void, markUnused, ()); MOCK_METHOD(bool, hidden, (), (const)); MOCK_METHOD(Histogram::Unit, unit, (), (const)); MOCK_METHOD(void, recordValue, (uint64_t value)); @@ -206,6 +209,7 @@ class MockParentHistogram : public MockMetric { std::string quantileSummary() const override { return ""; }; std::string bucketSummary() const override { return ""; }; MOCK_METHOD(bool, used, (), (const)); + MOCK_METHOD(void, markUnused, ()); MOCK_METHOD(bool, hidden, (), (const)); MOCK_METHOD(Histogram::Unit, unit, (), (const)); MOCK_METHOD(void, recordValue, (uint64_t value)); From c51347ebed1e86c2be3c289e3ab0b8625e57862a Mon Sep 17 00:00:00 2001 From: Kuat Yessenov Date: Thu, 24 Jul 2025 05:35:47 +0000 Subject: [PATCH 2/6] add histogram Change-Id: I6748662507d4b540076381379a26f53a924cb815 Signed-off-by: Kuat Yessenov --- test/common/stats/thread_local_store_test.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/common/stats/thread_local_store_test.cc b/test/common/stats/thread_local_store_test.cc index d4fab2a4b43d2..e1e4d9c5f2fa9 100644 --- a/test/common/stats/thread_local_store_test.cc +++ b/test/common/stats/thread_local_store_test.cc @@ -625,6 +625,10 @@ TEST_F(StatsThreadLocalStoreTest, EvictAndMarkUnused) { t1.set("hello"); EXPECT_TRUE(t1.used()); + Histogram& h1 = scope->histogramFromString("h1", Histogram::Unit::Unspecified); + EXPECT_CALL(sink_, onHistogramComplete(Ref(h1), 1)); + h1.recordValue(1); + // Mark unused. EXPECT_CALL(tls_, runOnAllThreads(_, _)).Times(testing::AtLeast(1)); scope->evictAndMarkUnused(); @@ -643,6 +647,10 @@ TEST_F(StatsThreadLocalStoreTest, EvictAndMarkUnused) { EXPECT_FALSE(t1.used()); EXPECT_EQ("hello", t1.value()); EXPECT_EQ(1UL, store_->textReadouts().size()); + + EXPECT_EQ(&h1, &scope->histogramFromString("h1", Histogram::Unit::Unspecified)); + EXPECT_FALSE(t1.used()); + EXPECT_EQ(1UL, store_->histograms().size()); } // Remove. @@ -651,6 +659,7 @@ TEST_F(StatsThreadLocalStoreTest, EvictAndMarkUnused) { EXPECT_EQ(0UL, store_->counters().size()); EXPECT_EQ(0UL, store_->gauges().size()); EXPECT_EQ(0UL, store_->textReadouts().size()); + EXPECT_EQ(0UL, store_->histograms().size()); tls_.shutdownGlobalThreading(); store_->shutdownThreading(); From 2d138eb25c11ac8410736eb7e86104e2914e5460 Mon Sep 17 00:00:00 2001 From: Kuat Yessenov Date: Fri, 25 Jul 2025 17:02:30 +0000 Subject: [PATCH 3/6] format Change-Id: I9c50b09b748e3164f3520bbb0d506b4f0b4916e2 Signed-off-by: Kuat Yessenov --- source/common/stats/thread_local_store.cc | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/source/common/stats/thread_local_store.cc b/source/common/stats/thread_local_store.cc index cd09a19789b90..41b5e0d9969d9 100644 --- a/source/common/stats/thread_local_store.cc +++ b/source/common/stats/thread_local_store.cc @@ -835,11 +835,10 @@ void ThreadLocalStoreImpl::ScopeImpl::evictAndMarkUnused() { }, [stale_counters, stale_gauges, stale_text_readouts, stale_histograms]() { // We want to delete stale stats on the main thread since stat destructors lock the stats - // allocator. Eventually, we might also want to defer the deletion until the values are - // flushes to the sinks. This could happen if an unused stat is updated immediately after - // eviction (e.g. on thread local cache), before we get a chance to delete it from the - // cache. - ENVOY_LOG(info, "deleting stale {} counters, {} gauges, {} text readouts, {} histograms", + // allocator. Note that we might have received fresh values on the stale cache-local + // stats. Eventually, we might also want to defer the deletion further until the values + // are flushed to the sinks. + ENVOY_LOG(debug, "deleting stale {} counters, {} gauges, {} text readouts, {} histograms", stale_counters->size(), stale_gauges->size(), stale_text_readouts->size(), stale_histograms->size()); }); From 8f5bd19f17f577a495d3593e573eb5f32b1898e3 Mon Sep 17 00:00:00 2001 From: Kuat Yessenov Date: Sat, 2 Aug 2025 00:35:31 +0000 Subject: [PATCH 4/6] update Change-Id: I8ee694725cd151baf80b6760cf687117c502546a Signed-off-by: Kuat Yessenov --- envoy/stats/scope.h | 21 +-- envoy/stats/store.h | 9 +- source/common/stats/allocator_impl.cc | 2 +- source/common/stats/isolated_store_impl.cc | 6 +- source/common/stats/isolated_store_impl.h | 12 +- source/common/stats/thread_local_store.cc | 186 +++++++++++-------- source/common/stats/thread_local_store.h | 16 +- source/server/server.cc | 35 +++- source/server/server.h | 8 +- test/common/stats/thread_local_store_test.cc | 54 ++++-- test/mocks/stats/mocks.h | 4 +- 11 files changed, 218 insertions(+), 135 deletions(-) diff --git a/envoy/stats/scope.h b/envoy/stats/scope.h index 614b59df11be2..e76582b34c430 100644 --- a/envoy/stats/scope.h +++ b/envoy/stats/scope.h @@ -71,8 +71,10 @@ class Scope : public std::enable_shared_from_this { * See also scopeFromStatName, which is preferred. * * @param name supplies the scope's namespace prefix. + * @param evictable whether unused metrics can be deleted from the scope caches. This requires + * that the metrics are not stored by reference. */ - virtual ScopeSharedPtr createScope(const std::string& name) PURE; + virtual ScopeSharedPtr createScope(const std::string& name, bool evictable = false) PURE; /** * Allocate a new scope. NOTE: The implementation should correctly handle overlapping scopes @@ -80,8 +82,10 @@ class Scope : public std::enable_shared_from_this { * gracefully swapped in while an old scope with the same name is being destroyed. * * @param name supplies the scope's namespace prefix. + * @param evictable whether unused metrics can be deleted from the scope caches. This requires + * that the metrics are not stored by reference. */ - virtual ScopeSharedPtr scopeFromStatName(StatName name) PURE; + virtual ScopeSharedPtr scopeFromStatName(StatName name, bool evictable = false) PURE; /** * Creates a Counter from the stat name. Tag extraction will be performed on the name. @@ -218,19 +222,6 @@ class Scope : public std::enable_shared_from_this { */ virtual TextReadoutOptConstRef findTextReadout(StatName name) const PURE; - // evictAndMarkUsed removes unused stats and marks all the remaining stats - // unused. This should be called from a timer to reduce the cardinality - // of the time series. - // - // NOTE: Do not use this function when scope stats are stored on the - // workers by reference, since deletion on the worker might invalidate the - // reference. - // - // NOTE: Removal of stats is not deferred until the sink flush. This function - // should not be called more frequently than the stats flush interval. - // See https://github.com/envoyproxy/envoy/issues/23619. - virtual void evictAndMarkUnused() PURE; - /** * @return a reference to the symbol table. */ diff --git a/envoy/stats/store.h b/envoy/stats/store.h index ff1c6a08c53e0..2d816d9b2c2cf 100644 --- a/envoy/stats/store.h +++ b/envoy/stats/store.h @@ -117,6 +117,11 @@ class Store { virtual void forEachHistogram(SizeFn f_size, StatFn f_stat) const PURE; virtual void forEachScope(SizeFn f_size, StatFn f_stat) const PURE; + /** + * Evict unused metrics from all the scope caches. + */ + virtual void evictUnused() PURE; + /** * @return a null counter that will ignore increments and always return 0. */ @@ -172,7 +177,9 @@ class Store { /** * @return a scope of the given name. */ - ScopeSharedPtr createScope(const std::string& name) { return rootScope()->createScope(name); } + ScopeSharedPtr createScope(const std::string& name, bool evictable = false) { + return rootScope()->createScope(name, evictable); + } /** * Extracts tags from the name and appends them to the provided StatNameTagVector. diff --git a/source/common/stats/allocator_impl.cc b/source/common/stats/allocator_impl.cc index 963279a96ca9e..6cd435abd656a 100644 --- a/source/common/stats/allocator_impl.cc +++ b/source/common/stats/allocator_impl.cc @@ -211,7 +211,7 @@ class GaugeImpl : public StatsSharedImpl { } void sub(uint64_t amount) override { ASSERT(child_value_ >= amount); - ASSERT(used() || amount == 0); + // ASSERT(used() || amount == 0); child_value_ -= amount; } uint64_t value() const override { return child_value_ + parent_value_; } diff --git a/source/common/stats/isolated_store_impl.cc b/source/common/stats/isolated_store_impl.cc index 9b4097971eef5..d070a996a1f39 100644 --- a/source/common/stats/isolated_store_impl.cc +++ b/source/common/stats/isolated_store_impl.cc @@ -63,12 +63,12 @@ ConstScopeSharedPtr IsolatedStoreImpl::constRootScope() const { IsolatedStoreImpl::~IsolatedStoreImpl() = default; -ScopeSharedPtr IsolatedScopeImpl::createScope(const std::string& name) { +ScopeSharedPtr IsolatedScopeImpl::createScope(const std::string& name, bool) { StatNameManagedStorage stat_name_storage(Utility::sanitizeStatsName(name), symbolTable()); - return scopeFromStatName(stat_name_storage.statName()); + return scopeFromStatName(stat_name_storage.statName(), false); } -ScopeSharedPtr IsolatedScopeImpl::scopeFromStatName(StatName name) { +ScopeSharedPtr IsolatedScopeImpl::scopeFromStatName(StatName name, bool) { SymbolTable::StoragePtr prefix_name_storage = symbolTable().join({prefix(), name}); ScopeSharedPtr scope = store_.makeScope(StatName(prefix_name_storage.get())); addScopeToStore(scope); diff --git a/source/common/stats/isolated_store_impl.h b/source/common/stats/isolated_store_impl.h index 130bbac08e30f..a8f273492e52d 100644 --- a/source/common/stats/isolated_store_impl.h +++ b/source/common/stats/isolated_store_impl.h @@ -203,6 +203,10 @@ class IsolatedStoreImpl : public Store { } } + void evictUnused() override { + // Do nothing. Eviction is only supported on thread local stores. + } + void forEachSinkedCounter(SizeFn f_size, StatFn f_stat) const override { forEachCounter(f_size, f_stat); } @@ -295,8 +299,8 @@ class IsolatedScopeImpl : public Scope { StatNameTagVectorOptConstRef tags) override { return store_.counters_.get(prefix(), name, tags, symbolTable()); } - ScopeSharedPtr createScope(const std::string& name) override; - ScopeSharedPtr scopeFromStatName(StatName name) override; + ScopeSharedPtr createScope(const std::string& name, bool evictable) override; + ScopeSharedPtr scopeFromStatName(StatName name, bool evictable) override; Gauge& gaugeFromStatNameWithTags(const StatName& name, StatNameTagVectorOptConstRef tags, Gauge::ImportMode import_mode) override { Gauge& gauge = store_.gauges_.get(prefix(), name, tags, symbolTable(), import_mode); @@ -353,10 +357,6 @@ class IsolatedScopeImpl : public Scope { return textReadoutFromStatName(storage.statName()); } - void evictAndMarkUnused() override { - // Do nothing. Eviction is only supported on thread local stores. - } - StatName prefix() const override { return prefix_.statName(); } IsolatedStoreImpl& store() override { return store_; } const IsolatedStoreImpl& constStore() const override { return store_; } diff --git a/source/common/stats/thread_local_store.cc b/source/common/stats/thread_local_store.cc index 41b5e0d9969d9..b6388f58c85a5 100644 --- a/source/common/stats/thread_local_store.cc +++ b/source/common/stats/thread_local_store.cc @@ -38,7 +38,7 @@ ThreadLocalStoreImpl::ThreadLocalStoreImpl(Allocator& alloc) well_known_tags_->rememberBuiltin(desc.name_); } StatNameManagedStorage empty("", alloc.symbolTable()); - auto new_scope = std::make_shared(*this, StatName(empty.statName())); + auto new_scope = std::make_shared(*this, StatName(empty.statName()), false); addScope(new_scope); default_scope_ = new_scope; } @@ -154,14 +154,15 @@ std::vector ThreadLocalStoreImpl::counters() const { return ret; } -ScopeSharedPtr ThreadLocalStoreImpl::ScopeImpl::createScope(const std::string& name) { +ScopeSharedPtr ThreadLocalStoreImpl::ScopeImpl::createScope(const std::string& name, + bool evictable) { StatNameManagedStorage stat_name_storage(Utility::sanitizeStatsName(name), symbolTable()); - return scopeFromStatName(stat_name_storage.statName()); + return scopeFromStatName(stat_name_storage.statName(), evictable); } -ScopeSharedPtr ThreadLocalStoreImpl::ScopeImpl::scopeFromStatName(StatName name) { +ScopeSharedPtr ThreadLocalStoreImpl::ScopeImpl::scopeFromStatName(StatName name, bool evictable) { SymbolTable::StoragePtr joined = symbolTable().join({prefix_.statName(), name}); - auto new_scope = std::make_shared(parent_, StatName(joined.get())); + auto new_scope = std::make_shared(parent_, StatName(joined.get()), evictable); parent_.addScope(new_scope); return new_scope; } @@ -394,8 +395,9 @@ void ThreadLocalStoreImpl::clearHistogramsFromCaches() { } } -ThreadLocalStoreImpl::ScopeImpl::ScopeImpl(ThreadLocalStoreImpl& parent, StatName prefix) - : scope_id_(parent.next_scope_id_++), parent_(parent), +ThreadLocalStoreImpl::ScopeImpl::ScopeImpl(ThreadLocalStoreImpl& parent, StatName prefix, + bool evictable) + : scope_id_(parent.next_scope_id_++), parent_(parent), evictable_(evictable), prefix_(prefix, parent.alloc_.symbolTable()), central_cache_(new CentralCacheEntry(parent.alloc_.symbolTable())) {} @@ -772,79 +774,6 @@ TextReadoutOptConstRef ThreadLocalStoreImpl::ScopeImpl::findTextReadout(StatName return findStatLockHeld(name, central_cache_->text_readouts_); } -void ThreadLocalStoreImpl::ScopeImpl::evictAndMarkUnused() { - ASSERT_IS_MAIN_OR_TEST_THREAD(); - - // If we are shutting down, we no longer perform eviction as workers may be shutting down - // and not able to complete their work. - if (!parent_.shutting_down_ && parent_.tls_cache_) { - auto stale_counters = std::make_shared>(); - auto stale_gauges = std::make_shared>(); - auto stale_text_readouts = std::make_shared>(); - auto stale_histograms = std::make_shared>(); - - auto collect_stale = [](std::shared_ptr>& stale_metrics) { - return [stale_metrics](std::pair kv) { - const auto& [name, metric] = kv; - if constexpr (std::same_as) { - // Histograms usage can only be detected after merge (which happens - // only during flush). Since eviction is less frequent than flushing, - // we can merge histograms here as well. - metric->merge(); - } - if (metric->used()) { - metric->markUnused(); - return false; - } else { - stale_metrics->try_emplace(name, metric); - return true; - } - }; - }; - - { - Thread::LockGuard lock(parent_.lock_); - absl::erase_if(central_cache_->counters_, collect_stale(stale_counters)); - absl::erase_if(central_cache_->gauges_, collect_stale(stale_gauges)); - absl::erase_if(central_cache_->text_readouts_, collect_stale(stale_text_readouts)); - absl::erase_if(central_cache_->histograms_, collect_stale(stale_histograms)); - } - - // At this point, central cache no longer returns the evicted stats, but we - // need to keep the storage for the evicted stats until after the thread - // local caches are cleared. - parent_.tls_cache_->runOnAllThreads( - [scope_id = scope_id_, stale_counters, stale_gauges, stale_text_readouts, - stale_histograms](OptRef tls_cache) { - TlsCacheEntry& entry = tls_cache->insertScope(scope_id); - absl::erase_if(entry.counters_, - [=](std::pair> kv) { - return stale_counters->contains(kv.first); - }); - absl::erase_if(entry.gauges_, [=](std::pair> kv) { - return stale_gauges->contains(kv.first); - }); - absl::erase_if(entry.text_readouts_, - [=](std::pair> kv) { - return stale_text_readouts->contains(kv.first); - }); - absl::erase_if(entry.parent_histograms_, - [=](std::pair kv) { - return stale_histograms->contains(kv.first); - }); - }, - [stale_counters, stale_gauges, stale_text_readouts, stale_histograms]() { - // We want to delete stale stats on the main thread since stat destructors lock the stats - // allocator. Note that we might have received fresh values on the stale cache-local - // stats. Eventually, we might also want to defer the deletion further until the values - // are flushed to the sinks. - ENVOY_LOG(debug, "deleting stale {} counters, {} gauges, {} text readouts, {} histograms", - stale_counters->size(), stale_gauges->size(), stale_text_readouts->size(), - stale_histograms->size()); - }); - } -} - Histogram& ThreadLocalStoreImpl::tlsHistogram(ParentHistogramImpl& parent, uint64_t id) { // tlsHistogram() is generally not called for a histogram that is rejected by // the matcher, so no further rejection-checking is needed at this level. @@ -1111,6 +1040,103 @@ void ThreadLocalStoreImpl::forEachScope(std::function f_size, } } +namespace { +struct MetricBag { + explicit MetricBag(uint64_t scope_id) : scope_id_(scope_id) {} + const uint64_t scope_id_; + StatNameHashMap counters_; + StatNameHashMap gauges_; + StatNameHashMap histograms_; + StatNameHashMap text_readouts_; + bool empty() const { + return counters_.empty() && gauges_.empty() && histograms_.empty() && text_readouts_.empty(); + } +}; + +} // namespace + +void ThreadLocalStoreImpl::evictUnused() { + ASSERT_IS_MAIN_OR_TEST_THREAD(); + + // If we are shutting down, we no longer perform eviction as workers may be shutting down + // and not able to complete their work. + if (shutting_down_ || !tls_cache_) { + return; + } + + auto evicted_metrics = std::make_shared>(); + { + Thread::LockGuard lock(lock_); + iterateScopesLockHeld([evicted_metrics](const ScopeImplSharedPtr& scope) -> bool { + if (scope->evictable_) { + MetricBag metrics(scope->scope_id_); + CentralCacheEntrySharedPtr& central_cache = scope->centralCacheMutableNoThreadAnalysis(); + auto collect_unused = [](StatNameHashMap& unused_metrics) { + return [&unused_metrics](std::pair kv) { + const auto& [name, metric] = kv; + if (metric->used()) { + return false; + } else { + unused_metrics.try_emplace(name, metric); + return true; + } + }; + }; + absl::erase_if(central_cache->counters_, collect_unused(metrics.counters_)); + absl::erase_if(central_cache->gauges_, collect_unused(metrics.gauges_)); + absl::erase_if(central_cache->text_readouts_, collect_unused(metrics.text_readouts_)); + absl::erase_if(central_cache->histograms_, collect_unused(metrics.histograms_)); + if (!metrics.empty()) { + evicted_metrics->push_back(std::move(metrics)); + } + } + return true; + }); + } + + // At this point, central caches no longer return the evicted stats, but we + // need to keep the storage for the evicted stats until after the thread + // local caches are cleared. + if (!evicted_metrics->empty()) { + tls_cache_->runOnAllThreads( + [evicted_metrics](OptRef tls_cache) { + for (const auto& metrics : *evicted_metrics) { + TlsCacheEntry& entry = tls_cache->insertScope(metrics.scope_id_); + absl::erase_if(entry.counters_, + [=](std::pair> kv) { + return metrics.counters_.contains(kv.first); + }); + absl::erase_if(entry.gauges_, + [=](std::pair> kv) { + return metrics.gauges_.contains(kv.first); + }); + absl::erase_if(entry.text_readouts_, + [=](std::pair> kv) { + return metrics.text_readouts_.contains(kv.first); + }); + absl::erase_if(entry.parent_histograms_, + [=](std::pair kv) { + return metrics.histograms_.contains(kv.first); + }); + } + }, + [evicted_metrics]() { + // We want to delete stale stats on the main thread since stat + // destructors lock the stats allocator. Note that we might have + // received fresh values on the stale cache-local stats. Eventually, we + // might also want to defer the deletion further in the allocator until + // the values are flushed to the sinks. + for (const auto& metrics : *evicted_metrics) { + ENVOY_LOG(info, + "deleted stale {} counters, {} gauges, {} text readouts, {} histograms from " + "scope {}", + metrics.counters_.size(), metrics.gauges_.size(), + metrics.text_readouts_.size(), metrics.histograms_.size(), metrics.scope_id_); + } + }); + } +} + bool ThreadLocalStoreImpl::iterateScopesLockHeld( const std::function fn) const ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { diff --git a/source/common/stats/thread_local_store.h b/source/common/stats/thread_local_store.h index 127214b5c2b59..8a598b82713ce 100644 --- a/source/common/stats/thread_local_store.h +++ b/source/common/stats/thread_local_store.h @@ -186,6 +186,8 @@ class ThreadLocalStoreImpl : Logger::Loggable, public StoreRo void forEachHistogram(SizeFn f_size, StatFn f_stat) const override; void forEachScope(SizeFn f_size, StatFn f_stat) const override; + void evictUnused() override; + // Stats::StoreRoot void addSink(Sink& sink) override { timer_sinks_.push_back(sink); } void setTagProducer(TagProducerPtr&& tag_producer) override { @@ -279,7 +281,7 @@ class ThreadLocalStoreImpl : Logger::Loggable, public StoreRo using CentralCacheEntrySharedPtr = RefcountPtr; struct ScopeImpl : public Scope { - ScopeImpl(ThreadLocalStoreImpl& parent, StatName prefix); + ScopeImpl(ThreadLocalStoreImpl& parent, StatName prefix, bool evictable); ~ScopeImpl() override; // Stats::Scope @@ -292,8 +294,8 @@ class ThreadLocalStoreImpl : Logger::Loggable, public StoreRo Histogram::Unit unit) override; TextReadout& textReadoutFromStatNameWithTags(const StatName& name, StatNameTagVectorOptConstRef tags) override; - ScopeSharedPtr createScope(const std::string& name) override; - ScopeSharedPtr scopeFromStatName(StatName name) override; + ScopeSharedPtr createScope(const std::string& name, bool evictale) override; + ScopeSharedPtr scopeFromStatName(StatName name, bool evictable) override; const SymbolTable& constSymbolTable() const final { return parent_.constSymbolTable(); } SymbolTable& symbolTable() final { return parent_.symbolTable(); } @@ -431,6 +433,11 @@ class ThreadLocalStoreImpl : Logger::Loggable, public StoreRo return central_cache_; } + CentralCacheEntrySharedPtr& + centralCacheMutableNoThreadAnalysis() const ABSL_NO_THREAD_SAFETY_ANALYSIS { + return central_cache_; + } + // Returns the central cache, bypassing thread analysis. // // This is used only when passing references to maps held in the central @@ -441,10 +448,9 @@ class ThreadLocalStoreImpl : Logger::Loggable, public StoreRo return central_cache_; } - void evictAndMarkUnused() override; - const uint64_t scope_id_; ThreadLocalStoreImpl& parent_; + const bool evictable_{}; private: StatNameStorage prefix_; diff --git a/source/server/server.cc b/source/server/server.cc index 24e233bf35da3..07ad1f1ba1d03 100644 --- a/source/server/server.cc +++ b/source/server/server.cc @@ -170,15 +170,18 @@ void InstanceBase::failHealthcheck(bool fail) { MetricSnapshotImpl::MetricSnapshotImpl(Stats::Store& store, Upstream::ClusterManager& cluster_manager, - TimeSource& time_source) { + TimeSource& time_source, bool mark_unused) { store.forEachSinkedCounter( [this](std::size_t size) { snapped_counters_.reserve(size); counters_.reserve(size); }, - [this](Stats::Counter& counter) { + [this, mark_unused](Stats::Counter& counter) { snapped_counters_.push_back(Stats::CounterSharedPtr(&counter)); counters_.push_back({counter.latch(), counter}); + if (mark_unused) { + counter.markUnused(); + } }); store.forEachSinkedGauge( @@ -186,9 +189,12 @@ MetricSnapshotImpl::MetricSnapshotImpl(Stats::Store& store, snapped_gauges_.reserve(size); gauges_.reserve(size); }, - [this](Stats::Gauge& gauge) { + [this, mark_unused](Stats::Gauge& gauge) { snapped_gauges_.push_back(Stats::GaugeSharedPtr(&gauge)); gauges_.push_back(gauge); + if (mark_unused) { + gauge.markUnused(); + } }); store.forEachSinkedHistogram( @@ -196,9 +202,12 @@ MetricSnapshotImpl::MetricSnapshotImpl(Stats::Store& store, snapped_histograms_.reserve(size); histograms_.reserve(size); }, - [this](Stats::ParentHistogram& histogram) { + [this, mark_unused](Stats::ParentHistogram& histogram) { snapped_histograms_.push_back(Stats::ParentHistogramSharedPtr(&histogram)); histograms_.push_back(histogram); + if (mark_unused) { + histogram.markUnused(); + } }); store.forEachSinkedTextReadout( @@ -206,9 +215,12 @@ MetricSnapshotImpl::MetricSnapshotImpl(Stats::Store& store, snapped_text_readouts_.reserve(size); text_readouts_.reserve(size); }, - [this](Stats::TextReadout& text_readout) { + [this, mark_unused](Stats::TextReadout& text_readout) { snapped_text_readouts_.push_back(Stats::TextReadoutSharedPtr(&text_readout)); text_readouts_.push_back(text_readout); + if (mark_unused) { + text_readout.markUnused(); + } }); Upstream::HostUtility::forEachHostMetric( @@ -224,12 +236,19 @@ MetricSnapshotImpl::MetricSnapshotImpl(Stats::Store& store, } void InstanceUtil::flushMetricsToSinks(const std::list& sinks, Stats::Store& store, - Upstream::ClusterManager& cm, TimeSource& time_source) { + Upstream::ClusterManager& cm, TimeSource& time_source, + bool evict) { + // Eviction will schedule deletion of the stale metrics from the central caches on the main loop, + // so they are still present for the duration of this function. + if (evict) { + store.evictUnused(); + } + // Create a snapshot and flush to all sinks. // NOTE: Even if there are no sinks, creating the snapshot has the important property that it // latches all counters on a periodic basis. The hot restart code assumes this is being // done so this should not be removed. - MetricSnapshotImpl snapshot(store, cm, time_source); + MetricSnapshotImpl snapshot(store, cm, time_source, evict); for (const auto& sink : sinks) { sink->flush(snapshot); } @@ -291,7 +310,7 @@ void InstanceBase::flushStatsInternal() { updateServerStats(); auto& stats_config = config_.statsConfig(); InstanceUtil::flushMetricsToSinks(stats_config.sinks(), stats_store_, clusterManager(), - timeSource()); + timeSource(), true); // TODO(ramaraochavali): consider adding different flush interval for histograms. if (stat_flush_timer_ != nullptr) { stat_flush_timer_->enableTimer(stats_config.flushInterval()); diff --git a/source/server/server.h b/source/server/server.h index 88ffac5cdd404..151150bdc9f2d 100644 --- a/source/server/server.h +++ b/source/server/server.h @@ -132,9 +132,11 @@ class InstanceUtil : Logger::Loggable { * flush() on each sink. * @param sinks supplies the list of sinks. * @param store provides the store being flushed. + * @param evict whether to delete the stale metrics and clear the usage flags. */ static void flushMetricsToSinks(const std::list& sinks, Stats::Store& store, - Upstream::ClusterManager& cm, TimeSource& time_source); + Upstream::ClusterManager& cm, TimeSource& time_source, + bool evict); /** * Load a bootstrap config and perform validation. @@ -468,8 +470,10 @@ class InstanceBase : Logger::Loggable, // copying and probably be a cleaner API in general. class MetricSnapshotImpl : public Stats::MetricSnapshot { public: + // MetricSnapshotImpl captures a snapshot of metrics by latching the delta usage, and optionally + // marking the stats as used. explicit MetricSnapshotImpl(Stats::Store& store, Upstream::ClusterManager& cluster_manager, - TimeSource& time_source); + TimeSource& time_source, bool mark_unused); // Stats::MetricSnapshot const std::vector& counters() override { return counters_; } diff --git a/test/common/stats/thread_local_store_test.cc b/test/common/stats/thread_local_store_test.cc index e1e4d9c5f2fa9..abaef9f8b6e4e 100644 --- a/test/common/stats/thread_local_store_test.cc +++ b/test/common/stats/thread_local_store_test.cc @@ -606,14 +606,16 @@ TEST_F(StatsThreadLocalStoreTest, ScopeDelete) { tls_.shutdownThread(); } -TEST_F(StatsThreadLocalStoreTest, EvictAndMarkUnused) { +TEST_F(StatsThreadLocalStoreTest, Eviction) { InSequence s; store_->initializeThreading(main_thread_dispatcher_, tls_); - ScopeSharedPtr scope = store_->createScope("scope."); + ScopeSharedPtr scope = store_->createScope("scope.", true); + ScopeSharedPtr scope1 = store_->createScope("scope.", true); + // References will become invalid, so we create a lexical scope. { - // References will become invalid. Counter& c1 = scope->counterFromString("c1"); + EXPECT_EQ(&c1, &scope1->counterFromString("c1")); c1.add(1); EXPECT_TRUE(c1.used()); @@ -628,39 +630,67 @@ TEST_F(StatsThreadLocalStoreTest, EvictAndMarkUnused) { Histogram& h1 = scope->histogramFromString("h1", Histogram::Unit::Unspecified); EXPECT_CALL(sink_, onHistogramComplete(Ref(h1), 1)); h1.recordValue(1); + store_->mergeHistograms([]() -> void {}); - // Mark unused. - EXPECT_CALL(tls_, runOnAllThreads(_, _)).Times(testing::AtLeast(1)); - scope->evictAndMarkUnused(); + // Eviction does nothing. + store_->evictUnused(); EXPECT_EQ(&c1, &scope->counterFromString("c1")); - EXPECT_FALSE(c1.used()); + EXPECT_TRUE(c1.used()); EXPECT_EQ(1, c1.value()); EXPECT_EQ(1UL, store_->counters().size()); EXPECT_EQ(&g1, &scope->gaugeFromString("g1", Gauge::ImportMode::Accumulate)); - EXPECT_FALSE(g1.used()); + EXPECT_EQ(&g1, &scope1->gaugeFromString("g1", Gauge::ImportMode::Accumulate)); + EXPECT_TRUE(g1.used()); EXPECT_EQ(5, g1.value()); EXPECT_EQ(1UL, store_->gauges().size()); EXPECT_EQ(&t1, &scope->textReadoutFromString("t1")); - EXPECT_FALSE(t1.used()); + EXPECT_EQ(&t1, &scope1->textReadoutFromString("t1")); + EXPECT_TRUE(t1.used()); EXPECT_EQ("hello", t1.value()); EXPECT_EQ(1UL, store_->textReadouts().size()); EXPECT_EQ(&h1, &scope->histogramFromString("h1", Histogram::Unit::Unspecified)); - EXPECT_FALSE(t1.used()); + EXPECT_EQ(&h1, &scope1->histogramFromString("h1", Histogram::Unit::Unspecified)); + EXPECT_TRUE(h1.used()); EXPECT_EQ(1UL, store_->histograms().size()); + + // Mark unused all metrics. + c1.markUnused(); + g1.markUnused(); + t1.markUnused(); + h1.markUnused(); + EXPECT_FALSE(c1.used()); + EXPECT_FALSE(g1.used()); + EXPECT_FALSE(t1.used()); + EXPECT_FALSE(h1.used()); } - // Remove. + // Eviction removes here. EXPECT_CALL(tls_, runOnAllThreads(_, _)).Times(testing::AtLeast(1)); - scope->evictAndMarkUnused(); + store_->evictUnused(); EXPECT_EQ(0UL, store_->counters().size()); EXPECT_EQ(0UL, store_->gauges().size()); EXPECT_EQ(0UL, store_->textReadouts().size()); EXPECT_EQ(0UL, store_->histograms().size()); + // Make sure no stale data is on caches. + { + scope->counterFromString("c1").add(1); + scope1->counterFromString("c1").add(1); + scope->gaugeFromString("g1", Gauge::ImportMode::Accumulate).set(5); + scope1->gaugeFromString("g1", Gauge::ImportMode::Accumulate).set(5); + scope->textReadoutFromString("t1").set("hello"); + scope1->textReadoutFromString("t1").set("hello"); + Histogram& h1 = scope->histogramFromString("h1", Histogram::Unit::Unspecified); + EXPECT_CALL(sink_, onHistogramComplete(Ref(h1), 1)); + h1.recordValue(1); + Histogram& h2 = scope1->histogramFromString("h1", Histogram::Unit::Unspecified); + EXPECT_EQ(&h1, &h2); + } + tls_.shutdownGlobalThreading(); store_->shutdownThreading(); tls_.shutdownThread(); diff --git a/test/mocks/stats/mocks.h b/test/mocks/stats/mocks.h index 31ccbdf481024..e476b6bf75776 100644 --- a/test/mocks/stats/mocks.h +++ b/test/mocks/stats/mocks.h @@ -296,10 +296,10 @@ class MockScope : public TestUtil::TestScope { public: MockScope(StatName prefix, MockStore& store); - ScopeSharedPtr createScope(const std::string& name) override { + ScopeSharedPtr createScope(const std::string& name, bool) override { return ScopeSharedPtr(createScope_(name)); } - ScopeSharedPtr scopeFromStatName(StatName name) override { + ScopeSharedPtr scopeFromStatName(StatName name, bool) override { return createScope_(symbolTable().toString(name)); } From 961e8f5c109d4f1a274cbc871d55eb86c9bcb2a1 Mon Sep 17 00:00:00 2001 From: Kuat Yessenov Date: Tue, 5 Aug 2025 21:31:25 +0000 Subject: [PATCH 5/6] finish api and docs Change-Id: Iec9a7db939baed48cfcc52119d331002c4d662fe Signed-off-by: Kuat Yessenov --- api/envoy/config/bootstrap/v3/bootstrap.proto | 10 ++++- changelogs/current.yaml | 3 ++ envoy/server/configuration.h | 5 +++ envoy/stats/store.h | 2 +- source/common/stats/allocator_impl.cc | 2 +- source/common/stats/isolated_store_impl.h | 2 +- source/common/stats/thread_local_store.cc | 42 +++++++++++-------- source/server/configuration_impl.cc | 8 ++++ source/server/configuration_impl.h | 2 + source/server/server.cc | 41 +++++++----------- source/server/server.h | 8 ++-- test/common/stats/thread_local_store_test.cc | 22 +++------- test/integration/server.h | 17 ++++++-- test/mocks/server/server_factory_context.h | 1 + test/server/configuration_impl_test.cc | 29 +++++++++++++ test/server/server_test.cc | 32 ++++++++++++++ .../server/stats_evict_bootstrap.yaml | 11 +++++ 17 files changed, 164 insertions(+), 73 deletions(-) create mode 100644 test/server/test_data/server/stats_evict_bootstrap.yaml diff --git a/api/envoy/config/bootstrap/v3/bootstrap.proto b/api/envoy/config/bootstrap/v3/bootstrap.proto index bf65f3df45c47..28b1eba6680ef 100644 --- a/api/envoy/config/bootstrap/v3/bootstrap.proto +++ b/api/envoy/config/bootstrap/v3/bootstrap.proto @@ -41,7 +41,7 @@ option (udpa.annotations.file_status).package_version_status = ACTIVE; // ` for more detail. // Bootstrap :ref:`configuration overview `. -// [#next-free-field: 42] +// [#next-free-field: 43] message Bootstrap { option (udpa.annotations.versioning).previous_message_type = "envoy.config.bootstrap.v2.Bootstrap"; @@ -230,6 +230,14 @@ message Bootstrap { bool stats_flush_on_admin = 29 [(validate.rules).bool = {const: true}]; } + oneof stats_eviction { + // Optional duration to perform metric eviction. At every interval, during the stats flush + // the unused metrics are removed from the worker caches and the used metrics + // are marked as unused. Must be a multiple of the ``stats_flush_interval``. + google.protobuf.Duration stats_eviction_interval = 42 + [(validate.rules).duration = {gte {nanos: 1000000}}]; + } + // Optional watchdog configuration. // This is for a single watchdog configuration for the entire system. // Deprecated in favor of ``watchdogs`` which has finer granularity. diff --git a/changelogs/current.yaml b/changelogs/current.yaml index 14af06baa005d..57d851639261a 100644 --- a/changelogs/current.yaml +++ b/changelogs/current.yaml @@ -42,6 +42,9 @@ removed_config_or_runtime: Removed runtime guard ``envoy.reloadable_features.gcp_authn_use_fixed_url`` and legacy code paths. new_features: +- area: stats + change: | + Added support to remove unused metrics from memory for extensions that support evictable metrics. This is done :ref:`periodically ` during the metric flush. - area: health_check change: | Added support for request payloads in HTTP health checks. The ``send`` field in ``HttpHealthCheck`` can now be diff --git a/envoy/server/configuration.h b/envoy/server/configuration.h index 4c18b49054270..9da04d1ac7012 100644 --- a/envoy/server/configuration.h +++ b/envoy/server/configuration.h @@ -89,6 +89,11 @@ class StatsConfig { * @return true if deferred creation of stats is enabled. */ virtual bool enableDeferredCreationStats() const PURE; + + /** + * @return uint32_t a multiple of the flush interval to perform stats eviction, or 0 if disabled. + */ + virtual uint32_t evictOnFlush() const PURE; }; /** diff --git a/envoy/stats/store.h b/envoy/stats/store.h index 2d816d9b2c2cf..306cef4c5a9b0 100644 --- a/envoy/stats/store.h +++ b/envoy/stats/store.h @@ -118,7 +118,7 @@ class Store { virtual void forEachScope(SizeFn f_size, StatFn f_stat) const PURE; /** - * Evict unused metrics from all the scope caches. + * Delete unused metrics from all the evictable scope caches, and mark the rest as unused. */ virtual void evictUnused() PURE; diff --git a/source/common/stats/allocator_impl.cc b/source/common/stats/allocator_impl.cc index 6cd435abd656a..963279a96ca9e 100644 --- a/source/common/stats/allocator_impl.cc +++ b/source/common/stats/allocator_impl.cc @@ -211,7 +211,7 @@ class GaugeImpl : public StatsSharedImpl { } void sub(uint64_t amount) override { ASSERT(child_value_ >= amount); - // ASSERT(used() || amount == 0); + ASSERT(used() || amount == 0); child_value_ -= amount; } uint64_t value() const override { return child_value_ + parent_value_; } diff --git a/source/common/stats/isolated_store_impl.h b/source/common/stats/isolated_store_impl.h index a8f273492e52d..76c3d364bf77b 100644 --- a/source/common/stats/isolated_store_impl.h +++ b/source/common/stats/isolated_store_impl.h @@ -204,7 +204,7 @@ class IsolatedStoreImpl : public Store { } void evictUnused() override { - // Do nothing. Eviction is only supported on thread local stores. + // Do nothing. Eviction is only supported on the thread local stores. } void forEachSinkedCounter(SizeFn f_size, StatFn f_stat) const override { diff --git a/source/common/stats/thread_local_store.cc b/source/common/stats/thread_local_store.cc index b6388f58c85a5..6bc34f71a086b 100644 --- a/source/common/stats/thread_local_store.cc +++ b/source/common/stats/thread_local_store.cc @@ -914,7 +914,7 @@ bool ParentHistogramImpl::used() const { void ParentHistogramImpl::markUnused() { merged_ = false; - Thread::ReleasableLockGuard lock(merge_lock_); + Thread::LockGuard lock(merge_lock_); for (const TlsHistogramSharedPtr& tls_histogram : tls_histograms_) { tls_histogram->markUnused(); } @@ -1071,10 +1071,11 @@ void ThreadLocalStoreImpl::evictUnused() { if (scope->evictable_) { MetricBag metrics(scope->scope_id_); CentralCacheEntrySharedPtr& central_cache = scope->centralCacheMutableNoThreadAnalysis(); - auto collect_unused = [](StatNameHashMap& unused_metrics) { + auto filter_unused = [](StatNameHashMap& unused_metrics) { return [&unused_metrics](std::pair kv) { const auto& [name, metric] = kv; if (metric->used()) { + metric->markUnused(); return false; } else { unused_metrics.try_emplace(name, metric); @@ -1082,10 +1083,10 @@ void ThreadLocalStoreImpl::evictUnused() { } }; }; - absl::erase_if(central_cache->counters_, collect_unused(metrics.counters_)); - absl::erase_if(central_cache->gauges_, collect_unused(metrics.gauges_)); - absl::erase_if(central_cache->text_readouts_, collect_unused(metrics.text_readouts_)); - absl::erase_if(central_cache->histograms_, collect_unused(metrics.histograms_)); + absl::erase_if(central_cache->counters_, filter_unused(metrics.counters_)); + absl::erase_if(central_cache->gauges_, filter_unused(metrics.gauges_)); + absl::erase_if(central_cache->text_readouts_, filter_unused(metrics.text_readouts_)); + absl::erase_if(central_cache->histograms_, filter_unused(metrics.histograms_)); if (!metrics.empty()) { evicted_metrics->push_back(std::move(metrics)); } @@ -1103,19 +1104,19 @@ void ThreadLocalStoreImpl::evictUnused() { for (const auto& metrics : *evicted_metrics) { TlsCacheEntry& entry = tls_cache->insertScope(metrics.scope_id_); absl::erase_if(entry.counters_, - [=](std::pair> kv) { + [&](std::pair> kv) { return metrics.counters_.contains(kv.first); }); absl::erase_if(entry.gauges_, - [=](std::pair> kv) { + [&](std::pair> kv) { return metrics.gauges_.contains(kv.first); }); absl::erase_if(entry.text_readouts_, - [=](std::pair> kv) { + [&](std::pair> kv) { return metrics.text_readouts_.contains(kv.first); }); absl::erase_if(entry.parent_histograms_, - [=](std::pair kv) { + [&](std::pair kv) { return metrics.histograms_.contains(kv.first); }); } @@ -1123,16 +1124,21 @@ void ThreadLocalStoreImpl::evictUnused() { [evicted_metrics]() { // We want to delete stale stats on the main thread since stat // destructors lock the stats allocator. Note that we might have - // received fresh values on the stale cache-local stats. Eventually, we - // might also want to defer the deletion further in the allocator until - // the values are flushed to the sinks. + // received fresh values on the stale cache-local stats after deleting them from the + // central cache.. Eventually, we might also want to defer the deletion further in the + // allocator until the values are flushed to the sinks. + size_t scopes = 0, counters = 0, gauges = 0, readouts = 0, histograms = 0; for (const auto& metrics : *evicted_metrics) { - ENVOY_LOG(info, - "deleted stale {} counters, {} gauges, {} text readouts, {} histograms from " - "scope {}", - metrics.counters_.size(), metrics.gauges_.size(), - metrics.text_readouts_.size(), metrics.histograms_.size(), metrics.scope_id_); + scopes += 1; + counters += metrics.counters_.size(); + gauges += metrics.gauges_.size(); + readouts += metrics.text_readouts_.size(); + histograms += metrics.histograms_.size(); } + ENVOY_LOG(debug, + "deleted stale {} counters, {} gauges, {} text readouts, {} histograms from " + "{} scopes", + counters, gauges, readouts, histograms, scopes); }); } } diff --git a/source/server/configuration_impl.cc b/source/server/configuration_impl.cc index a1c9a438604cd..f341803f78c4e 100644 --- a/source/server/configuration_impl.cc +++ b/source/server/configuration_impl.cc @@ -99,6 +99,14 @@ StatsConfigImpl::StatsConfigImpl(const envoy::config::bootstrap::v3::Bootstrap& if (bootstrap.stats_flush_case() == envoy::config::bootstrap::v3::Bootstrap::kStatsFlushOnAdmin) { flush_on_admin_ = bootstrap.stats_flush_on_admin(); } + + const auto evict_interval_ms = PROTOBUF_GET_MS_OR_DEFAULT(bootstrap, stats_eviction_interval, 0); + if (evict_interval_ms % flush_interval_.count() != 0) { + status = absl::InvalidArgumentError( + "stats_eviction_interval must be a multiple of stats_flush_interval"); + return; + } + evict_on_flush_ = evict_interval_ms / flush_interval_.count(); } absl::Status MainImpl::initialize(const envoy::config::bootstrap::v3::Bootstrap& bootstrap, diff --git a/source/server/configuration_impl.h b/source/server/configuration_impl.h index 2165239456f1b..932ffab806b24 100644 --- a/source/server/configuration_impl.h +++ b/source/server/configuration_impl.h @@ -56,6 +56,7 @@ class StatsConfigImpl : public StatsConfig { const std::list& sinks() const override { return sinks_; } std::chrono::milliseconds flushInterval() const override { return flush_interval_; } bool flushOnAdmin() const override { return flush_on_admin_; } + uint32_t evictOnFlush() const override { return evict_on_flush_; } void addSink(Stats::SinkPtr sink) { sinks_.emplace_back(std::move(sink)); } bool enableDeferredCreationStats() const override { @@ -67,6 +68,7 @@ class StatsConfigImpl : public StatsConfig { std::chrono::milliseconds flush_interval_; bool flush_on_admin_{false}; const envoy::config::bootstrap::v3::Bootstrap::DeferredStatOptions deferred_stat_options_; + uint32_t evict_on_flush_{0}; }; /** diff --git a/source/server/server.cc b/source/server/server.cc index 07ad1f1ba1d03..cd89ef64c6a9c 100644 --- a/source/server/server.cc +++ b/source/server/server.cc @@ -170,18 +170,15 @@ void InstanceBase::failHealthcheck(bool fail) { MetricSnapshotImpl::MetricSnapshotImpl(Stats::Store& store, Upstream::ClusterManager& cluster_manager, - TimeSource& time_source, bool mark_unused) { + TimeSource& time_source) { store.forEachSinkedCounter( [this](std::size_t size) { snapped_counters_.reserve(size); counters_.reserve(size); }, - [this, mark_unused](Stats::Counter& counter) { + [this](Stats::Counter& counter) { snapped_counters_.push_back(Stats::CounterSharedPtr(&counter)); counters_.push_back({counter.latch(), counter}); - if (mark_unused) { - counter.markUnused(); - } }); store.forEachSinkedGauge( @@ -189,12 +186,9 @@ MetricSnapshotImpl::MetricSnapshotImpl(Stats::Store& store, snapped_gauges_.reserve(size); gauges_.reserve(size); }, - [this, mark_unused](Stats::Gauge& gauge) { + [this](Stats::Gauge& gauge) { snapped_gauges_.push_back(Stats::GaugeSharedPtr(&gauge)); gauges_.push_back(gauge); - if (mark_unused) { - gauge.markUnused(); - } }); store.forEachSinkedHistogram( @@ -202,12 +196,9 @@ MetricSnapshotImpl::MetricSnapshotImpl(Stats::Store& store, snapped_histograms_.reserve(size); histograms_.reserve(size); }, - [this, mark_unused](Stats::ParentHistogram& histogram) { + [this](Stats::ParentHistogram& histogram) { snapped_histograms_.push_back(Stats::ParentHistogramSharedPtr(&histogram)); histograms_.push_back(histogram); - if (mark_unused) { - histogram.markUnused(); - } }); store.forEachSinkedTextReadout( @@ -215,12 +206,9 @@ MetricSnapshotImpl::MetricSnapshotImpl(Stats::Store& store, snapped_text_readouts_.reserve(size); text_readouts_.reserve(size); }, - [this, mark_unused](Stats::TextReadout& text_readout) { + [this](Stats::TextReadout& text_readout) { snapped_text_readouts_.push_back(Stats::TextReadoutSharedPtr(&text_readout)); text_readouts_.push_back(text_readout); - if (mark_unused) { - text_readout.markUnused(); - } }); Upstream::HostUtility::forEachHostMetric( @@ -236,19 +224,12 @@ MetricSnapshotImpl::MetricSnapshotImpl(Stats::Store& store, } void InstanceUtil::flushMetricsToSinks(const std::list& sinks, Stats::Store& store, - Upstream::ClusterManager& cm, TimeSource& time_source, - bool evict) { - // Eviction will schedule deletion of the stale metrics from the central caches on the main loop, - // so they are still present for the duration of this function. - if (evict) { - store.evictUnused(); - } - + Upstream::ClusterManager& cm, TimeSource& time_source) { // Create a snapshot and flush to all sinks. // NOTE: Even if there are no sinks, creating the snapshot has the important property that it // latches all counters on a periodic basis. The hot restart code assumes this is being // done so this should not be removed. - MetricSnapshotImpl snapshot(store, cm, time_source, evict); + MetricSnapshotImpl snapshot(store, cm, time_source); for (const auto& sink : sinks) { sink->flush(snapshot); } @@ -310,7 +291,13 @@ void InstanceBase::flushStatsInternal() { updateServerStats(); auto& stats_config = config_.statsConfig(); InstanceUtil::flushMetricsToSinks(stats_config.sinks(), stats_store_, clusterManager(), - timeSource(), true); + timeSource()); + if (const auto evict_on_flush = stats_config.evictOnFlush(); evict_on_flush > 0) { + stats_eviction_counter_ = (stats_eviction_counter_ + 1) % evict_on_flush; + if (stats_eviction_counter_ == 0) { + stats_store_.evictUnused(); + } + } // TODO(ramaraochavali): consider adding different flush interval for histograms. if (stat_flush_timer_ != nullptr) { stat_flush_timer_->enableTimer(stats_config.flushInterval()); diff --git a/source/server/server.h b/source/server/server.h index 151150bdc9f2d..b59fce228f034 100644 --- a/source/server/server.h +++ b/source/server/server.h @@ -132,11 +132,9 @@ class InstanceUtil : Logger::Loggable { * flush() on each sink. * @param sinks supplies the list of sinks. * @param store provides the store being flushed. - * @param evict whether to delete the stale metrics and clear the usage flags. */ static void flushMetricsToSinks(const std::list& sinks, Stats::Store& store, - Upstream::ClusterManager& cm, TimeSource& time_source, - bool evict); + Upstream::ClusterManager& cm, TimeSource& time_source); /** * Load a bootstrap config and perform validation. @@ -455,6 +453,8 @@ class InstanceBase : Logger::Loggable, : RaiiListElement(callbacks, callback) {} }; + uint32_t stats_eviction_counter_{0}; + #ifdef ENVOY_PERFETTO std::unique_ptr tracing_session_{}; os_fd_t tracing_fd_{INVALID_HANDLE}; @@ -473,7 +473,7 @@ class MetricSnapshotImpl : public Stats::MetricSnapshot { // MetricSnapshotImpl captures a snapshot of metrics by latching the delta usage, and optionally // marking the stats as used. explicit MetricSnapshotImpl(Stats::Store& store, Upstream::ClusterManager& cluster_manager, - TimeSource& time_source, bool mark_unused); + TimeSource& time_source); // Stats::MetricSnapshot const std::vector& counters() override { return counters_; } diff --git a/test/common/stats/thread_local_store_test.cc b/test/common/stats/thread_local_store_test.cc index abaef9f8b6e4e..b689fce5aa0e8 100644 --- a/test/common/stats/thread_local_store_test.cc +++ b/test/common/stats/thread_local_store_test.cc @@ -632,40 +632,30 @@ TEST_F(StatsThreadLocalStoreTest, Eviction) { h1.recordValue(1); store_->mergeHistograms([]() -> void {}); - // Eviction does nothing. + // Eviction only marks unused but does not remove the counters. store_->evictUnused(); EXPECT_EQ(&c1, &scope->counterFromString("c1")); - EXPECT_TRUE(c1.used()); + EXPECT_FALSE(c1.used()); EXPECT_EQ(1, c1.value()); EXPECT_EQ(1UL, store_->counters().size()); EXPECT_EQ(&g1, &scope->gaugeFromString("g1", Gauge::ImportMode::Accumulate)); EXPECT_EQ(&g1, &scope1->gaugeFromString("g1", Gauge::ImportMode::Accumulate)); - EXPECT_TRUE(g1.used()); + EXPECT_FALSE(g1.used()); EXPECT_EQ(5, g1.value()); EXPECT_EQ(1UL, store_->gauges().size()); EXPECT_EQ(&t1, &scope->textReadoutFromString("t1")); EXPECT_EQ(&t1, &scope1->textReadoutFromString("t1")); - EXPECT_TRUE(t1.used()); + EXPECT_FALSE(t1.used()); EXPECT_EQ("hello", t1.value()); EXPECT_EQ(1UL, store_->textReadouts().size()); EXPECT_EQ(&h1, &scope->histogramFromString("h1", Histogram::Unit::Unspecified)); EXPECT_EQ(&h1, &scope1->histogramFromString("h1", Histogram::Unit::Unspecified)); - EXPECT_TRUE(h1.used()); - EXPECT_EQ(1UL, store_->histograms().size()); - - // Mark unused all metrics. - c1.markUnused(); - g1.markUnused(); - t1.markUnused(); - h1.markUnused(); - EXPECT_FALSE(c1.used()); - EXPECT_FALSE(g1.used()); - EXPECT_FALSE(t1.used()); EXPECT_FALSE(h1.used()); + EXPECT_EQ(1UL, store_->histograms().size()); } // Eviction removes here. @@ -676,7 +666,7 @@ TEST_F(StatsThreadLocalStoreTest, Eviction) { EXPECT_EQ(0UL, store_->textReadouts().size()); EXPECT_EQ(0UL, store_->histograms().size()); - // Make sure no stale data is on caches. + // Make sure no dangling data is on caches and it is safe to use the same metrics. { scope->counterFromString("c1").add(1); scope1->counterFromString("c1").add(1); diff --git a/test/integration/server.h b/test/integration/server.h index 34637480ae36b..a78a8b4eb4ef2 100644 --- a/test/integration/server.h +++ b/test/integration/server.h @@ -77,12 +77,12 @@ class TestScopeWrapper : public Scope { TestScopeWrapper(Thread::MutexBasicLockable& lock, ScopeSharedPtr wrapped_scope, Store& store) : lock_(lock), wrapped_scope_(wrapped_scope), store_(store) {} - ScopeSharedPtr createScope(const std::string& name) override { + ScopeSharedPtr createScope(const std::string& name, bool) override { Thread::LockGuard lock(lock_); return std::make_shared(lock_, wrapped_scope_->createScope(name), store_); } - ScopeSharedPtr scopeFromStatName(StatName name) override { + ScopeSharedPtr scopeFromStatName(StatName name, bool) override { Thread::LockGuard lock(lock_); return std::make_shared(lock_, wrapped_scope_->scopeFromStatName(name), store_); @@ -163,8 +163,6 @@ class TestScopeWrapper : public Scope { Store& store() override { return store_; } const Store& constStore() const override { return store_; } - void evictAndMarkUnused() override {} - private: Thread::MutexBasicLockable& lock_; ScopeSharedPtr wrapped_scope_; @@ -368,6 +366,16 @@ class TestIsolatedStoreImpl : public StoreRoot { void extractAndAppendTags(absl::string_view, StatNamePool&, StatNameTagVector&) override {}; const Stats::TagVector& fixedTags() override { CONSTRUCT_ON_FIRST_USE(Stats::TagVector); } + void evictUnused() override { + Thread::LockGuard lock(lock_); + eviction_count_++; + } + + uint32_t evictionCount() const { + Thread::LockGuard lock(lock_); + return eviction_count_; + } + // Stats::StoreRoot void addSink(Sink&) override {} void setTagProducer(TagProducerPtr&&) override {} @@ -384,6 +392,7 @@ class TestIsolatedStoreImpl : public StoreRoot { IsolatedStoreImpl store_; PostMergeCb merge_cb_; ScopeSharedPtr lazy_default_scope_; + uint32_t eviction_count_{0}; }; } // namespace Stats diff --git a/test/mocks/server/server_factory_context.h b/test/mocks/server/server_factory_context.h index 7221103be6359..059a50aaa9032 100644 --- a/test/mocks/server/server_factory_context.h +++ b/test/mocks/server/server_factory_context.h @@ -51,6 +51,7 @@ class MockStatsConfig : public virtual StatsConfig { MOCK_METHOD(bool, flushOnAdmin, (), (const)); MOCK_METHOD(const Stats::SinkPredicates*, sinkPredicates, (), (const)); MOCK_METHOD(bool, enableDeferredCreationStats, (), (const)); + MOCK_METHOD(uint32_t, evictOnFlush, (), (const)); }; class MockServerFactoryContext : public virtual ServerFactoryContext { diff --git a/test/server/configuration_impl_test.cc b/test/server/configuration_impl_test.cc index 5ce86dc719cea..c916ebd031643 100644 --- a/test/server/configuration_impl_test.cc +++ b/test/server/configuration_impl_test.cc @@ -94,6 +94,7 @@ TEST_F(ConfigurationImplTest, DefaultStatsFlushInterval) { EXPECT_EQ(std::chrono::milliseconds(5000), config.statsConfig().flushInterval()); EXPECT_FALSE(config.statsConfig().flushOnAdmin()); + EXPECT_EQ(0, config.statsConfig().evictOnFlush()); } TEST_F(ConfigurationImplTest, CustomStatsFlushInterval) { @@ -224,6 +225,34 @@ TEST_F(ConfigurationImplTest, IntervalAndAdminFlush) { "Only one of stats_flush_interval or stats_flush_on_admin should be set!"); } +TEST_F(ConfigurationImplTest, Eviction) { + std::string json = R"EOF( + { + "stats_flush_interval": "0.500s", + "stats_eviction_interval": "1.5s" + } + )EOF"; + + auto bootstrap = Upstream::parseBootstrapFromV3Json(json); + MainImpl config; + EXPECT_TRUE(config.initialize(bootstrap, server_, cluster_manager_factory_).ok()); + EXPECT_EQ(3, config.statsConfig().evictOnFlush()); +} + +TEST_F(ConfigurationImplTest, EvictionNotMultiple) { + std::string json = R"EOF( + { + "stats_flush_interval": "0.500s", + "stats_eviction_interval": "0.750s" + } + )EOF"; + + auto bootstrap = Upstream::parseBootstrapFromV3Json(json); + MainImpl config; + EXPECT_THAT(config.initialize(bootstrap, server_, cluster_manager_factory_).message(), + testing::HasSubstr("must be a multiple")); +} + TEST_F(ConfigurationImplTest, SetUpstreamClusterPerConnectionBufferLimit) { const std::string json = R"EOF( { diff --git a/test/server/server_test.cc b/test/server/server_test.cc index 9b69532160bc4..d90b608ccdd8d 100644 --- a/test/server/server_test.cc +++ b/test/server/server_test.cc @@ -964,6 +964,38 @@ TEST_P(ServerInstanceImplTest, FlushStatsOnAdmin) { server_thread->join(); } +TEST_P(ServerInstanceImplTest, EvictStats) { + CustomStatsSinkFactory factory; + Registry::InjectFactory registered(factory); + auto server_thread = + startTestServer("test/server/test_data/server/stats_evict_bootstrap.yaml", true); + EXPECT_EQ(2, server_->statsConfig().evictOnFlush()); + EXPECT_EQ(std::chrono::seconds(5), server_->statsConfig().flushInterval()); + + auto counter = TestUtility::findCounter(stats_store_, "stats.flushed"); + + time_system_.advanceTimeWait(std::chrono::seconds(6)); + EXPECT_EQ(1L, counter->value()); + EXPECT_EQ(0, stats_store_.evictionCount()); + + // Eviction applied here: side-effect is that c1 is now marked as unused. + time_system_.advanceTimeWait(std::chrono::seconds(6)); + EXPECT_EQ(2L, counter->value()); + EXPECT_EQ(1, stats_store_.evictionCount()); + + time_system_.advanceTimeWait(std::chrono::seconds(6)); + EXPECT_EQ(3L, counter->value()); + EXPECT_EQ(1, stats_store_.evictionCount()); + + // Second pass of eviction deletes the counter. + time_system_.advanceTimeWait(std::chrono::seconds(6)); + EXPECT_EQ(4L, counter->value()); + EXPECT_EQ(2, stats_store_.evictionCount()); + + server_->dispatcher().post([&] { server_->shutdown(); }); + server_thread->join(); +} + TEST_P(ServerInstanceImplTest, ConcurrentFlushes) { CustomStatsSinkFactory factory; Registry::InjectFactory registered(factory); diff --git a/test/server/test_data/server/stats_evict_bootstrap.yaml b/test/server/test_data/server/stats_evict_bootstrap.yaml new file mode 100644 index 0000000000000..b295c2c0c471c --- /dev/null +++ b/test/server/test_data/server/stats_evict_bootstrap.yaml @@ -0,0 +1,11 @@ +node: + id: bootstrap_id + cluster: bootstrap_cluster + locality: + zone: bootstrap_zone + sub_zone: bootstrap_sub_zone +stats_sinks: +- name: envoy.custom_stats_sink + typed_config: + "@type": type.googleapis.com/google.protobuf.Struct +stats_eviction_interval: 10s From 6726d8365a15c3e9af37d7606db6b3aab9085af2 Mon Sep 17 00:00:00 2001 From: Kuat Yessenov Date: Tue, 5 Aug 2025 22:46:04 +0000 Subject: [PATCH 6/6] format Change-Id: Ie035c25d3900eab69b4317649e28f12c32189daa Signed-off-by: Kuat Yessenov --- changelogs/current.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/changelogs/current.yaml b/changelogs/current.yaml index b6b6521cd2b6d..04fe673be0e92 100644 --- a/changelogs/current.yaml +++ b/changelogs/current.yaml @@ -81,7 +81,10 @@ removed_config_or_runtime: new_features: - area: stats change: | - Added support to remove unused metrics from memory for extensions that support evictable metrics. This is done :ref:`periodically ` during the metric flush. + Added support to remove unused metrics from memory for extensions that + support evictable metrics. This is done :ref:`periodically + ` + during the metric flush. - area: quic change: | Added new option to support :ref:`base64 encoded server ID