Skip to content
10 changes: 9 additions & 1 deletion api/envoy/config/bootstrap/v3/bootstrap.proto
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ option (udpa.annotations.file_status).package_version_status = ACTIVE;
// <config_overview_bootstrap>` for more detail.

// Bootstrap :ref:`configuration overview <config_overview_bootstrap>`.
// [#next-free-field: 42]
// [#next-free-field: 43]
message Bootstrap {
option (udpa.annotations.versioning).previous_message_type =
"envoy.config.bootstrap.v2.Bootstrap";
Expand Down Expand Up @@ -230,6 +230,14 @@ message Bootstrap {
bool stats_flush_on_admin = 29 [(validate.rules).bool = {const: true}];
}

// Controls periodic eviction of unused metrics.
oneof stats_eviction {
// Optional duration to perform metric eviction. At every interval, during the stats flush
// the unused metrics are removed from the central and worker caches of evictable scopes, and
// the used metrics are marked as unused. Must be a multiple of the ``stats_flush_interval``.
// Values shorter than 1 millisecond are rejected by validation.
google.protobuf.Duration stats_eviction_interval = 42
[(validate.rules).duration = {gte {nanos: 1000000}}];
}

// Optional watchdog configuration.
// This is for a single watchdog configuration for the entire system.
// Deprecated in favor of ``watchdogs`` which has finer granularity.
Expand Down
6 changes: 6 additions & 0 deletions changelogs/current.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,12 @@ removed_config_or_runtime:
Removed runtime guard ``envoy.reloadable_features.proxy_104`` and legacy code paths.

new_features:
- area: stats
change: |
Added support to remove unused metrics from memory for extensions that
support evictable metrics. This is done :ref:`periodically
<envoy_v3_api_field_config.bootstrap.v3.Bootstrap.stats_eviction_interval>`
during the metric flush.
- area: quic
change: |
Added new option to support :ref:`base64 encoded server ID
Expand Down
5 changes: 5 additions & 0 deletions envoy/server/configuration.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,11 @@ class StatsConfig {
* @return true if deferred creation of stats is enabled.
*/
virtual bool enableDeferredCreationStats() const PURE;

/**
* Reports how stats eviction is scheduled relative to stats flushes.
* NOTE(review): presumably a value of N means eviction runs on every Nth flush; confirm against
* the flush loop that consumes this value.
* @return uint32_t a multiple of the flush interval to perform stats eviction, or 0 if disabled.
*/
virtual uint32_t evictOnFlush() const PURE;
};

/**
Expand Down
8 changes: 6 additions & 2 deletions envoy/stats/scope.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,17 +71,21 @@ class Scope : public std::enable_shared_from_this<Scope> {
* See also scopeFromStatName, which is preferred.
*
* @param name supplies the scope's namespace prefix.
* @param evictable whether unused metrics can be deleted from the scope caches. This requires
* that the metrics are not stored by reference.
*/
virtual ScopeSharedPtr createScope(const std::string& name) PURE;
virtual ScopeSharedPtr createScope(const std::string& name, bool evictable = false) PURE;

/**
* Allocate a new scope. NOTE: The implementation should correctly handle overlapping scopes
* that point to the same reference counted backing stats. This allows a new scope to be
* gracefully swapped in while an old scope with the same name is being destroyed.
*
* @param name supplies the scope's namespace prefix.
* @param evictable whether unused metrics can be deleted from the scope caches. This requires
* that the metrics are not stored by reference.
* @return ScopeSharedPtr the newly allocated scope.
*
* NOTE(review): the default value for ``evictable`` is declared on a virtual function, and
* default arguments are selected from the static type at the call site. The overrides in
* IsolatedScopeImpl declare no default, so a call made through that derived type must pass
* ``evictable`` explicitly.
*/
virtual ScopeSharedPtr scopeFromStatName(StatName name) PURE;
virtual ScopeSharedPtr scopeFromStatName(StatName name, bool evictable = false) PURE;

/**
* Creates a Counter from the stat name. Tag extraction will be performed on the name.
Expand Down
5 changes: 5 additions & 0 deletions envoy/stats/stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,11 @@ class Metric : public RefcountInterface {
*/
virtual bool used() const PURE;

/**
* Clear any indicator on whether this metric has been updated. Implementations whose used()
* result is fixed (for example the null metrics, which always report unused) implement this
* as a no-op.
*/
virtual void markUnused() PURE;

/**
* Indicates whether this metric is hidden.
*/
Expand Down
9 changes: 8 additions & 1 deletion envoy/stats/store.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,11 @@ class Store {
virtual void forEachHistogram(SizeFn f_size, StatFn<ParentHistogram> f_stat) const PURE;
virtual void forEachScope(SizeFn f_size, StatFn<const Scope> f_stat) const PURE;

/**
* Delete unused metrics from all the evictable scope caches, and mark the rest as unused.
* Stores that do not support eviction implement this as a no-op.
*/
virtual void evictUnused() PURE;

/**
* @return a null counter that will ignore increments and always return 0.
*/
Expand Down Expand Up @@ -172,7 +177,9 @@ class Store {
/**
* Convenience wrapper that creates the scope through the root scope.
* @param name supplies the scope's namespace prefix.
* @param evictable forwarded unchanged to the root scope's createScope().
* @return a scope of the given name.
*/
ScopeSharedPtr createScope(const std::string& name) { return rootScope()->createScope(name); }
ScopeSharedPtr createScope(const std::string& name, bool evictable = false) {
return rootScope()->createScope(name, evictable);
}

/**
* Extracts tags from the name and appends them to the provided StatNameTagVector.
Expand Down
1 change: 1 addition & 0 deletions source/common/stats/allocator_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ template <class BaseClass> class StatsSharedImpl : public MetricImpl<BaseClass>
// Metric
SymbolTable& symbolTable() final { return alloc_.symbolTable(); }
bool used() const override { return flags_ & Metric::Flags::Used; }
// Clears the Used flag bit, which is the bit that used() tests.
void markUnused() override { flags_ &= ~Metric::Flags::Used; }
bool hidden() const override { return flags_ & Metric::Flags::Hidden; }

// RefcountInterface
Expand Down
2 changes: 2 additions & 0 deletions source/common/stats/histogram_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ class HistogramImpl : public HistogramImplHelper {
void recordValue(uint64_t value) override { parent_.deliverHistogramToSinks(*this, value); }

bool used() const override { return true; }
// Nothing to clear: used() is unconditionally true for this histogram.
void markUnused() override {}
bool hidden() const override { return false; }
SymbolTable& symbolTable() final { return parent_.symbolTable(); }

Expand All @@ -132,6 +133,7 @@ class NullHistogramImpl : public HistogramImplHelper {
~NullHistogramImpl() override { MetricImpl::clear(symbol_table_); }

bool used() const override { return false; }
// Nothing to clear: used() is unconditionally false for the null histogram.
void markUnused() override {}
bool hidden() const override { return false; }
SymbolTable& symbolTable() override { return symbol_table_; }

Expand Down
6 changes: 3 additions & 3 deletions source/common/stats/isolated_store_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,12 @@ ConstScopeSharedPtr IsolatedStoreImpl::constRootScope() const {

IsolatedStoreImpl::~IsolatedStoreImpl() = default;

// Creates a child scope from a string name. The evictable flag is accepted to satisfy the Scope
// interface but is ignored: the parameter is unnamed and false is always forwarded.
ScopeSharedPtr IsolatedScopeImpl::createScope(const std::string& name) {
ScopeSharedPtr IsolatedScopeImpl::createScope(const std::string& name, bool) {
// Sanitize the name and hold its StatName storage in a local for the duration of this call.
StatNameManagedStorage stat_name_storage(Utility::sanitizeStatsName(name), symbolTable());
return scopeFromStatName(stat_name_storage.statName());
return scopeFromStatName(stat_name_storage.statName(), false);
}

ScopeSharedPtr IsolatedScopeImpl::scopeFromStatName(StatName name) {
ScopeSharedPtr IsolatedScopeImpl::scopeFromStatName(StatName name, bool) {
SymbolTable::StoragePtr prefix_name_storage = symbolTable().join({prefix(), name});
ScopeSharedPtr scope = store_.makeScope(StatName(prefix_name_storage.get()));
addScopeToStore(scope);
Expand Down
8 changes: 6 additions & 2 deletions source/common/stats/isolated_store_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,10 @@ class IsolatedStoreImpl : public Store {
}
}

// Store interface: eviction hook, intentionally empty for the isolated store.
void evictUnused() override {
// Do nothing. Eviction is only supported on the thread local stores.
}

void forEachSinkedCounter(SizeFn f_size, StatFn<Counter> f_stat) const override {
forEachCounter(f_size, f_stat);
}
Expand Down Expand Up @@ -295,8 +299,8 @@ class IsolatedScopeImpl : public Scope {
StatNameTagVectorOptConstRef tags) override {
return store_.counters_.get(prefix(), name, tags, symbolTable());
}
ScopeSharedPtr createScope(const std::string& name) override;
ScopeSharedPtr scopeFromStatName(StatName name) override;
ScopeSharedPtr createScope(const std::string& name, bool evictable) override;
ScopeSharedPtr scopeFromStatName(StatName name, bool evictable) override;
Gauge& gaugeFromStatNameWithTags(const StatName& name, StatNameTagVectorOptConstRef tags,
Gauge::ImportMode import_mode) override {
Gauge& gauge = store_.gauges_.get(prefix(), name, tags, symbolTable(), import_mode);
Expand Down
1 change: 1 addition & 0 deletions source/common/stats/null_counter.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ class NullCounterImpl : public MetricImpl<Counter> {

// Metric
bool used() const override { return false; }
// Nothing to clear: used() is unconditionally false for the null counter.
void markUnused() override {}
bool hidden() const override { return false; }
SymbolTable& symbolTable() override { return symbol_table_; }

Expand Down
1 change: 1 addition & 0 deletions source/common/stats/null_gauge.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class NullGaugeImpl : public MetricImpl<Gauge> {

// Metric
bool used() const override { return false; }
// Nothing to clear: used() is unconditionally false for the null gauge.
void markUnused() override {}
bool hidden() const override { return false; }
SymbolTable& symbolTable() override { return symbol_table_; }

Expand Down
1 change: 1 addition & 0 deletions source/common/stats/null_text_readout.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class NullTextReadoutImpl : public MetricImpl<TextReadout> {

// Metric
bool used() const override { return false; }
// Nothing to clear: used() is unconditionally false for the null text readout.
void markUnused() override {}
bool hidden() const override { return false; }
SymbolTable& symbolTable() override { return symbol_table_; }

Expand Down
127 changes: 120 additions & 7 deletions source/common/stats/thread_local_store.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ ThreadLocalStoreImpl::ThreadLocalStoreImpl(Allocator& alloc)
well_known_tags_->rememberBuiltin(desc.name_);
}
StatNameManagedStorage empty("", alloc.symbolTable());
auto new_scope = std::make_shared<ScopeImpl>(*this, StatName(empty.statName()));
auto new_scope = std::make_shared<ScopeImpl>(*this, StatName(empty.statName()), false);
addScope(new_scope);
default_scope_ = new_scope;
}
Expand Down Expand Up @@ -154,14 +154,15 @@ std::vector<CounterSharedPtr> ThreadLocalStoreImpl::counters() const {
return ret;
}

// Creates a child scope from a string name: the name is sanitized, converted to a StatName held
// in a local, and handed to scopeFromStatName() together with the evictable flag.
ScopeSharedPtr ThreadLocalStoreImpl::ScopeImpl::createScope(const std::string& name) {
ScopeSharedPtr ThreadLocalStoreImpl::ScopeImpl::createScope(const std::string& name,
bool evictable) {
StatNameManagedStorage stat_name_storage(Utility::sanitizeStatsName(name), symbolTable());
return scopeFromStatName(stat_name_storage.statName());
return scopeFromStatName(stat_name_storage.statName(), evictable);
}

// Creates a child scope whose prefix is this scope's prefix joined with name, registers it with
// the parent store via addScope(), and returns it. The evictable flag is passed to the new
// scope's constructor.
ScopeSharedPtr ThreadLocalStoreImpl::ScopeImpl::scopeFromStatName(StatName name) {
ScopeSharedPtr ThreadLocalStoreImpl::ScopeImpl::scopeFromStatName(StatName name, bool evictable) {
SymbolTable::StoragePtr joined = symbolTable().join({prefix_.statName(), name});
auto new_scope = std::make_shared<ScopeImpl>(parent_, StatName(joined.get()));
auto new_scope = std::make_shared<ScopeImpl>(parent_, StatName(joined.get()), evictable);
parent_.addScope(new_scope);
return new_scope;
}
Expand Down Expand Up @@ -394,8 +395,9 @@ void ThreadLocalStoreImpl::clearHistogramsFromCaches() {
}
}

// Builds a scope that takes the parent store's next scope id (advancing that counter), stores
// the prefix using the parent's symbol table, allocates a fresh central cache entry, and records
// the evictable flag that evictUnused() checks before touching the scope.
ThreadLocalStoreImpl::ScopeImpl::ScopeImpl(ThreadLocalStoreImpl& parent, StatName prefix)
: scope_id_(parent.next_scope_id_++), parent_(parent),
ThreadLocalStoreImpl::ScopeImpl::ScopeImpl(ThreadLocalStoreImpl& parent, StatName prefix,
bool evictable)
: scope_id_(parent.next_scope_id_++), parent_(parent), evictable_(evictable),
prefix_(prefix, parent.alloc_.symbolTable()),
central_cache_(new CentralCacheEntry(parent.alloc_.symbolTable())) {}

Expand Down Expand Up @@ -910,6 +912,14 @@ bool ParentHistogramImpl::used() const {
return merged_;
}

void ParentHistogramImpl::markUnused() {
merged_ = false;
Thread::LockGuard lock(merge_lock_);
for (const TlsHistogramSharedPtr& tls_histogram : tls_histograms_) {
tls_histogram->markUnused();
}
}

bool ParentHistogramImpl::hidden() const { return false; }

void ParentHistogramImpl::merge() {
Expand Down Expand Up @@ -1030,6 +1040,109 @@ void ThreadLocalStoreImpl::forEachScope(std::function<void(std::size_t)> f_size,
}
}

namespace {

// Holds the metrics evicted from one scope, keyed by stat name and grouped by metric type,
// together with the id of the scope they were taken from.
struct MetricBag {
  explicit MetricBag(uint64_t scope_id) : scope_id_(scope_id) {}

  // Returns true when no metric of any type has been collected.
  bool empty() const {
    const bool holds_any = !counters_.empty() || !gauges_.empty() || !histograms_.empty() ||
                           !text_readouts_.empty();
    return !holds_any;
  }

  const uint64_t scope_id_;
  StatNameHashMap<CounterSharedPtr> counters_;
  StatNameHashMap<GaugeSharedPtr> gauges_;
  StatNameHashMap<ParentHistogramImplSharedPtr> histograms_;
  StatNameHashMap<TextReadoutSharedPtr> text_readouts_;
};

} // namespace

void ThreadLocalStoreImpl::evictUnused() {
ASSERT_IS_MAIN_OR_TEST_THREAD();

// If we are shutting down, we no longer perform eviction as workers may be shutting down
// and not able to complete their work.
if (shutting_down_ || !tls_cache_) {
return;
}

auto evicted_metrics = std::make_shared<std::vector<MetricBag>>();
{
Thread::LockGuard lock(lock_);
iterateScopesLockHeld([evicted_metrics](const ScopeImplSharedPtr& scope) -> bool {
if (scope->evictable_) {
MetricBag metrics(scope->scope_id_);
CentralCacheEntrySharedPtr& central_cache = scope->centralCacheMutableNoThreadAnalysis();
auto filter_unused = []<typename T>(StatNameHashMap<T>& unused_metrics) {
return [&unused_metrics](std::pair<StatName, T> kv) {
const auto& [name, metric] = kv;
if (metric->used()) {
metric->markUnused();
return false;
} else {
unused_metrics.try_emplace(name, metric);
return true;
}
};
};
absl::erase_if(central_cache->counters_, filter_unused(metrics.counters_));
absl::erase_if(central_cache->gauges_, filter_unused(metrics.gauges_));
absl::erase_if(central_cache->text_readouts_, filter_unused(metrics.text_readouts_));
absl::erase_if(central_cache->histograms_, filter_unused(metrics.histograms_));
if (!metrics.empty()) {
evicted_metrics->push_back(std::move(metrics));
}
}
return true;
});
}

// At this point, central caches no longer return the evicted stats, but we
// need to keep the storage for the evicted stats until after the thread
// local caches are cleared.
if (!evicted_metrics->empty()) {
tls_cache_->runOnAllThreads(
[evicted_metrics](OptRef<TlsCache> tls_cache) {
for (const auto& metrics : *evicted_metrics) {
TlsCacheEntry& entry = tls_cache->insertScope(metrics.scope_id_);
absl::erase_if(entry.counters_,
[&](std::pair<StatName, std::reference_wrapper<Counter>> kv) {
return metrics.counters_.contains(kv.first);
});
absl::erase_if(entry.gauges_,
[&](std::pair<StatName, std::reference_wrapper<Gauge>> kv) {
return metrics.gauges_.contains(kv.first);
});
absl::erase_if(entry.text_readouts_,
[&](std::pair<StatName, std::reference_wrapper<TextReadout>> kv) {
return metrics.text_readouts_.contains(kv.first);
});
absl::erase_if(entry.parent_histograms_,
[&](std::pair<StatName, ParentHistogramSharedPtr> kv) {
return metrics.histograms_.contains(kv.first);
});
}
},
[evicted_metrics]() {
// We want to delete stale stats on the main thread since stat
// destructors lock the stats allocator. Note that we might have
// received fresh values on the stale cache-local stats after deleting them from the
// central cache.. Eventually, we might also want to defer the deletion further in the
// allocator until the values are flushed to the sinks.
size_t scopes = 0, counters = 0, gauges = 0, readouts = 0, histograms = 0;
for (const auto& metrics : *evicted_metrics) {
scopes += 1;
counters += metrics.counters_.size();
gauges += metrics.gauges_.size();
readouts += metrics.text_readouts_.size();
histograms += metrics.histograms_.size();
}
ENVOY_LOG(debug,
"deleted stale {} counters, {} gauges, {} text readouts, {} histograms from "
"{} scopes",
counters, gauges, readouts, histograms, scopes);
});
}
}

bool ThreadLocalStoreImpl::iterateScopesLockHeld(
const std::function<bool(const ScopeImplSharedPtr&)> fn) const
ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
Expand Down
Loading
Loading