Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ go 1.24
require (
github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443
github.com/envoyproxy/go-control-plane v0.13.5-0.20250816044120-872f08a0dcd7
github.com/envoyproxy/go-control-plane/envoy v1.32.4
github.com/envoyproxy/go-control-plane/envoy v1.32.5-0.20250902220953-4a410393a630
github.com/golang/protobuf v1.5.4
github.com/google/go-cmp v0.7.0
github.com/prometheus/client_model v0.6.1
github.com/prometheus/client_model v0.6.2
github.com/prometheus/common v0.46.0
go.opentelemetry.io/proto/otlp v1.1.0
go.opentelemetry.io/proto/otlp v1.7.1
go.starlark.net v0.0.0-20240123142251-f86470692795
google.golang.org/genproto/googleapis/rpc v0.0.0-20250728155136-f173205681a0
google.golang.org/grpc v1.74.2
Expand All @@ -23,7 +23,7 @@ require (
cel.dev/expr v0.24.0 // indirect
github.com/envoyproxy/go-control-plane/ratelimit v0.1.0 // indirect
github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.0 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.1 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
golang.org/x/net v0.42.0 // indirect
Expand Down
8 changes: 8 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ github.com/envoyproxy/go-control-plane v0.13.5-0.20250816044120-872f08a0dcd7 h1:
github.com/envoyproxy/go-control-plane v0.13.5-0.20250816044120-872f08a0dcd7/go.mod h1:QfIsBfUKS+TDsIM8Ynd92VrjDtBgWW1CpjLXSBTlyEI=
github.com/envoyproxy/go-control-plane/envoy v1.32.4 h1:jb83lalDRZSpPWW2Z7Mck/8kXZ5CQAFYVjQcdVIr83A=
github.com/envoyproxy/go-control-plane/envoy v1.32.4/go.mod h1:Gzjc5k8JcJswLjAx1Zm+wSYE20UrLtt7JZMWiWQXQEw=
github.com/envoyproxy/go-control-plane/envoy v1.32.5-0.20250902220953-4a410393a630 h1:UlhERwhc4Iea3rHstrWA1ApDxU/WQn5Wc2cKxgvo08Y=
github.com/envoyproxy/go-control-plane/envoy v1.32.5-0.20250902220953-4a410393a630/go.mod h1:2LcmvJoXsDSrsGZIxGM0Gah9ykiwTn/kgjyQdnNH8Jc=
github.com/envoyproxy/go-control-plane/ratelimit v0.1.0 h1:/G9QYbddjL25KvtKTv3an9lx6VBE2cnb8wp1vEGNYGI=
github.com/envoyproxy/go-control-plane/ratelimit v0.1.0/go.mod h1:Wk+tMFAFbCXaJPzVVHnPgRKdUdwW/KdbRt94AzgRee4=
github.com/envoyproxy/protoc-gen-validate v1.2.1 h1:DEo3O99U8j4hBFwbJfrz9VtgcDfUKS7KJ7spH3d86P8=
Expand All @@ -26,6 +28,8 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.0 h1:Wqo399gCIufwto+VfwCSvsnfGpF/w5E9CNxSwbpD6No=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.0/go.mod h1:qmOFXW2epJhM0qSnUUYpldc7gVz2KMQwJ/QYCDIa7XU=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.1 h1:X5VWvz21y3gzm9Nw/kaUeku/1+uBhcekkmy4IkffJww=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.1/go.mod h1:Zanoh4+gvIgluNqcfMVTJueD4wSS5hT7zTt4Mrutd90=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
Expand All @@ -36,6 +40,8 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
github.com/prometheus/common v0.46.0 h1:doXzt5ybi1HBKpsZOL0sSkaNHJJqkyfEWZGGqqScV0Y=
github.com/prometheus/common v0.46.0/go.mod h1:Tp0qkxpb9Jsg54QMe+EAmqXkSV7Evdy1BTn+g2pa/hQ=
github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
Expand All @@ -56,6 +62,8 @@ go.opentelemetry.io/otel/trace v1.36.0 h1:ahxWNuqZjpdiFAyrIoQ4GIiAIhxAunQR6MUoKr
go.opentelemetry.io/otel/trace v1.36.0/go.mod h1:gQ+OnDZzrybY4k4seLzPAWNwVBBVlF2szhehOBB/tGA=
go.opentelemetry.io/proto/otlp v1.1.0 h1:2Di21piLrCqJ3U3eXGCTPHE9R8Nh+0uglSnOyxikMeI=
go.opentelemetry.io/proto/otlp v1.1.0/go.mod h1:GpBHCBWiqvVLDqmHZsoMM3C5ySeKTC7ej/RNTae6MdY=
go.opentelemetry.io/proto/otlp v1.7.1 h1:gTOMpGDb0WTBOP8JaO72iL3auEZhVmAQg4ipjOVAtj4=
go.opentelemetry.io/proto/otlp v1.7.1/go.mod h1:b2rVh6rfI/s2pHWNlB7ILJcRALpcNDzKhACevjI+ZnE=
go.starlark.net v0.0.0-20240123142251-f86470692795 h1:LmbG8Pq7KDGkglKVn8VpZOZj6vb9b8nKEGcg9l03epM=
go.starlark.net v0.0.0-20240123142251-f86470692795/go.mod h1:LcLNIzVOMp4oV+uusnpk+VU+SzXaJakUuBjoCSWH5dM=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
Expand Down
2 changes: 2 additions & 0 deletions source/extensions/filters/http/istio_stats/config.proto
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,11 @@ message PluginConfig {

// Metric scope rotation interval. Set to 0 to disable the metric scope rotation.
// Defaults to 0.
// DEPRECATED.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just for note, we need to remove all related stuffs(e.g. the env flags) from istio since it won't work anymore.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that's a follow up. There's a difference in configuration since this is now a per-Envoy config, so maybe a pod annotation rather than telemetry API.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or just mentioned in custom bootstrap.

google.protobuf.Duration rotation_interval = 11;

// Metric expiry graceful deletion interval. No-op if the metric rotation is disabled.
// Defaults to 5m. Must be >=1s.
// DEPRECATED.
google.protobuf.Duration graceful_deletion_interval = 12;
}
106 changes: 19 additions & 87 deletions source/extensions/filters/http/istio_stats/istio_stats.cc
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,8 @@ peerInfo(Reporter reporter, const StreamInfo::FilterState& filter_state) {

// Process-wide context shared with all filter instances.
struct Context : public Singleton::Instance {
explicit Context(Stats::SymbolTable& symbol_table, const LocalInfo::LocalInfo& local_info)
: pool_(symbol_table), local_info_(local_info),
explicit Context(Stats::Scope& scope, const LocalInfo::LocalInfo& local_info)
: pool_(scope.symbolTable()), local_info_(local_info),
stat_namespace_(pool_.add(CustomStatNamespace)),
requests_total_(pool_.add("istio_requests_total")),
request_duration_milliseconds_(pool_.add("istio_request_duration_milliseconds")),
Expand Down Expand Up @@ -214,7 +214,8 @@ struct Context : public Singleton::Instance {
cluster_name_(pool_.add(extractString(local_info.node().metadata(), "CLUSTER_ID"))),
waypoint_(pool_.add("waypoint")), istio_build_(pool_.add("istio_build")),
component_(pool_.add("component")), proxy_(pool_.add("proxy")), tag_(pool_.add("tag")),
istio_version_(pool_.add(extractString(local_info.node().metadata(), "ISTIO_VERSION"))) {
istio_version_(pool_.add(extractString(local_info.node().metadata(), "ISTIO_VERSION"))),
scope_(scope.createScope("", true)) {
all_metrics_ = {
{"requests_total", requests_total_},
{"request_duration_milliseconds", request_duration_milliseconds_},
Expand Down Expand Up @@ -335,6 +336,9 @@ struct Context : public Singleton::Instance {
const Stats::StatName proxy_;
const Stats::StatName tag_;
const Stats::StatName istio_version_;

// Shared evictable stats scope
Stats::ScopeSharedPtr scope_;
}; // namespace

using ContextSharedPtr = std::shared_ptr<Context>;
Expand Down Expand Up @@ -443,86 +447,15 @@ struct MetricOverrides : public Logger::Loggable<Logger::Id::filter> {
absl::flat_hash_map<std::string, uint32_t> expression_ids_;
};

// Self-managed scope with active rotation. Envoy stats scope controls the
// lifetime of the individual metrics. Because the scope is attached to xDS
// resources, metrics with data derived from the requests can accumulate and
// grow indefinitely for long-living xDS resources. To limit this growth,
// this class implements a rotation mechanism, whereas a new scope is created
// periodically to replace the current scope.
//
// The replaced stats scope is deleted gracefully after a minimum of 1s delay
// for two reasons:
//
// 1. Stats flushing is asynchronous and the data may be lost if not flushed
// before the deletion (see stats_flush_interval).
//
// 2. The implementation avoids locking by releasing a raw pointer to workers.
// When the rotation happens on the main, the raw pointer may still be in-use
// by workers for a short duration.
class RotatingScope : public Logger::Loggable<Logger::Id::filter> {
public:
RotatingScope(Server::Configuration::FactoryContext& factory_context, uint64_t rotate_interval_ms,
uint64_t delete_interval_ms)
: parent_scope_(factory_context.scope()), active_scope_(parent_scope_.createScope("")),
raw_scope_(active_scope_.get()), rotate_interval_ms_(rotate_interval_ms),
delete_interval_ms_(delete_interval_ms) {
if (rotate_interval_ms_ > 0) {
ASSERT(delete_interval_ms_ < rotate_interval_ms_);
ASSERT(delete_interval_ms_ >= 1000);
Event::Dispatcher& dispatcher = factory_context.serverFactoryContext().mainThreadDispatcher();
rotate_timer_ = dispatcher.createTimer([this] { onRotate(); });
delete_timer_ = dispatcher.createTimer([this] { onDelete(); });
rotate_timer_->enableTimer(std::chrono::milliseconds(rotate_interval_ms_));
}
}
~RotatingScope() {
if (rotate_timer_) {
rotate_timer_->disableTimer();
rotate_timer_.reset();
}
if (delete_timer_) {
delete_timer_->disableTimer();
delete_timer_.reset();
}
}
Stats::Scope* scope() { return raw_scope_.load(); }

private:
void onRotate() {
ENVOY_LOG(info, "Rotating active Istio stats scope after {}ms.", rotate_interval_ms_);
draining_scope_ = active_scope_;
delete_timer_->enableTimer(std::chrono::milliseconds(delete_interval_ms_));
active_scope_ = parent_scope_.createScope("");
raw_scope_.store(active_scope_.get());
rotate_timer_->enableTimer(std::chrono::milliseconds(rotate_interval_ms_));
}
void onDelete() {
ENVOY_LOG(info, "Deleting draining Istio stats scope after {}ms.", delete_interval_ms_);
draining_scope_.reset();
}
Stats::Scope& parent_scope_;
Stats::ScopeSharedPtr active_scope_;
std::atomic<Stats::Scope*> raw_scope_;
Stats::ScopeSharedPtr draining_scope_{nullptr};
const uint64_t rotate_interval_ms_;
const uint64_t delete_interval_ms_;
Event::TimerPtr rotate_timer_{nullptr};
Event::TimerPtr delete_timer_{nullptr};
};

struct Config : public Logger::Loggable<Logger::Id::filter> {
Config(const stats::PluginConfig& proto_config,
Server::Configuration::FactoryContext& factory_context)
: context_(factory_context.serverFactoryContext().singletonManager().getTyped<Context>(
SINGLETON_MANAGER_REGISTERED_NAME(Context),
[&factory_context] {
return std::make_shared<Context>(
factory_context.serverFactoryContext().scope().symbolTable(),
factory_context.serverFactoryContext().localInfo());
return std::make_shared<Context>(factory_context.serverFactoryContext().scope(),
factory_context.serverFactoryContext().localInfo());
})),
scope_(factory_context, PROTOBUF_GET_MS_OR_DEFAULT(proto_config, rotation_interval, 0),
PROTOBUF_GET_MS_OR_DEFAULT(proto_config, graceful_deletion_interval,
/* 5m */ 1000 * 60 * 5)),
disable_host_header_fallback_(proto_config.disable_host_header_fallback()),
report_duration_(
PROTOBUF_GET_MS_OR_DEFAULT(proto_config, tcp_reporting_duration, /* 5s */ 5000)) {
Expand All @@ -548,7 +481,7 @@ struct Config : public Logger::Loggable<Logger::Id::filter> {
break;
}
if (proto_config.metrics_size() > 0 || proto_config.definitions_size() > 0) {
metric_overrides_ = std::make_unique<MetricOverrides>(context_, scope()->symbolTable());
metric_overrides_ = std::make_unique<MetricOverrides>(context_, scope().symbolTable());
for (const auto& definition : proto_config.definitions()) {
const auto& it = context_->all_metrics_.find(definition.name());
if (it != context_->all_metrics_.end()) {
Expand Down Expand Up @@ -719,12 +652,12 @@ struct Config : public Logger::Loggable<Logger::Id::filter> {
return;
}
auto new_tags = parent_.metric_overrides_->overrideTags(metric, tags, expr_values_);
Stats::Utility::counterFromStatNames(*parent_.scope(),
Stats::Utility::counterFromStatNames(parent_.scope(),
{parent_.context_->stat_namespace_, metric}, new_tags)
.add(amount);
return;
}
Stats::Utility::counterFromStatNames(*parent_.scope(),
Stats::Utility::counterFromStatNames(parent_.scope(),
{parent_.context_->stat_namespace_, metric}, tags)
.add(amount);
}
Expand All @@ -738,12 +671,12 @@ struct Config : public Logger::Loggable<Logger::Id::filter> {
}
auto new_tags = parent_.metric_overrides_->overrideTags(metric, tags, expr_values_);
Stats::Utility::histogramFromStatNames(
*parent_.scope(), {parent_.context_->stat_namespace_, metric}, unit, new_tags)
parent_.scope(), {parent_.context_->stat_namespace_, metric}, unit, new_tags)
.recordValue(value);
return;
}
Stats::Utility::histogramFromStatNames(
*parent_.scope(), {parent_.context_->stat_namespace_, metric}, unit, tags)
parent_.scope(), {parent_.context_->stat_namespace_, metric}, unit, tags)
.recordValue(value);
}

Expand All @@ -756,17 +689,17 @@ struct Config : public Logger::Loggable<Logger::Id::filter> {
switch (metric.type_) {
case MetricOverrides::MetricType::Counter:
Stats::Utility::counterFromStatNames(
*parent_.scope(), {parent_.context_->stat_namespace_, metric.name_}, tags)
parent_.scope(), {parent_.context_->stat_namespace_, metric.name_}, tags)
.add(amount);
break;
case MetricOverrides::MetricType::Histogram:
Stats::Utility::histogramFromStatNames(
*parent_.scope(), {parent_.context_->stat_namespace_, metric.name_},
parent_.scope(), {parent_.context_->stat_namespace_, metric.name_},
Stats::Histogram::Unit::Bytes, tags)
.recordValue(amount);
break;
case MetricOverrides::MetricType::Gauge:
Stats::Utility::gaugeFromStatNames(*parent_.scope(),
Stats::Utility::gaugeFromStatNames(parent_.scope(),
{parent_.context_->stat_namespace_, metric.name_},
Stats::Gauge::ImportMode::Accumulate, tags)
.set(amount);
Expand Down Expand Up @@ -797,10 +730,9 @@ struct Config : public Logger::Loggable<Logger::Id::filter> {
}

Reporter reporter() const { return reporter_; }
Stats::Scope* scope() { return scope_.scope(); }
Stats::Scope& scope() { return *context_->scope_; }

ContextSharedPtr context_;
RotatingScope scope_;
Reporter reporter_;

const bool disable_host_header_fallback_;
Expand All @@ -817,7 +749,7 @@ class IstioStatsFilter : public Http::PassThroughFilter,
public Network::ConnectionCallbacks {
public:
IstioStatsFilter(ConfigSharedPtr config)
: config_(config), context_(*config->context_), pool_(config->scope()->symbolTable()),
: config_(config), context_(*config->context_), pool_(config->scope().symbolTable()),
stream_(*config_, pool_) {
tags_.reserve(25);
switch (config_->reporter()) {
Expand Down
7 changes: 4 additions & 3 deletions test/envoye2e/stats_plugin/stats_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -898,9 +898,10 @@ func TestTCPStatsServerWaypointProxyCONNECT(t *testing.T) {

func TestStatsExpiry(t *testing.T) {
params := driver.NewTestParams(t, map[string]string{
"RequestCount": "1",
"StatsConfig": driver.LoadTestData("testdata/bootstrap/stats.yaml.tmpl"),
"StatsFilterClientConfig": driver.LoadTestJSON("testdata/stats/client_config_expiry.yaml"),
"RequestCount": "1",
"StatsConfig": driver.LoadTestData("testdata/bootstrap/stats.yaml.tmpl") + "\n" +
driver.LoadTestData("testdata/bootstrap/stats_expiry.yaml.tmpl"),
"StatsFilterClientConfig": driver.LoadTestJSON("testdata/stats/client_config.yaml"),
"StatsFilterServerConfig": driver.LoadTestJSON("testdata/stats/server_config.yaml"),
}, envoye2e.ProxyE2ETests)
params.Vars["ClientMetadata"] = params.LoadTestData("testdata/client_node_metadata.json.tmpl")
Expand Down
2 changes: 2 additions & 0 deletions testdata/bootstrap/stats_expiry.yaml.tmpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
stats_flush_interval: 1s
stats_eviction_interval: 1s
2 changes: 0 additions & 2 deletions testdata/stats/client_config_expiry.yaml

This file was deleted.