diff --git a/go.mod b/go.mod index 4c2ee7673f8..d046ac27a6a 100644 --- a/go.mod +++ b/go.mod @@ -5,12 +5,12 @@ go 1.24 require ( github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 github.com/envoyproxy/go-control-plane v0.13.5-0.20250816044120-872f08a0dcd7 - github.com/envoyproxy/go-control-plane/envoy v1.32.4 + github.com/envoyproxy/go-control-plane/envoy v1.32.5-0.20250902220953-4a410393a630 github.com/golang/protobuf v1.5.4 github.com/google/go-cmp v0.7.0 - github.com/prometheus/client_model v0.6.1 + github.com/prometheus/client_model v0.6.2 github.com/prometheus/common v0.46.0 - go.opentelemetry.io/proto/otlp v1.1.0 + go.opentelemetry.io/proto/otlp v1.7.1 go.starlark.net v0.0.0-20240123142251-f86470692795 google.golang.org/genproto/googleapis/rpc v0.0.0-20250728155136-f173205681a0 google.golang.org/grpc v1.74.2 @@ -23,7 +23,7 @@ require ( cel.dev/expr v0.24.0 // indirect github.com/envoyproxy/go-control-plane/ratelimit v0.1.0 // indirect github.com/envoyproxy/protoc-gen-validate v1.2.1 // indirect - github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.1 // indirect github.com/kr/text v0.2.0 // indirect github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect golang.org/x/net v0.42.0 // indirect diff --git a/go.sum b/go.sum index 178325ff859..5c1bed78cf8 100644 --- a/go.sum +++ b/go.sum @@ -9,6 +9,8 @@ github.com/envoyproxy/go-control-plane v0.13.5-0.20250816044120-872f08a0dcd7 h1: github.com/envoyproxy/go-control-plane v0.13.5-0.20250816044120-872f08a0dcd7/go.mod h1:QfIsBfUKS+TDsIM8Ynd92VrjDtBgWW1CpjLXSBTlyEI= github.com/envoyproxy/go-control-plane/envoy v1.32.4 h1:jb83lalDRZSpPWW2Z7Mck/8kXZ5CQAFYVjQcdVIr83A= github.com/envoyproxy/go-control-plane/envoy v1.32.4/go.mod h1:Gzjc5k8JcJswLjAx1Zm+wSYE20UrLtt7JZMWiWQXQEw= +github.com/envoyproxy/go-control-plane/envoy v1.32.5-0.20250902220953-4a410393a630 h1:UlhERwhc4Iea3rHstrWA1ApDxU/WQn5Wc2cKxgvo08Y= +github.com/envoyproxy/go-control-plane/envoy v1.32.5-0.20250902220953-4a410393a630/go.mod h1:2LcmvJoXsDSrsGZIxGM0Gah9ykiwTn/kgjyQdnNH8Jc= github.com/envoyproxy/go-control-plane/ratelimit v0.1.0 h1:/G9QYbddjL25KvtKTv3an9lx6VBE2cnb8wp1vEGNYGI= github.com/envoyproxy/go-control-plane/ratelimit v0.1.0/go.mod h1:Wk+tMFAFbCXaJPzVVHnPgRKdUdwW/KdbRt94AzgRee4= github.com/envoyproxy/protoc-gen-validate v1.2.1 h1:DEo3O99U8j4hBFwbJfrz9VtgcDfUKS7KJ7spH3d86P8= @@ -26,6 +28,8 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.0 h1:Wqo399gCIufwto+VfwCSvsnfGpF/w5E9CNxSwbpD6No= github.com/grpc-ecosystem/grpc-gateway/v2 v2.19.0/go.mod h1:qmOFXW2epJhM0qSnUUYpldc7gVz2KMQwJ/QYCDIa7XU= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.1 h1:X5VWvz21y3gzm9Nw/kaUeku/1+uBhcekkmy4IkffJww= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.1/go.mod h1:Zanoh4+gvIgluNqcfMVTJueD4wSS5hT7zTt4Mrutd90= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= @@ -36,6 +40,8 @@ github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZb github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= +github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= +github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= github.com/prometheus/common v0.46.0 h1:doXzt5ybi1HBKpsZOL0sSkaNHJJqkyfEWZGGqqScV0Y= github.com/prometheus/common v0.46.0/go.mod h1:Tp0qkxpb9Jsg54QMe+EAmqXkSV7Evdy1BTn+g2pa/hQ= github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= @@ -56,6 +62,8 @@ go.opentelemetry.io/otel/trace v1.36.0 h1:ahxWNuqZjpdiFAyrIoQ4GIiAIhxAunQR6MUoKr go.opentelemetry.io/otel/trace v1.36.0/go.mod h1:gQ+OnDZzrybY4k4seLzPAWNwVBBVlF2szhehOBB/tGA= go.opentelemetry.io/proto/otlp v1.1.0 h1:2Di21piLrCqJ3U3eXGCTPHE9R8Nh+0uglSnOyxikMeI= go.opentelemetry.io/proto/otlp v1.1.0/go.mod h1:GpBHCBWiqvVLDqmHZsoMM3C5ySeKTC7ej/RNTae6MdY= +go.opentelemetry.io/proto/otlp v1.7.1 h1:gTOMpGDb0WTBOP8JaO72iL3auEZhVmAQg4ipjOVAtj4= +go.opentelemetry.io/proto/otlp v1.7.1/go.mod h1:b2rVh6rfI/s2pHWNlB7ILJcRALpcNDzKhACevjI+ZnE= go.starlark.net v0.0.0-20240123142251-f86470692795 h1:LmbG8Pq7KDGkglKVn8VpZOZj6vb9b8nKEGcg9l03epM= go.starlark.net v0.0.0-20240123142251-f86470692795/go.mod h1:LcLNIzVOMp4oV+uusnpk+VU+SzXaJakUuBjoCSWH5dM= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= diff --git a/source/extensions/filters/http/istio_stats/config.proto b/source/extensions/filters/http/istio_stats/config.proto index cff0963a198..5f50e33e698 100644 --- a/source/extensions/filters/http/istio_stats/config.proto +++ b/source/extensions/filters/http/istio_stats/config.proto @@ -114,9 +114,11 @@ message PluginConfig { // Metric scope rotation interval. Set to 0 to disable the metric scope rotation. // Defaults to 0. + // DEPRECATED. google.protobuf.Duration rotation_interval = 11; // Metric expiry graceful deletion interval. No-op if the metric rotation is disabled. // Defaults to 5m. Must be >=1s. + // DEPRECATED. google.protobuf.Duration graceful_deletion_interval = 12; } diff --git a/source/extensions/filters/http/istio_stats/istio_stats.cc b/source/extensions/filters/http/istio_stats/istio_stats.cc index 3cb4ddf6dc0..5085047597b 100644 --- a/source/extensions/filters/http/istio_stats/istio_stats.cc +++ b/source/extensions/filters/http/istio_stats/istio_stats.cc @@ -161,8 +161,8 @@ peerInfo(Reporter reporter, const StreamInfo::FilterState& filter_state) { // Process-wide context shared with all filter instances. struct Context : public Singleton::Instance { - explicit Context(Stats::SymbolTable& symbol_table, const LocalInfo::LocalInfo& local_info) - : pool_(symbol_table), local_info_(local_info), + explicit Context(Stats::Scope& scope, const LocalInfo::LocalInfo& local_info) + : pool_(scope.symbolTable()), local_info_(local_info), stat_namespace_(pool_.add(CustomStatNamespace)), requests_total_(pool_.add("istio_requests_total")), request_duration_milliseconds_(pool_.add("istio_request_duration_milliseconds")), @@ -214,7 +214,8 @@ struct Context : public Singleton::Instance { cluster_name_(pool_.add(extractString(local_info.node().metadata(), "CLUSTER_ID"))), waypoint_(pool_.add("waypoint")), istio_build_(pool_.add("istio_build")), component_(pool_.add("component")), proxy_(pool_.add("proxy")), tag_(pool_.add("tag")), - istio_version_(pool_.add(extractString(local_info.node().metadata(), "ISTIO_VERSION"))) { + istio_version_(pool_.add(extractString(local_info.node().metadata(), "ISTIO_VERSION"))), + scope_(scope.createScope("", true)) { all_metrics_ = { {"requests_total", requests_total_}, {"request_duration_milliseconds", request_duration_milliseconds_}, @@ -335,6 +336,9 @@ struct Context : public Singleton::Instance { const Stats::StatName proxy_; const Stats::StatName tag_; const Stats::StatName istio_version_; + + // Shared evictable stats scope + Stats::ScopeSharedPtr scope_; }; // namespace using ContextSharedPtr = std::shared_ptr; @@ -443,86 +447,15 @@ struct MetricOverrides : public Logger::Loggable { absl::flat_hash_map expression_ids_; }; -// Self-managed scope with active rotation. Envoy stats scope controls the -// lifetime of the individual metrics. Because the scope is attached to xDS -// resources, metrics with data derived from the requests can accumulate and -// grow indefinitely for long-living xDS resources. To limit this growth, -// this class implements a rotation mechanism, whereas a new scope is created -// periodically to replace the current scope. -// -// The replaced stats scope is deleted gracefully after a minimum of 1s delay -// for two reasons: -// -// 1. Stats flushing is asynchronous and the data may be lost if not flushed -// before the deletion (see stats_flush_interval). -// -// 2. The implementation avoids locking by releasing a raw pointer to workers. -// When the rotation happens on the main, the raw pointer may still be in-use -// by workers for a short duration. -class RotatingScope : public Logger::Loggable { -public: - RotatingScope(Server::Configuration::FactoryContext& factory_context, uint64_t rotate_interval_ms, - uint64_t delete_interval_ms) - : parent_scope_(factory_context.scope()), active_scope_(parent_scope_.createScope("")), - raw_scope_(active_scope_.get()), rotate_interval_ms_(rotate_interval_ms), - delete_interval_ms_(delete_interval_ms) { - if (rotate_interval_ms_ > 0) { - ASSERT(delete_interval_ms_ < rotate_interval_ms_); - ASSERT(delete_interval_ms_ >= 1000); - Event::Dispatcher& dispatcher = factory_context.serverFactoryContext().mainThreadDispatcher(); - rotate_timer_ = dispatcher.createTimer([this] { onRotate(); }); - delete_timer_ = dispatcher.createTimer([this] { onDelete(); }); - rotate_timer_->enableTimer(std::chrono::milliseconds(rotate_interval_ms_)); - } - } - ~RotatingScope() { - if (rotate_timer_) { - rotate_timer_->disableTimer(); - rotate_timer_.reset(); - } - if (delete_timer_) { - delete_timer_->disableTimer(); - delete_timer_.reset(); - } - } - Stats::Scope* scope() { return raw_scope_.load(); } - -private: - void onRotate() { - ENVOY_LOG(info, "Rotating active Istio stats scope after {}ms.", rotate_interval_ms_); - draining_scope_ = active_scope_; - delete_timer_->enableTimer(std::chrono::milliseconds(delete_interval_ms_)); - active_scope_ = parent_scope_.createScope(""); - raw_scope_.store(active_scope_.get()); - rotate_timer_->enableTimer(std::chrono::milliseconds(rotate_interval_ms_)); - } - void onDelete() { - ENVOY_LOG(info, "Deleting draining Istio stats scope after {}ms.", delete_interval_ms_); - draining_scope_.reset(); - } - Stats::Scope& parent_scope_; - Stats::ScopeSharedPtr active_scope_; - std::atomic raw_scope_; - Stats::ScopeSharedPtr draining_scope_{nullptr}; - const uint64_t rotate_interval_ms_; - const uint64_t delete_interval_ms_; - Event::TimerPtr rotate_timer_{nullptr}; - Event::TimerPtr delete_timer_{nullptr}; -}; - struct Config : public Logger::Loggable { Config(const stats::PluginConfig& proto_config, Server::Configuration::FactoryContext& factory_context) : context_(factory_context.serverFactoryContext().singletonManager().getTyped( SINGLETON_MANAGER_REGISTERED_NAME(Context), [&factory_context] { - return std::make_shared( - factory_context.serverFactoryContext().scope().symbolTable(), - factory_context.serverFactoryContext().localInfo()); + return std::make_shared(factory_context.serverFactoryContext().scope(), + factory_context.serverFactoryContext().localInfo()); })), - scope_(factory_context, PROTOBUF_GET_MS_OR_DEFAULT(proto_config, rotation_interval, 0), - PROTOBUF_GET_MS_OR_DEFAULT(proto_config, graceful_deletion_interval, - /* 5m */ 1000 * 60 * 5)), disable_host_header_fallback_(proto_config.disable_host_header_fallback()), report_duration_( PROTOBUF_GET_MS_OR_DEFAULT(proto_config, tcp_reporting_duration, /* 5s */ 5000)) { @@ -548,7 +481,7 @@ struct Config : public Logger::Loggable { break; } if (proto_config.metrics_size() > 0 || proto_config.definitions_size() > 0) { - metric_overrides_ = std::make_unique(context_, scope()->symbolTable()); + metric_overrides_ = std::make_unique(context_, scope().symbolTable()); for (const auto& definition : proto_config.definitions()) { const auto& it = context_->all_metrics_.find(definition.name()); if (it != context_->all_metrics_.end()) { @@ -719,12 +652,12 @@ struct Config : public Logger::Loggable { return; } auto new_tags = parent_.metric_overrides_->overrideTags(metric, tags, expr_values_); - Stats::Utility::counterFromStatNames(*parent_.scope(), + Stats::Utility::counterFromStatNames(parent_.scope(), {parent_.context_->stat_namespace_, metric}, new_tags) .add(amount); return; } - Stats::Utility::counterFromStatNames(*parent_.scope(), + Stats::Utility::counterFromStatNames(parent_.scope(), {parent_.context_->stat_namespace_, metric}, tags) .add(amount); } @@ -738,12 +671,12 @@ struct Config : public Logger::Loggable { } auto new_tags = parent_.metric_overrides_->overrideTags(metric, tags, expr_values_); Stats::Utility::histogramFromStatNames( - *parent_.scope(), {parent_.context_->stat_namespace_, metric}, unit, new_tags) + parent_.scope(), {parent_.context_->stat_namespace_, metric}, unit, new_tags) .recordValue(value); return; } Stats::Utility::histogramFromStatNames( - *parent_.scope(), {parent_.context_->stat_namespace_, metric}, unit, tags) + parent_.scope(), {parent_.context_->stat_namespace_, metric}, unit, tags) .recordValue(value); } @@ -756,17 +689,17 @@ struct Config : public Logger::Loggable { switch (metric.type_) { case MetricOverrides::MetricType::Counter: Stats::Utility::counterFromStatNames( - *parent_.scope(), {parent_.context_->stat_namespace_, metric.name_}, tags) + parent_.scope(), {parent_.context_->stat_namespace_, metric.name_}, tags) .add(amount); break; case MetricOverrides::MetricType::Histogram: Stats::Utility::histogramFromStatNames( - *parent_.scope(), {parent_.context_->stat_namespace_, metric.name_}, + parent_.scope(), {parent_.context_->stat_namespace_, metric.name_}, Stats::Histogram::Unit::Bytes, tags) .recordValue(amount); break; case MetricOverrides::MetricType::Gauge: - Stats::Utility::gaugeFromStatNames(*parent_.scope(), + Stats::Utility::gaugeFromStatNames(parent_.scope(), {parent_.context_->stat_namespace_, metric.name_}, Stats::Gauge::ImportMode::Accumulate, tags) .set(amount); @@ -797,10 +730,9 @@ struct Config : public Logger::Loggable { } Reporter reporter() const { return reporter_; } - Stats::Scope* scope() { return scope_.scope(); } + Stats::Scope& scope() { return *context_->scope_; } ContextSharedPtr context_; - RotatingScope scope_; Reporter reporter_; const bool disable_host_header_fallback_; @@ -817,7 +749,7 @@ class IstioStatsFilter : public Http::PassThroughFilter, public Network::ConnectionCallbacks { public: IstioStatsFilter(ConfigSharedPtr config) - : config_(config), context_(*config->context_), pool_(config->scope()->symbolTable()), + : config_(config), context_(*config->context_), pool_(config->scope().symbolTable()), stream_(*config_, pool_) { tags_.reserve(25); switch (config_->reporter()) { diff --git a/test/envoye2e/stats_plugin/stats_test.go b/test/envoye2e/stats_plugin/stats_test.go index efa72a17eb5..28691918f92 100644 --- a/test/envoye2e/stats_plugin/stats_test.go +++ b/test/envoye2e/stats_plugin/stats_test.go @@ -898,9 +898,10 @@ func TestTCPStatsServerWaypointProxyCONNECT(t *testing.T) { func TestStatsExpiry(t *testing.T) { params := driver.NewTestParams(t, map[string]string{ - "RequestCount": "1", - "StatsConfig": driver.LoadTestData("testdata/bootstrap/stats.yaml.tmpl"), - "StatsFilterClientConfig": driver.LoadTestJSON("testdata/stats/client_config_expiry.yaml"), + "RequestCount": "1", + "StatsConfig": driver.LoadTestData("testdata/bootstrap/stats.yaml.tmpl") + "\n" + + driver.LoadTestData("testdata/bootstrap/stats_expiry.yaml.tmpl"), + "StatsFilterClientConfig": driver.LoadTestJSON("testdata/stats/client_config.yaml"), "StatsFilterServerConfig": driver.LoadTestJSON("testdata/stats/server_config.yaml"), }, envoye2e.ProxyE2ETests) params.Vars["ClientMetadata"] = params.LoadTestData("testdata/client_node_metadata.json.tmpl") diff --git a/testdata/bootstrap/stats_expiry.yaml.tmpl b/testdata/bootstrap/stats_expiry.yaml.tmpl new file mode 100644 index 00000000000..d0410bded2a --- /dev/null +++ b/testdata/bootstrap/stats_expiry.yaml.tmpl @@ -0,0 +1,2 @@ +stats_flush_interval: 1s +stats_eviction_interval: 1s diff --git a/testdata/stats/client_config_expiry.yaml b/testdata/stats/client_config_expiry.yaml deleted file mode 100644 index 1b95d3d472d..00000000000 --- a/testdata/stats/client_config_expiry.yaml +++ /dev/null @@ -1,2 +0,0 @@ -rotation_interval: 2s -graceful_deletion_interval: 1s