diff --git a/docs/root/statistics.md b/docs/root/statistics.md new file mode 100644 index 000000000..932e5e53f --- /dev/null +++ b/docs/root/statistics.md @@ -0,0 +1,134 @@ +# Nighthawk Statistics + +## Background +Currently Nighthawk only outputs metrics at the end of a test run and there are +no metrics streamed during a test run. The work to stream its metrics is in +progress. + + +## Statistics in Nighthawk +All statistics defined below in Nighthawk are per worker. + +For counter metrics, Nighthawk uses Envoy's Counter directly. For histogram +metrics, Nighthawk wraps Envoy's Histogram into its own Statistic concept (see +[#391](https://github.com/envoyproxy/nighthawk/pull/391)). + +Name | Type | Description +-----| ----- | ---------------- +upstream_rq_total | Counter | Total number of requests sent from Nighthawk +http_1xx | Counter | Total number of responses with code 1xx +http_2xx | Counter | Total number of responses with code 2xx +http_3xx | Counter | Total number of responses with code 3xx +http_4xx | Counter | Total number of responses with code 4xx +http_5xx | Counter | Total number of responses with code 5xx +http_xxx | Counter | Total number of responses with code <100 or >=600 +stream_resets | Counter | Total number of stream resets +pool_overflow | Counter | Total number of times connection pool overflowed +pool_connection_failure | Counter | Total number of times pool connection failed +benchmark_http_client.latency_1xx | HdrStatistic | Latency (in Nanosecond) histogram of requests with code 1xx +benchmark_http_client.latency_2xx | HdrStatistic | Latency (in Nanosecond) histogram of requests with code 2xx +benchmark_http_client.latency_3xx | HdrStatistic | Latency (in Nanosecond) histogram of requests with code 3xx +benchmark_http_client.latency_4xx | HdrStatistic | Latency (in Nanosecond) histogram of requests with code 4xx +benchmark_http_client.latency_5xx | HdrStatistic | Latency (in Nanosecond) histogram of requests with code 5xx 
+benchmark_http_client.latency_xxx | HdrStatistic | Latency (in Nanosecond) histogram of requests with code <100 or >=600 +benchmark_http_client.queue_to_connect | HdrStatistic | Histogram of request connection time (in Nanosecond) +benchmark_http_client.request_to_response | HdrStatistic | Latency (in Nanosecond) histogram including requests with stream reset or pool failure +benchmark_http_client.response_header_size | StreamingStatistic | Statistic of response header size (min, max, mean, pstdev values in bytes) +benchmark_http_client.response_body_size | StreamingStatistic | Statistic of response body size (min, max, mean, pstdev values in bytes) +sequencer.callback | HdrStatistic | Latency (in Nanosecond) histogram of unblocked requests +sequencer.blocking | HdrStatistic | Latency (in Nanosecond) histogram of blocked requests + + +## Envoy Metrics Model + +[Envoy](https://github.com/envoyproxy/envoy) has 3 types of metrics: +- Counters: Unsigned integers (can only increase) represent how many times an + event happens, e.g. total number of requests. +- Gauges: Unsigned integers (can increase or decrease), e.g. number of active + connections. +- Histograms: Unsigned integers that will yield summarized percentile values. + E.g. latency distributions. + +In Envoy, the stat +[Store](https://github.com/envoyproxy/envoy/blob/74530c92cfa3682b49b540fddf2aba40ac10c68e/include/envoy/stats/store.h#L29) +is a singleton and provides a simple interface by which the rest of the code can +obtain handles to +[scopes](https://github.com/envoyproxy/envoy/blob/958745d658752f90f544296d9e75030519a9fb84/include/envoy/stats/scope.h#L37), +counters, gauges, and histograms. Envoy counters and gauges are periodically +(configured at ~5 sec interval) flushed to the sinks. Note that currently +histogram values are sent directly to the sinks. 
A stat +[Sink](https://github.com/envoyproxy/envoy/blob/74530c92cfa3682b49b540fddf2aba40ac10c68e/include/envoy/stats/sink.h#L48) +is an interface that takes generic stat data and translates it into a +backend-specific wire format. Currently Envoy supports the TCP and UDP +[statsd](https://github.com/b/statsd_spec) protocol (implemented in +[statsd.h](https://github.com/envoyproxy/envoy/blob/master/source/extensions/stat_sinks/common/statsd/statsd.h)). +Users can create their own Sink subclass to translate Envoy metrics into +backend-specific format. + +Envoy metrics can be defined using a macro, e.g. +```cc +// Define Envoy stats. +#define ALL_CLUSTER_STATS(COUNTER, GAUGE, HISTOGRAM) + COUNTER(upstream_cx_total) + GAUGE(upstream_cx_active, NeverImport) + HISTOGRAM(upstream_cx_length, Milliseconds) +// Put these stats as members of a struct. +struct ClusterStats { + ALL_CLUSTER_STATS(GENERATE_COUNTER_STRUCT, GENERATE_GAUGE_STRUCT, GENERATE_HISTOGRAM_STRUCT) +}; +// Instantiate the above struct using a Stats::Pool. +ClusterStats stats{ + ALL_CLUSTER_STATS(POOL_COUNTER(...), POOL_GAUGE(...), POOL_HISTOGRAM(...))}; + +// Stats can be updated in the code: +stats.upstream_cx_total_.inc(); +stats.upstream_cx_active_.set(...); +stats.upstream_cx_length_.recordValue(...); +``` + +## Envoy Metrics Limitation +Currently Envoy metrics don't support key-value map. As a result, for metrics to +be broken down by certain dimensions, we need to define a separate metric for +each dimension. For example, currently Nighthawk defines +[separate counters](https://github.com/envoyproxy/nighthawk/blob/master/source/client/benchmark_client_impl.h#L35-L40) +to monitor the number of responses with corresponding response code. + +## Envoy Metrics Flush +Envoy uses a flush timer to periodically flush metrics into stat sinks +([here](https://github.com/envoyproxy/envoy/blob/74530c92cfa3682b49b540fddf2aba40ac10c68e/source/server/server.cc#L479-L480)) +at a configured interval (default to 5 sec). 
For every metric flush, Envoy will +call the function +[flushMetricsToSinks()](https://github.com/envoyproxy/envoy/blob/74530c92cfa3682b49b540fddf2aba40ac10c68e/source/server/server.cc#L175) +to create a metric snapshot from Envoy stat store and flush the snapshot to all +sinks through `sink->flush(snapshot)`. + + +## Metrics Export in Nighthawk +Currently a single Nighthawk can run with multiple workers. In the future, +Nighthawk will be extended to be able to run multiple instances together. Since +each Nighthawk worker sends requests independently, we decided to export per +worker level metrics since it provides several advantages over global level +metrics (aggregated across all workers). +- Per worker level metrics provide information about the performance of each + worker which will be hidden by the global level metrics. +- Keep the workers independent which makes it easier/efficient to scale up to + multiple Nighthawks with large numbers of workers. (The work to scale up to + multiple Nighthawks is still under development). + +Envoy metrics can be defined at per worker level using +[Scope](https://github.com/envoyproxy/envoy/blob/e9c2c8c4a0141c9634316e8283f98f412d0dd207/include/envoy/stats/scope.h#L35) +(e.g. `cluster.<worker_id>.total_request_sent`). The dynamic portions of the +metric (e.g. `worker_id`) can be embedded into the metric name. A +[TagSpecifier](https://github.com/envoyproxy/envoy/blob/7a652daf35d7d4a6a6bad5a010fe65947ee4411a/api/envoy/config/metrics/v3/stats.proto#L182) +can be specified in the bootstrap configuration, which will transform dynamic +portions into tags. When per worker level metrics are exported from Nighthawk, +multiple per worker level metrics can be converted into a single metric with a +`worker_id` label in the stat Sink if the corresponding backend metric supports +key-value map. 
+ +## Reference +- [Nighthawk: architecture and key + concepts](https://github.com/envoyproxy/nighthawk/blob/master/docs/root/overview.md) +- [Envoy Stats + System](https://github.com/envoyproxy/envoy/blob/master/source/docs/stats.md) +- [Envoy Stats blog](https://blog.envoyproxy.io/envoy-stats-b65c7f363342) diff --git a/include/nighthawk/client/factories.h b/include/nighthawk/client/factories.h index dc393d8a3..cc743d36c 100644 --- a/include/nighthawk/client/factories.h +++ b/include/nighthawk/client/factories.h @@ -33,9 +33,10 @@ class BenchmarkClientFactory { * @param scope stats scope for any stats tracked by the benchmark client. * @param cluster_manager Cluster manager preconfigured with our target cluster. * @param http_tracer Shared pointer to an http tracer implementation (e.g. Zipkin). - * @param cluster_name Name of the cluster that this benchmark client will use. In conjunction - * with cluster_manager this will allow the this BenchmarkClient to access the target connection - * pool. + * @param cluster_name Name of the cluster that this benchmark client + * will use. In conjunction with cluster_manager this will allow this BenchmarkClient to + * access the target connection pool. + * @param worker_id Worker number. + * @param request_source Source of request-specifiers. Will be queries every time the * BenchmarkClient is asked to issue a request. 
* @@ -45,7 +46,7 @@ class BenchmarkClientFactory { Envoy::Stats::Scope& scope, Envoy::Upstream::ClusterManagerPtr& cluster_manager, Envoy::Tracing::HttpTracerSharedPtr& http_tracer, - absl::string_view cluster_name, + absl::string_view cluster_name, int worker_id, RequestSource& request_source) const PURE; }; diff --git a/source/client/benchmark_client_impl.cc b/source/client/benchmark_client_impl.cc index 0a76c1547..32c10a4c1 100644 --- a/source/client/benchmark_client_impl.cc +++ b/source/client/benchmark_client_impl.cc @@ -20,6 +20,34 @@ using namespace std::chrono_literals; namespace Nighthawk { namespace Client { +BenchmarkClientStatistic::BenchmarkClientStatistic(BenchmarkClientStatistic&& statistic) noexcept + : connect_statistic(std::move(statistic.connect_statistic)), + response_statistic(std::move(statistic.response_statistic)), + response_header_size_statistic(std::move(statistic.response_header_size_statistic)), + response_body_size_statistic(std::move(statistic.response_body_size_statistic)), + latency_1xx_statistic(std::move(statistic.latency_1xx_statistic)), + latency_2xx_statistic(std::move(statistic.latency_2xx_statistic)), + latency_3xx_statistic(std::move(statistic.latency_3xx_statistic)), + latency_4xx_statistic(std::move(statistic.latency_4xx_statistic)), + latency_5xx_statistic(std::move(statistic.latency_5xx_statistic)), + latency_xxx_statistic(std::move(statistic.latency_xxx_statistic)) {} + +BenchmarkClientStatistic::BenchmarkClientStatistic( + StatisticPtr&& connect_stat, StatisticPtr&& response_stat, + StatisticPtr&& response_header_size_stat, StatisticPtr&& response_body_size_stat, + StatisticPtr&& latency_1xx_stat, StatisticPtr&& latency_2xx_stat, + StatisticPtr&& latency_3xx_stat, StatisticPtr&& latency_4xx_stat, + StatisticPtr&& latency_5xx_stat, StatisticPtr&& latency_xxx_stat) + : connect_statistic(std::move(connect_stat)), response_statistic(std::move(response_stat)), + 
response_header_size_statistic(std::move(response_header_size_stat)), + response_body_size_statistic(std::move(response_body_size_stat)), + latency_1xx_statistic(std::move(latency_1xx_stat)), + latency_2xx_statistic(std::move(latency_2xx_stat)), + latency_3xx_statistic(std::move(latency_3xx_stat)), + latency_4xx_statistic(std::move(latency_4xx_stat)), + latency_5xx_statistic(std::move(latency_5xx_stat)), + latency_xxx_statistic(std::move(latency_xxx_stat)) {} + Envoy::Http::ConnectionPool::Cancellable* Http1PoolImpl::newStream(Envoy::Http::ResponseDecoder& response_decoder, Envoy::Http::ConnectionPool::Callbacks& callbacks) { @@ -49,24 +77,26 @@ Http1PoolImpl::newStream(Envoy::Http::ResponseDecoder& response_decoder, BenchmarkClientHttpImpl::BenchmarkClientHttpImpl( Envoy::Api::Api& api, Envoy::Event::Dispatcher& dispatcher, Envoy::Stats::Scope& scope, - StatisticPtr&& connect_statistic, StatisticPtr&& response_statistic, - StatisticPtr&& response_header_size_statistic, StatisticPtr&& response_body_size_statistic, - bool use_h2, Envoy::Upstream::ClusterManagerPtr& cluster_manager, + BenchmarkClientStatistic& statistic, bool use_h2, + Envoy::Upstream::ClusterManagerPtr& cluster_manager, Envoy::Tracing::HttpTracerSharedPtr& http_tracer, absl::string_view cluster_name, RequestGenerator request_generator, const bool provide_resource_backpressure) : api_(api), dispatcher_(dispatcher), scope_(scope.createScope("benchmark.")), - connect_statistic_(std::move(connect_statistic)), - response_statistic_(std::move(response_statistic)), - response_header_size_statistic_(std::move(response_header_size_statistic)), - response_body_size_statistic_(std::move(response_body_size_statistic)), use_h2_(use_h2), - benchmark_client_stats_({ALL_BENCHMARK_CLIENT_STATS(POOL_COUNTER(*scope_))}), + statistic_(std::move(statistic)), use_h2_(use_h2), + benchmark_client_counters_({ALL_BENCHMARK_CLIENT_COUNTERS(POOL_COUNTER(*scope_))}), cluster_manager_(cluster_manager), http_tracer_(http_tracer), 
cluster_name_(std::string(cluster_name)), request_generator_(std::move(request_generator)), provide_resource_backpressure_(provide_resource_backpressure) { - connect_statistic_->setId("benchmark_http_client.queue_to_connect"); - response_statistic_->setId("benchmark_http_client.request_to_response"); - response_header_size_statistic_->setId("benchmark_http_client.response_header_size"); - response_body_size_statistic_->setId("benchmark_http_client.response_body_size"); + statistic_.connect_statistic->setId("benchmark_http_client.queue_to_connect"); + statistic_.response_statistic->setId("benchmark_http_client.request_to_response"); + statistic_.response_header_size_statistic->setId("benchmark_http_client.response_header_size"); + statistic_.response_body_size_statistic->setId("benchmark_http_client.response_body_size"); + statistic_.latency_1xx_statistic->setId("benchmark_http_client.latency_1xx"); + statistic_.latency_2xx_statistic->setId("benchmark_http_client.latency_2xx"); + statistic_.latency_3xx_statistic->setId("benchmark_http_client.latency_3xx"); + statistic_.latency_4xx_statistic->setId("benchmark_http_client.latency_4xx"); + statistic_.latency_5xx_statistic->setId("benchmark_http_client.latency_5xx"); + statistic_.latency_xxx_statistic->setId("benchmark_http_client.latency_xxx"); } void BenchmarkClientHttpImpl::terminate() { @@ -79,10 +109,18 @@ void BenchmarkClientHttpImpl::terminate() { StatisticPtrMap BenchmarkClientHttpImpl::statistics() const { StatisticPtrMap statistics; - statistics[connect_statistic_->id()] = connect_statistic_.get(); - statistics[response_statistic_->id()] = response_statistic_.get(); - statistics[response_header_size_statistic_->id()] = response_header_size_statistic_.get(); - statistics[response_body_size_statistic_->id()] = response_body_size_statistic_.get(); + statistics[statistic_.connect_statistic->id()] = statistic_.connect_statistic.get(); + statistics[statistic_.response_statistic->id()] = 
statistic_.response_statistic.get(); + statistics[statistic_.response_header_size_statistic->id()] = + statistic_.response_header_size_statistic.get(); + statistics[statistic_.response_body_size_statistic->id()] = + statistic_.response_body_size_statistic.get(); + statistics[statistic_.latency_1xx_statistic->id()] = statistic_.latency_1xx_statistic.get(); + statistics[statistic_.latency_2xx_statistic->id()] = statistic_.latency_2xx_statistic.get(); + statistics[statistic_.latency_3xx_statistic->id()] = statistic_.latency_3xx_statistic.get(); + statistics[statistic_.latency_4xx_statistic->id()] = statistic_.latency_4xx_statistic.get(); + statistics[statistic_.latency_5xx_statistic->id()] = statistic_.latency_5xx_statistic.get(); + statistics[statistic_.latency_xxx_statistic->id()] = statistic_.latency_xxx_statistic.get(); return statistics; }; @@ -120,9 +158,9 @@ bool BenchmarkClientHttpImpl::tryStartRequest(CompletionCallback caller_completi auto stream_decoder = new StreamDecoder( dispatcher_, api_.timeSource(), *this, std::move(caller_completion_callback), - *connect_statistic_, *response_statistic_, *response_header_size_statistic_, - *response_body_size_statistic_, request->header(), shouldMeasureLatencies(), content_length, - generator_, http_tracer_); + *statistic_.connect_statistic, *statistic_.response_statistic, + *statistic_.response_header_size_statistic, *statistic_.response_body_size_statistic, + request->header(), shouldMeasureLatencies(), content_length, generator_, http_tracer_); requests_initiated_++; pool_ptr->newStream(*stream_decoder, *stream_decoder); return true; @@ -132,23 +170,23 @@ void BenchmarkClientHttpImpl::onComplete(bool success, const Envoy::Http::ResponseHeaderMap& headers) { requests_completed_++; if (!success) { - benchmark_client_stats_.stream_resets_.inc(); + benchmark_client_counters_.stream_resets_.inc(); } else { ASSERT(headers.Status()); const int64_t status = Envoy::Http::Utility::getResponseStatus(headers); if (status > 99 
&& status <= 199) { - benchmark_client_stats_.http_1xx_.inc(); + benchmark_client_counters_.http_1xx_.inc(); } else if (status > 199 && status <= 299) { - benchmark_client_stats_.http_2xx_.inc(); + benchmark_client_counters_.http_2xx_.inc(); } else if (status > 299 && status <= 399) { - benchmark_client_stats_.http_3xx_.inc(); + benchmark_client_counters_.http_3xx_.inc(); } else if (status > 399 && status <= 499) { - benchmark_client_stats_.http_4xx_.inc(); + benchmark_client_counters_.http_4xx_.inc(); } else if (status > 499 && status <= 599) { - benchmark_client_stats_.http_5xx_.inc(); + benchmark_client_counters_.http_5xx_.inc(); } else { - benchmark_client_stats_.http_xxx_.inc(); + benchmark_client_counters_.http_xxx_.inc(); } } } @@ -156,11 +194,11 @@ void BenchmarkClientHttpImpl::onComplete(bool success, void BenchmarkClientHttpImpl::onPoolFailure(Envoy::Http::ConnectionPool::PoolFailureReason reason) { switch (reason) { case Envoy::Http::ConnectionPool::PoolFailureReason::Overflow: - benchmark_client_stats_.pool_overflow_.inc(); + benchmark_client_counters_.pool_overflow_.inc(); break; case Envoy::Http::ConnectionPool::PoolFailureReason::LocalConnectionFailure: case Envoy::Http::ConnectionPool::PoolFailureReason::RemoteConnectionFailure: - benchmark_client_stats_.pool_connection_failure_.inc(); + benchmark_client_counters_.pool_connection_failure_.inc(); break; case Envoy::Http::ConnectionPool::PoolFailureReason::Timeout: break; @@ -169,5 +207,22 @@ void BenchmarkClientHttpImpl::onPoolFailure(Envoy::Http::ConnectionPool::PoolFai } } +void BenchmarkClientHttpImpl::exportLatency(const uint32_t response_code, + const uint64_t latency_ns) { + if (response_code > 99 && response_code <= 199) { + statistic_.latency_1xx_statistic->addValue(latency_ns); + } else if (response_code > 199 && response_code <= 299) { + statistic_.latency_2xx_statistic->addValue(latency_ns); + } else if (response_code > 299 && response_code <= 399) { + 
statistic_.latency_3xx_statistic->addValue(latency_ns); + } else if (response_code > 399 && response_code <= 499) { + statistic_.latency_4xx_statistic->addValue(latency_ns); + } else if (response_code > 499 && response_code <= 599) { + statistic_.latency_5xx_statistic->addValue(latency_ns); + } else { + statistic_.latency_xxx_statistic->addValue(latency_ns); + } +} + } // namespace Client -} // namespace Nighthawk \ No newline at end of file +} // namespace Nighthawk diff --git a/source/client/benchmark_client_impl.h b/source/client/benchmark_client_impl.h index e835f58f3..cc2e9296f 100644 --- a/source/client/benchmark_client_impl.h +++ b/source/client/benchmark_client_impl.h @@ -22,6 +22,8 @@ #include "api/client/options.pb.h" +#include "common/statistic_impl.h" + #include "client/stream_decoder.h" namespace Nighthawk { @@ -31,7 +33,7 @@ using namespace std::chrono_literals; using namespace Envoy; // We need this because of macro expectations. -#define ALL_BENCHMARK_CLIENT_STATS(COUNTER) \ +#define ALL_BENCHMARK_CLIENT_COUNTERS(COUNTER) \ COUNTER(stream_resets) \ COUNTER(http_1xx) \ COUNTER(http_2xx) \ @@ -42,8 +44,35 @@ using namespace Envoy; // We need this because of macro expectations. COUNTER(pool_overflow) \ COUNTER(pool_connection_failure) -struct BenchmarkClientStats { - ALL_BENCHMARK_CLIENT_STATS(GENERATE_COUNTER_STRUCT) +// For counter metrics, Nighthawk use Envoy Counter directly. For histogram metrics, Nighthawk uses +// its own Statistic instead of Envoy Histogram. Here BenchmarkClientCounters contains only counters +// while BenchmarkClientStatistic contains only histograms. +struct BenchmarkClientCounters { + ALL_BENCHMARK_CLIENT_COUNTERS(GENERATE_COUNTER_STRUCT) +}; + +// BenchmarkClientStatistic contains only histogram metrics. 
+struct BenchmarkClientStatistic { + BenchmarkClientStatistic(BenchmarkClientStatistic&& statistic) noexcept; + BenchmarkClientStatistic(StatisticPtr&& connect_stat, StatisticPtr&& response_stat, + StatisticPtr&& response_header_size_stat, + StatisticPtr&& response_body_size_stat, StatisticPtr&& latency_1xx_stat, + StatisticPtr&& latency_2xx_stat, StatisticPtr&& latency_3xx_stat, + StatisticPtr&& latency_4xx_stat, StatisticPtr&& latency_5xx_stat, + StatisticPtr&& latency_xxx_stat); + + // These are declared order dependent. Changing ordering may trigger an assert upon + // destruction when tls has been involved during usage. + StatisticPtr connect_statistic; + StatisticPtr response_statistic; + StatisticPtr response_header_size_statistic; + StatisticPtr response_body_size_statistic; + StatisticPtr latency_1xx_statistic; + StatisticPtr latency_2xx_statistic; + StatisticPtr latency_3xx_statistic; + StatisticPtr latency_4xx_statistic; + StatisticPtr latency_5xx_statistic; + StatisticPtr latency_xxx_statistic; }; class Http1PoolImpl : public Envoy::Http::Http1::ProdConnPoolImpl { @@ -73,11 +102,8 @@ class BenchmarkClientHttpImpl : public BenchmarkClient, public Envoy::Logger::Loggable { public: BenchmarkClientHttpImpl(Envoy::Api::Api& api, Envoy::Event::Dispatcher& dispatcher, - Envoy::Stats::Scope& scope, StatisticPtr&& connect_statistic, - StatisticPtr&& response_statistic, - StatisticPtr&& response_header_size_statistic, - StatisticPtr&& response_body_size_statistic, bool use_h2, - Envoy::Upstream::ClusterManagerPtr& cluster_manager, + Envoy::Stats::Scope& scope, BenchmarkClientStatistic& statistic, + bool use_h2, Envoy::Upstream::ClusterManagerPtr& cluster_manager, Envoy::Tracing::HttpTracerSharedPtr& http_tracer, absl::string_view cluster_name, RequestGenerator request_generator, const bool provide_resource_backpressure); @@ -105,6 +131,7 @@ class BenchmarkClientHttpImpl : public BenchmarkClient, // StreamDecoderCompletionCallback void onComplete(bool success, 
const Envoy::Http::ResponseHeaderMap& headers) override; void onPoolFailure(Envoy::Http::ConnectionPool::PoolFailureReason reason) override; + void exportLatency(const uint32_t response_code, const uint64_t latency_ns) override; // Helpers Envoy::Http::ConnectionPool::Instance* pool() { @@ -117,12 +144,7 @@ class BenchmarkClientHttpImpl : public BenchmarkClient, Envoy::Api::Api& api_; Envoy::Event::Dispatcher& dispatcher_; Envoy::Stats::ScopePtr scope_; - // These are declared order dependent. Changing ordering may trigger on assert upon - // destruction when tls has been involved during usage. - StatisticPtr connect_statistic_; - StatisticPtr response_statistic_; - StatisticPtr response_header_size_statistic_; - StatisticPtr response_body_size_statistic_; + BenchmarkClientStatistic statistic_; const bool use_h2_; std::chrono::seconds timeout_{5s}; uint32_t connection_limit_{1}; @@ -134,7 +156,7 @@ class BenchmarkClientHttpImpl : public BenchmarkClient, uint64_t requests_completed_{}; uint64_t requests_initiated_{}; bool measure_latencies_{}; - BenchmarkClientStats benchmark_client_stats_; + BenchmarkClientCounters benchmark_client_counters_; Envoy::Upstream::ClusterManagerPtr& cluster_manager_; Envoy::Tracing::HttpTracerSharedPtr& http_tracer_; std::string cluster_name_; @@ -143,4 +165,4 @@ class BenchmarkClientHttpImpl : public BenchmarkClient, }; } // namespace Client -} // namespace Nighthawk \ No newline at end of file +} // namespace Nighthawk diff --git a/source/client/client_worker_impl.cc b/source/client/client_worker_impl.cc index 09aaba66a..a5d456763 100644 --- a/source/client/client_worker_impl.cc +++ b/source/client/client_worker_impl.cc @@ -33,7 +33,7 @@ ClientWorkerImpl::ClientWorkerImpl(Envoy::Api::Api& api, Envoy::ThreadLocal::Ins fmt::format("{}.requestsource", worker_number))), benchmark_client_(benchmark_client_factory.create( api, *dispatcher_, *worker_number_scope_, cluster_manager, http_tracer_, - fmt::format("{}", worker_number), 
*request_generator_)), + fmt::format("{}", worker_number), worker_number, *request_generator_)), phase_( std::make_unique("main", sequencer_factory_.create( @@ -106,4 +106,4 @@ StatisticPtrMap ClientWorkerImpl::statistics() const { } } // namespace Client -} // namespace Nighthawk \ No newline at end of file +} // namespace Nighthawk diff --git a/source/client/factories_impl.cc b/source/client/factories_impl.cc index a2e3eb611..eb1187c87 100644 --- a/source/client/factories_impl.cc +++ b/source/client/factories_impl.cc @@ -30,7 +30,7 @@ BenchmarkClientFactoryImpl::BenchmarkClientFactoryImpl(const Options& options) BenchmarkClientPtr BenchmarkClientFactoryImpl::create( Envoy::Api::Api& api, Envoy::Event::Dispatcher& dispatcher, Envoy::Stats::Scope& scope, Envoy::Upstream::ClusterManagerPtr& cluster_manager, - Envoy::Tracing::HttpTracerSharedPtr& http_tracer, absl::string_view cluster_name, + Envoy::Tracing::HttpTracerSharedPtr& http_tracer, absl::string_view cluster_name, int worker_id, RequestSource& request_generator) const { StatisticFactoryImpl statistic_factory(options_); // While we lack options to configure which statistic backend goes where, we directly pass @@ -39,10 +39,18 @@ BenchmarkClientPtr BenchmarkClientFactoryImpl::create( // NullStatistic). // TODO(#292): Create options and have the StatisticFactory consider those when instantiating // statistics. 
+ BenchmarkClientStatistic statistic(statistic_factory.create(), statistic_factory.create(), + std::make_unique(), + std::make_unique(), + std::make_unique(scope, worker_id), + std::make_unique(scope, worker_id), + std::make_unique(scope, worker_id), + std::make_unique(scope, worker_id), + std::make_unique(scope, worker_id), + std::make_unique(scope, worker_id)); auto benchmark_client = std::make_unique( - api, dispatcher, scope, statistic_factory.create(), statistic_factory.create(), - std::make_unique(), std::make_unique(), options_.h2(), - cluster_manager, http_tracer, cluster_name, request_generator.get(), !options_.openLoop()); + api, dispatcher, scope, statistic, options_.h2(), cluster_manager, http_tracer, cluster_name, + request_generator.get(), !options_.openLoop()); auto request_options = options_.toCommandLineOptions()->request_options(); benchmark_client->setConnectionLimit(options_.connections()); benchmark_client->setMaxPendingRequests(options_.maxPendingRequests()); diff --git a/source/client/factories_impl.h b/source/client/factories_impl.h index 928b63f99..5e508b65a 100644 --- a/source/client/factories_impl.h +++ b/source/client/factories_impl.h @@ -31,7 +31,7 @@ class BenchmarkClientFactoryImpl : public OptionBasedFactoryImpl, public Benchma Envoy::Stats::Scope& scope, Envoy::Upstream::ClusterManagerPtr& cluster_manager, Envoy::Tracing::HttpTracerSharedPtr& http_tracer, - absl::string_view cluster_name, + absl::string_view cluster_name, int worker_id, RequestSource& request_generator) const override; }; diff --git a/source/client/stream_decoder.cc b/source/client/stream_decoder.cc index 96c6295d5..a22ec30c2 100644 --- a/source/client/stream_decoder.cc +++ b/source/client/stream_decoder.cc @@ -49,6 +49,14 @@ void StreamDecoder::onComplete(bool success) { ASSERT(!success || complete_); if (success && measure_latencies_) { latency_statistic_.addValue((time_source_.monotonicTime() - request_start_).count()); + // At this point 
StreamDecoder::decodeHeaders() should have been called. + if (stream_info_.response_code_.has_value()) { + decoder_completion_callback_.exportLatency( + stream_info_.response_code_.value(), + (time_source_.monotonicTime() - request_start_).count()); + } else { + ENVOY_LOG(warn, "response_code is not available in onComplete"); + } } upstream_timing_.onLastUpstreamRxByteReceived(time_source_); response_body_sizes_statistic_.addValue(stream_info_.bytesSent()); diff --git a/source/client/stream_decoder.h b/source/client/stream_decoder.h index d4b2c4fc1..0f6d4aeae 100644 --- a/source/client/stream_decoder.h +++ b/source/client/stream_decoder.h @@ -25,6 +25,7 @@ class StreamDecoderCompletionCallback { virtual ~StreamDecoderCompletionCallback() = default; virtual void onComplete(bool success, const Envoy::Http::ResponseHeaderMap& headers) PURE; virtual void onPoolFailure(Envoy::Http::ConnectionPool::PoolFailureReason reason) PURE; + virtual void exportLatency(const uint32_t response_code, const uint64_t latency_ns) PURE; }; // TODO(oschaaf): create a StreamDecoderPool? 
@@ -35,7 +36,8 @@ class StreamDecoderCompletionCallback { class StreamDecoder : public Envoy::Http::ResponseDecoder, public Envoy::Http::StreamCallbacks, public Envoy::Http::ConnectionPool::Callbacks, - public Envoy::Event::DeferredDeletable { + public Envoy::Event::DeferredDeletable, + public Envoy::Logger::Loggable { public: StreamDecoder(Envoy::Event::Dispatcher& dispatcher, Envoy::TimeSource& time_source, StreamDecoderCompletionCallback& decoder_completion_callback, diff --git a/source/common/statistic_impl.cc b/source/common/statistic_impl.cc index b4a697a66..672c0020f 100644 --- a/source/common/statistic_impl.cc +++ b/source/common/statistic_impl.cc @@ -317,7 +317,7 @@ SinkableHdrStatistic::SinkableHdrStatistic(Envoy::Stats::Scope& scope, : SinkableStatistic(scope, worker_id) {} void SinkableHdrStatistic::recordValue(uint64_t value) { - addValue(value); + HdrStatistic::addValue(value); // Currently in Envoy Scope implementation, deliverHistogramToSinks() will flush the histogram // value directly to stats Sinks. scope_.deliverHistogramToSinks(*this, value); @@ -328,7 +328,7 @@ SinkableCircllhistStatistic::SinkableCircllhistStatistic(Envoy::Stats::Scope& sc : SinkableStatistic(scope, worker_id) {} void SinkableCircllhistStatistic::recordValue(uint64_t value) { - addValue(value); + CircllhistStatistic::addValue(value); // Currently in Envoy Scope implementation, deliverHistogramToSinks() will flush the histogram // value directly to stats Sinks. scope_.deliverHistogramToSinks(*this, value); diff --git a/source/common/statistic_impl.h b/source/common/statistic_impl.h index d7e1f240f..f3d83b4de 100644 --- a/source/common/statistic_impl.h +++ b/source/common/statistic_impl.h @@ -223,6 +223,9 @@ class SinkableHdrStatistic : public SinkableStatistic, public HdrStatistic { // Overriding name() to return Nighthawk::Statistic::id(). 
std::string name() const override { return id(); } std::string tagExtractedName() const override { NOT_IMPLEMENTED_GCOVR_EXCL_LINE; } + + // Nighthawk::Statistic + void addValue(uint64_t value) override { recordValue(value); } }; // Implementation of sinkable Nighthawk Statistic with Circllhist Histogram. @@ -239,6 +242,9 @@ class SinkableCircllhistStatistic : public SinkableStatistic, public CircllhistS // Overriding name() to return Nighthawk::Statistic::id(). std::string name() const override { return id(); } std::string tagExtractedName() const override { NOT_IMPLEMENTED_GCOVR_EXCL_LINE; } + + // Nighthawk::Statistic + void addValue(uint64_t value) override { recordValue(value); } }; } // namespace Nighthawk diff --git a/test/benchmark_http_client_test.cc b/test/benchmark_http_client_test.cc index f6d67efe2..8835be557 100644 --- a/test/benchmark_http_client_test.cc +++ b/test/benchmark_http_client_test.cc @@ -33,7 +33,12 @@ class BenchmarkClientHttpTest : public Test { dispatcher_(api_->allocateDispatcher("test_thread")), cluster_manager_(std::make_unique()), cluster_info_(std::make_unique()), - http_tracer_(std::make_unique()), response_code_("200") { + http_tracer_(std::make_unique()), response_code_("200"), + statistic_(std::make_unique(), std::make_unique(), + std::make_unique(), std::make_unique(), + std::make_unique(), std::make_unique(), + std::make_unique(), std::make_unique(), + std::make_unique(), std::make_unique()) { EXPECT_CALL(cluster_manager(), httpConnPoolForCluster(_, _, _, _)) .WillRepeatedly(Return(&pool_)); EXPECT_CALL(cluster_manager(), get(_)).WillRepeatedly(Return(&thread_local_cluster_)); @@ -125,9 +130,7 @@ class BenchmarkClientHttpTest : public Test { void setupBenchmarkClient() { client_ = std::make_unique( - *api_, *dispatcher_, store_, std::make_unique(), - std::make_unique(), std::make_unique(), - std::make_unique(), false, cluster_manager_, http_tracer_, "benchmark", + *api_, *dispatcher_, store_, statistic_, false, 
cluster_manager_, http_tracer_, "benchmark", request_generator_, true); } @@ -161,6 +164,8 @@ class BenchmarkClientHttpTest : public Test { Envoy::Tracing::HttpTracerSharedPtr http_tracer_; std::string response_code_; RequestGenerator request_generator_; + int worker_number_{0}; + Client::BenchmarkClientStatistic statistic_; }; TEST_F(BenchmarkClientHttpTest, BasicTestH1200) { @@ -193,19 +198,48 @@ TEST_F(BenchmarkClientHttpTest, EnableLatencyMeasurement) { testBasicFunctionality(10, 1, 10); EXPECT_EQ(0, client_->statistics()["benchmark_http_client.queue_to_connect"]->count()); EXPECT_EQ(0, client_->statistics()["benchmark_http_client.request_to_response"]->count()); + EXPECT_EQ(10, client_->statistics()["benchmark_http_client.response_header_size"]->count()); + EXPECT_EQ(10, client_->statistics()["benchmark_http_client.response_body_size"]->count()); + EXPECT_EQ(0, client_->statistics()["benchmark_http_client.latency_2xx"]->count()); client_->setShouldMeasureLatencies(true); testBasicFunctionality(10, 1, 10); EXPECT_EQ(10, client_->statistics()["benchmark_http_client.queue_to_connect"]->count()); EXPECT_EQ(10, client_->statistics()["benchmark_http_client.request_to_response"]->count()); + EXPECT_EQ(20, client_->statistics()["benchmark_http_client.response_header_size"]->count()); + EXPECT_EQ(20, client_->statistics()["benchmark_http_client.response_body_size"]->count()); + EXPECT_EQ(10, client_->statistics()["benchmark_http_client.latency_2xx"]->count()); +} + +TEST_F(BenchmarkClientHttpTest, ExportSuccessLatency) { + setupBenchmarkClient(); + uint64_t latency_ns = 10; + client_->exportLatency(/*response_code=*/200, latency_ns); + client_->exportLatency(/*response_code=*/200, latency_ns); + EXPECT_EQ(2, client_->statistics()["benchmark_http_client.latency_2xx"]->count()); + EXPECT_DOUBLE_EQ(latency_ns, client_->statistics()["benchmark_http_client.latency_2xx"]->mean()); +} + +TEST_F(BenchmarkClientHttpTest, ExportErrorLatency) { + setupBenchmarkClient(); + 
client_->exportLatency(/*response_code=*/100, /*latency_ns=*/1); + client_->exportLatency(/*response_code=*/300, /*latency_ns=*/3); + client_->exportLatency(/*response_code=*/400, /*latency_ns=*/4); + client_->exportLatency(/*response_code=*/500, /*latency_ns=*/5); + client_->exportLatency(/*response_code=*/600, /*latency_ns=*/6); + EXPECT_EQ(1, client_->statistics()["benchmark_http_client.latency_1xx"]->count()); + EXPECT_DOUBLE_EQ(1, client_->statistics()["benchmark_http_client.latency_1xx"]->mean()); + EXPECT_EQ(1, client_->statistics()["benchmark_http_client.latency_3xx"]->count()); + EXPECT_DOUBLE_EQ(3, client_->statistics()["benchmark_http_client.latency_3xx"]->mean()); + EXPECT_EQ(1, client_->statistics()["benchmark_http_client.latency_4xx"]->count()); + EXPECT_DOUBLE_EQ(4, client_->statistics()["benchmark_http_client.latency_4xx"]->mean()); + EXPECT_EQ(1, client_->statistics()["benchmark_http_client.latency_5xx"]->count()); + EXPECT_DOUBLE_EQ(5, client_->statistics()["benchmark_http_client.latency_5xx"]->mean()); + EXPECT_EQ(1, client_->statistics()["benchmark_http_client.latency_xxx"]->count()); + EXPECT_DOUBLE_EQ(6, client_->statistics()["benchmark_http_client.latency_xxx"]->mean()); } TEST_F(BenchmarkClientHttpTest, StatusTrackingInOnComplete) { - auto store = std::make_unique(); - client_ = std::make_unique( - *api_, *dispatcher_, *store, std::make_unique(), - std::make_unique(), std::make_unique(), - std::make_unique(), false, cluster_manager_, http_tracer_, "foo", - request_generator_, true); + setupBenchmarkClient(); Envoy::Http::ResponseHeaderMapPtr header = Envoy::Http::ResponseHeaderMapImpl::create(); header->setStatus(1); diff --git a/test/client_worker_test.cc b/test/client_worker_test.cc index c2a7a9dae..8ffdf6680 100644 --- a/test/client_worker_test.cc +++ b/test/client_worker_test.cc @@ -43,7 +43,7 @@ class ClientWorkerTest : public Test { sequencer_ = new MockSequencer(); request_generator_ = new MockRequestSource(); - 
EXPECT_CALL(benchmark_client_factory_, create(_, _, _, _, _, _, _)) + EXPECT_CALL(benchmark_client_factory_, create(_, _, _, _, _, _, _, _)) .Times(1) .WillOnce(Return(ByMove(std::unique_ptr(benchmark_client_)))); diff --git a/test/factories_test.cc b/test/factories_test.cc index 2f82f96e5..2a74d8c5b 100644 --- a/test/factories_test.cc +++ b/test/factories_test.cc @@ -45,8 +45,9 @@ TEST_F(FactoriesTest, CreateBenchmarkClient) { EXPECT_CALL(options_, toCommandLineOptions()).Times(1).WillOnce(Return(ByMove(std::move(cmd)))); StaticRequestSourceImpl request_generator( std::make_unique()); - auto benchmark_client = factory.create(*api_, dispatcher_, stats_store_, cluster_manager, - http_tracer_, "foocluster", request_generator); + auto benchmark_client = + factory.create(*api_, dispatcher_, stats_store_, cluster_manager, http_tracer_, "foocluster", + /*worker_id=*/0, request_generator); EXPECT_NE(nullptr, benchmark_client.get()); } diff --git a/test/integration/test_integration_basics.py b/test/integration/test_integration_basics.py index 85bc4f59b..e342c27c4 100644 --- a/test/integration/test_integration_basics.py +++ b/test/integration/test_integration_basics.py @@ -91,6 +91,8 @@ def _mini_stress_test(fixture, args): asserts.assertGreaterEqual( int(global_histograms["benchmark_http_client.request_to_response"]["count"]), 1) + asserts.assertGreaterEqual(int(global_histograms["benchmark_http_client.latency_2xx"]["count"]), + 1) return counters @@ -666,6 +668,8 @@ def test_http_request_release_timing(http_test_server_fixture, qps_parameterizat total_requests) asserts.assertEqual(int(global_histograms["benchmark_http_client.queue_to_connect"]["count"]), total_requests) + asserts.assertEqual(int(global_histograms["benchmark_http_client.latency_2xx"]["count"]), + total_requests) asserts.assertCounterEqual(counters, "benchmark.http_2xx", (total_requests)) diff --git a/test/mocks/client/mock_benchmark_client_factory.h b/test/mocks/client/mock_benchmark_client_factory.h index 
d67db595c..bddf91eb1 100644 --- a/test/mocks/client/mock_benchmark_client_factory.h +++ b/test/mocks/client/mock_benchmark_client_factory.h @@ -14,11 +14,11 @@ namespace Client { class MockBenchmarkClientFactory : public BenchmarkClientFactory { public: MockBenchmarkClientFactory(); - MOCK_CONST_METHOD7(create, + MOCK_CONST_METHOD8(create, BenchmarkClientPtr(Envoy::Api::Api&, Envoy::Event::Dispatcher&, Envoy::Stats::Scope&, Envoy::Upstream::ClusterManagerPtr&, Envoy::Tracing::HttpTracerSharedPtr&, absl::string_view, - RequestSource& request_generator)); + int, RequestSource& request_generator)); }; } // namespace Client diff --git a/test/statistic_test.cc b/test/statistic_test.cc index b72d76043..a1fd5d1a2 100644 --- a/test/statistic_test.cc +++ b/test/statistic_test.cc @@ -414,11 +414,12 @@ TYPED_TEST(SinkableStatisticTest, SimpleSinkableStatistic) { const uint64_t sample_value = 123; const std::string stat_name = "stat_name"; - EXPECT_CALL(mock_store, deliverHistogramToSinks(_, sample_value)).Times(1); + EXPECT_CALL(mock_store, deliverHistogramToSinks(_, sample_value)).Times(2); stat.recordValue(sample_value); + stat.addValue(sample_value); stat.setId(stat_name); - EXPECT_EQ(1, stat.count()); + EXPECT_EQ(2, stat.count()); Helper::expectNear(123.0, stat.mean(), stat.significantDigits()); EXPECT_DOUBLE_EQ(0, stat.pvariance()); EXPECT_DOUBLE_EQ(0, stat.pstdev()); diff --git a/test/stream_decoder_test.cc b/test/stream_decoder_test.cc index a18cabc10..ca6556906 100644 --- a/test/stream_decoder_test.cc +++ b/test/stream_decoder_test.cc @@ -37,6 +37,9 @@ class StreamDecoderTest : public Test, public StreamDecoderCompletionCallback { stream_decoder_completion_callbacks_++; } void onPoolFailure(Envoy::Http::ConnectionPool::PoolFailureReason) override { pool_failures_++; } + void exportLatency(const uint32_t, const uint64_t) override { + stream_decoder_export_latency_callbacks_++; + } Envoy::Event::TestRealTimeSystem time_system_; Envoy::Stats::IsolatedStoreImpl store_; @@ 
-49,6 +52,7 @@ class StreamDecoderTest : public Test, public StreamDecoderCompletionCallback { HeaderMapPtr request_headers_; uint64_t stream_decoder_completion_callbacks_{0}; uint64_t pool_failures_{0}; + uint64_t stream_decoder_export_latency_callbacks_{0}; Envoy::Random::RandomGeneratorImpl random_generator_; Envoy::Tracing::HttpTracerSharedPtr http_tracer_; Envoy::Http::ResponseHeaderMapPtr test_header_; @@ -64,6 +68,7 @@ TEST_F(StreamDecoderTest, HeaderOnlyTest) { decoder->decodeHeaders(std::move(test_header_), true); EXPECT_TRUE(is_complete); EXPECT_EQ(1, stream_decoder_completion_callbacks_); + EXPECT_EQ(0, stream_decoder_export_latency_callbacks_); } TEST_F(StreamDecoderTest, HeaderWithBodyTest) { @@ -112,6 +117,7 @@ TEST_F(StreamDecoderTest, LatencyIsNotMeasured) { decoder->decodeHeaders(std::move(test_header_), true); EXPECT_EQ(0, connect_statistic_.count()); EXPECT_EQ(0, latency_statistic_.count()); + EXPECT_EQ(0, stream_decoder_export_latency_callbacks_); } TEST_F(StreamDecoderTest, LatencyIsMeasured) { @@ -146,9 +152,11 @@ TEST_F(StreamDecoderTest, LatencyIsMeasured) { decoder->onPoolReady(stream_encoder, ptr, stream_info); EXPECT_EQ(1, connect_statistic_.count()); decoder->decodeHeaders(std::move(test_header_), false); + EXPECT_EQ(0, stream_decoder_export_latency_callbacks_); decoder->decodeTrailers(std::move(test_trailer_)); EXPECT_EQ(1, connect_statistic_.count()); EXPECT_EQ(1, latency_statistic_.count()); + EXPECT_EQ(1, stream_decoder_export_latency_callbacks_); } TEST_F(StreamDecoderTest, StreamResetTest) { @@ -161,6 +169,7 @@ TEST_F(StreamDecoderTest, StreamResetTest) { decoder->onResetStream(Envoy::Http::StreamResetReason::LocalReset, "fooreason"); EXPECT_TRUE(is_complete); // these do get reported. EXPECT_EQ(1, stream_decoder_completion_callbacks_); + EXPECT_EQ(0, stream_decoder_export_latency_callbacks_); } TEST_F(StreamDecoderTest, PoolFailureTest) {