From 5615e0105b8d024c90e64ebc67ed2c2fe3e3e5a1 Mon Sep 17 00:00:00 2001 From: Stephan Zuercher Date: Tue, 12 Mar 2019 14:50:58 -0700 Subject: [PATCH] stats: report sample count as an integer to prevent loss of precision When the number of samples in a histogram exceeds roughly 2^24, the value of the "+Inf" bucket in Prometheus stats output is rounded and rendered in scientific notation. This causes incorrect results in the Prometheus histogram_quantile function, which assumes that the rounding-error between the 1 hour and +Inf buckets represents some number of requests that took in excess of 1 hour. libcirclhist actually stores the sample count as a uint64_t, so stop implicitly converting it to double and output the count precisely (as we do with the non-infinite buckets). Also modifies the output format of the sum metric to avoid scientific notation. Risk Level: low Testing: added test case Doc Changes: n/a Release Notes: n/a Signed-off-by: Stephan Zuercher --- include/envoy/stats/histogram.h | 2 +- source/common/stats/histogram_impl.h | 4 +- source/server/http/admin.cc | 2 +- test/server/http/admin_test.cc | 59 ++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 4 deletions(-) diff --git a/include/envoy/stats/histogram.h b/include/envoy/stats/histogram.h index bebba3ff976e5..0cd2d6a474f33 100644 --- a/include/envoy/stats/histogram.h +++ b/include/envoy/stats/histogram.h @@ -56,7 +56,7 @@ class HistogramStatistics { * of the number of samples in the histogram, it is not guaranteed that this will be * 100% the number of samples observed. */ - virtual double sampleCount() const PURE; + virtual uint64_t sampleCount() const PURE; /** * Returns sum of all values during the period. diff --git a/source/common/stats/histogram_impl.h b/source/common/stats/histogram_impl.h index ef11744ac7163..b3fc06141c1ae 100644 --- a/source/common/stats/histogram_impl.h +++ b/source/common/stats/histogram_impl.h @@ -37,13 +37,13 @@ class HistogramStatisticsImpl : public HistogramStatistics, NonCopyable { const std::vector& computedQuantiles() const override { return computed_quantiles_; } const std::vector& supportedBuckets() const override; const std::vector& computedBuckets() const override { return computed_buckets_; } - double sampleCount() const override { return sample_count_; } + uint64_t sampleCount() const override { return sample_count_; } double sampleSum() const override { return sample_sum_; } private: std::vector computed_quantiles_; std::vector computed_buckets_; - double sample_count_; + uint64_t sample_count_; double sample_sum_; }; diff --git a/source/server/http/admin.cc b/source/server/http/admin.cc index 9c7da83e0e358..4744b6ff3f950 100644 --- a/source/server/http/admin.cc +++ b/source/server/http/admin.cc @@ -820,7 +820,7 @@ uint64_t PrometheusStatsFormatter::statsAsPrometheus( response.add(fmt::format("{0}_bucket{{{1}le=\"+Inf\"}} {2}\n", metric_name, hist_tags, stats.sampleCount())); - response.add(fmt::format("{0}_sum{{{1}}} {2}\n", metric_name, tags, stats.sampleSum())); + response.add(fmt::format("{0}_sum{{{1}}} {2:.32g}\n", metric_name, tags, stats.sampleSum())); response.add(fmt::format("{0}_count{{{1}}} {2}\n", metric_name, tags, stats.sampleCount())); } diff --git a/test/server/http/admin_test.cc b/test/server/http/admin_test.cc index 76723982e4565..6ddc63ffb0e1c 100644 --- a/test/server/http/admin_test.cc +++ b/test/server/http/admin_test.cc @@ -1254,6 +1254,12 @@ class HistogramWrapper { } } + void setHistogramValuesWithCounts(const std::vector>& values) { + for (std::pair cv : values) { + hist_insert_intscale(histogram_, cv.first, 0, cv.second); + } + } + private: histogram_t* histogram_; }; @@ -1399,6 +1405,59 @@ envoy_histogram1_count{} 0 EXPECT_EQ(expected_output, response.toString()); } +TEST_F(PrometheusStatsFormatterTest, HistogramWithHighCounts) { + HistogramWrapper h1_cumulative; + + // Force large counts to prove that the +Inf bucket doesn't overflow to scientific notation. + h1_cumulative.setHistogramValuesWithCounts(std::vector>({ + {1, 100000}, + {100, 1000000}, + {1000, 100000000}, + })); + + Stats::HistogramStatisticsImpl h1_cumulative_statistics(h1_cumulative.getHistogram()); + + auto histogram = std::make_shared>(); + histogram->name_ = "histogram1"; + histogram->used_ = true; + ON_CALL(*histogram, cumulativeStatistics()) + .WillByDefault(testing::ReturnRef(h1_cumulative_statistics)); + + addHistogram(histogram); + + Buffer::OwnedImpl response; + auto size = + PrometheusStatsFormatter::statsAsPrometheus(counters_, gauges_, histograms_, response, false); + EXPECT_EQ(1UL, size); + + const std::string expected_output = R"EOF(# TYPE envoy_histogram1 histogram +envoy_histogram1_bucket{le="0.5"} 0 +envoy_histogram1_bucket{le="1"} 0 +envoy_histogram1_bucket{le="5"} 100000 +envoy_histogram1_bucket{le="10"} 100000 +envoy_histogram1_bucket{le="25"} 100000 +envoy_histogram1_bucket{le="50"} 100000 +envoy_histogram1_bucket{le="100"} 100000 +envoy_histogram1_bucket{le="250"} 1100000 +envoy_histogram1_bucket{le="500"} 1100000 +envoy_histogram1_bucket{le="1000"} 1100000 +envoy_histogram1_bucket{le="2500"} 101100000 +envoy_histogram1_bucket{le="5000"} 101100000 +envoy_histogram1_bucket{le="10000"} 101100000 +envoy_histogram1_bucket{le="30000"} 101100000 +envoy_histogram1_bucket{le="60000"} 101100000 +envoy_histogram1_bucket{le="300000"} 101100000 +envoy_histogram1_bucket{le="600000"} 101100000 +envoy_histogram1_bucket{le="1800000"} 101100000 +envoy_histogram1_bucket{le="3600000"} 101100000 +envoy_histogram1_bucket{le="+Inf"} 101100000 +envoy_histogram1_sum{} 105105105000 +envoy_histogram1_count{} 101100000 +)EOF"; + + EXPECT_EQ(expected_output, response.toString()); +} + TEST_F(PrometheusStatsFormatterTest, OutputWithAllMetricTypes) { addCounter("cluster.test_1.upstream_cx_total", {{"a.tag-name", "a.tag-value"}}); addCounter("cluster.test_2.upstream_cx_total", {{"another_tag_name", "another_tag-value"}});