envoyproxy · ggreenway · Apr 24, 2020 · Apr 17, 2020 · Apr 17, 2020 · Apr 20, 2020
diff --git a/docs/root/version_history/current.rst b/docs/root/version_history/current.rst
@@ -20,6 +20,7 @@ Changes
 * logger: added :ref:`--log-format-prefix-with-location <operations_cli>` command line option to prefix '%v' with file path and line number.
 * network filters: added a :ref:`postgres proxy filter <config_network_filters_postgres_proxy>`.
 * network filters: added a :ref:`rocketmq proxy filter <config_network_filters_rocketmq_proxy>`.
+* prometheus stats: fix the sort order of output lines to comply with the standard.
 * request_id: added to :ref:`always_set_request_id_in_response setting <envoy_v3_api_field_extensions.filters.network.http_connection_manager.v3.HttpConnectionManager.always_set_request_id_in_response>`
   to set :ref:`x-request-id <config_http_conn_man_headers_x-request-id>` header in response even if
   tracing is not forced.

diff --git a/include/envoy/stats/refcount_ptr.h b/include/envoy/stats/refcount_ptr.h
@@ -25,6 +25,8 @@ namespace Stats {
 // API in years.
 template <class T> class RefcountPtr {
 public:
+  using element_type = T;
+
   RefcountPtr() : ptr_(nullptr) {}
 
   // Constructing a reference-counted object from a pointer; this is safe to

diff --git a/source/server/http/stats_handler.cc b/source/server/http/stats_handler.cc
@@ -190,52 +190,110 @@ uint64_t PrometheusStatsFormatter::statsAsPrometheus(
     const std::vector<Stats::GaugeSharedPtr>& gauges,
     const std::vector<Stats::ParentHistogramSharedPtr>& histograms, Buffer::Instance& response,
     const bool used_only, const absl::optional<std::regex>& regex) {
-  std::unordered_set<std::string> metric_type_tracker;
-  for (const auto& counter : counters) {
-    if (!shouldShowMetric(*counter, used_only, regex)) {
-      continue;
-    }
 
-    const std::string tags = formattedTags(counter->tags());
-    const std::string metric_name = metricName(counter->tagExtractedName());
-    if (metric_type_tracker.find(metric_name) == metric_type_tracker.end()) {
-      metric_type_tracker.insert(metric_name);
-      response.add(fmt::format("# TYPE {0} counter\n", metric_name));
+  /*
+   * From
+   * https:*github.com/prometheus/docs/blob/master/content/docs/instrumenting/exposition_formats.md#grouping-and-sorting:
+   *
+   * All lines for a given metric must be provided as one single group, with the optional HELP and
+   * TYPE lines first (in no particular order). Beyond that, reproducible sorting in repeated
+   * expositions is preferred but not required, i.e. do not sort if the computational cost is
+   * prohibitive.
+   */
+
+  /**
+   * Processes a metric type (counter, gauge, histogram) by generating all output lines, sorting
+   * them by tag-extracted metric name, and then outputting them in the correct sorted order into
+   * response.
+   *
+   * @param metrics A vector of Stats::RefcountPtr to a metric type.
+   * @param generate_output A std::function<std::string(const MetricType& metric, const std::string&
+   *        prefixedTagExtractedName)> which returns the output text for this metric.
+   */
+  auto process_type = [&response, &regex, used_only](const auto& metrics,
+                                                     const auto& generate_output,
+                                                     absl::string_view type) -> uint64_t {
+    // Get the inner element type (MetricType) from a `const
+    // std::vector<Stats::MetricTypeSharedPtr>&`
+    using MetricType =
+        typename std::remove_reference<decltype(metrics)>::type::value_type::element_type;
+
+    struct MetricLessThan {
+      bool operator()(const MetricType* a, const MetricType* b) const {
+        ASSERT(&a->constSymbolTable() == &b->constSymbolTable());
+        return a->constSymbolTable().lessThan(a->statName(), b->statName());
+      }
+    };
+
+    // This is an unsorted collection of dumb-pointers (no need to increment then decrement every
+    // refcount; ownership is held throughout by `metrics`). It is unsorted for efficiency, but will
+    // be sorted before producing the final output to satisfy the "preferred" ordering from the
+    // prometheus spec: metrics will be sorted by their tags' textual representation, which will be
+    // consistent across calls.
+    using MetricTypeUnsortedCollection = std::vector<const MetricType*>;
+
+    // Return early to avoid crashing when getting the symbol table from the first metric.
+    if (metrics.empty()) {
+      return 0;
     }
-    response.add(fmt::format("{0}{{{1}}} {2}\n", metric_name, tags, counter->value()));
-  }
 
-  for (const auto& gauge : gauges) {
-    if (!shouldShowMetric(*gauge, used_only, regex)) {
-      continue;
-    }
+    // There should only be one symbol table for all of the stats in the admin
+    // interface. If this assumption changes, the name comparisons in this function
+    // will have to change to compare to convert all StatNames to strings before
+    // comparison.
+    const Stats::SymbolTable& global_symbol_table = metrics.front()->constSymbolTable();
 
-    const std::string tags = formattedTags(gauge->tags());
-    const std::string metric_name = metricName(gauge->tagExtractedName());
-    if (metric_type_tracker.find(metric_name) == metric_type_tracker.end()) {
-      metric_type_tracker.insert(metric_name);
-      response.add(fmt::format("# TYPE {0} gauge\n", metric_name));
-    }
-    response.add(fmt::format("{0}{{{1}}} {2}\n", metric_name, tags, gauge->value()));
-  }
+    // Sorted collection of metrics sorted by their tagExtractedName, to satisfy the requirements
+    // of the exposition format.
+    std::map<Stats::StatName, MetricTypeUnsortedCollection, Stats::StatNameLessThan> groups(
+        global_symbol_table);
 
-  for (const auto& histogram : histograms) {
-    if (!shouldShowMetric(*histogram, used_only, regex)) {
-      continue;
-    }
+    for (const auto& metric : metrics) {
+      ASSERT(&global_symbol_table == &metric->constSymbolTable());
 
-    const std::string tags = formattedTags(histogram->tags());
-    const std::string hist_tags = histogram->tags().empty() ? EMPTY_STRING : (tags + ",");
+      if (!shouldShowMetric(*metric, used_only, regex)) {
+        continue;
+      }
 
-    const std::string metric_name = metricName(histogram->tagExtractedName());
-    if (metric_type_tracker.find(metric_name) == metric_type_tracker.end()) {
-      metric_type_tracker.insert(metric_name);
-      response.add(fmt::format("# TYPE {0} histogram\n", metric_name));
+      groups[metric->tagExtractedStatName()].push_back(metric.get());
     }
 
-    const Stats::HistogramStatistics& stats = histogram->cumulativeStatistics();
+    for (auto& group : groups) {
+      const std::string metric_name = metricName(global_symbol_table.toString(group.first));
+      response.add(fmt::format("# TYPE {0} {1}\n", metric_name, type));
+
+      // Sort before producing the final output to satisfy the "preferred" ordering from the
+      // prometheus spec: metrics will be sorted by their tags' textual representation, which will
+      // be consistent across calls.
+      std::sort(group.second.begin(), group.second.end(), MetricLessThan());
+
+      for (const auto& metric : group.second) {
+        response.add(generate_output(*metric, metric_name));
+      }
+      response.add("\n");
+    }
+    return groups.size();
+  };
+
+  // Returns the prometheus output line for a counter or a gauge.
+  auto generate_counter_and_gauge_output = [](const auto& metric,
+                                              const std::string& metric_name) -> std::string {
+    const std::string tags = formattedTags(metric.tags());
+    return fmt::format("{0}{{{1}}} {2}\n", metric_name, tags, metric.value());
+  };
+
+  // Returns the prometheus output for a histogram. The output is a multi-line string (with embedded
+  // newlines) that contains all the individual bucket counts and sum/count for a single histogram
+  // (metric_name plus all tags).
+  auto generate_histogram_output = [](const Stats::ParentHistogram& histogram,
+                                      const std::string& metric_name) -> std::string {
+    const std::string tags = formattedTags(histogram.tags());
+    const std::string hist_tags = histogram.tags().empty() ? EMPTY_STRING : (tags + ",");
+
+    const Stats::HistogramStatistics& stats = histogram.cumulativeStatistics();
     const std::vector<double>& supported_buckets = stats.supportedBuckets();
     const std::vector<uint64_t>& computed_buckets = stats.computedBuckets();
+    std::string output;
     for (size_t i = 0; i < supported_buckets.size(); ++i) {
       double bucket = supported_buckets[i];
       uint64_t value = computed_buckets[i];
@@ -244,18 +302,25 @@ uint64_t PrometheusStatsFormatter::statsAsPrometheus(
       // 'g' operator which prints the number in general fixed point format or scientific format
       // with precision 50 to round the number up to 32 significant digits in fixed point format
       // which should cover pretty much all cases
-      response.add(fmt::format("{0}_bucket{{{1}le=\"{2:.32g}\"}} {3}\n", metric_name, hist_tags,
-                               bucket, value));
+      output.append(fmt::format("{0}_bucket{{{1}le=\"{2:.32g}\"}} {3}\n", metric_name, hist_tags,
+                                bucket, value));
     }
 
-    response.add(fmt::format("{0}_bucket{{{1}le=\"+Inf\"}} {2}\n", metric_name, hist_tags,
-                             stats.sampleCount()));
-    response.add(fmt::format("{0}_sum{{{1}}} {2:.32g}\n", metric_name, tags, stats.sampleSum()));
-    response.add(fmt::format("{0}_count{{{1}}} {2}\n", metric_name, tags, stats.sampleCount()));
-  }
+    output.append(fmt::format("{0}_bucket{{{1}le=\"+Inf\"}} {2}\n", metric_name, hist_tags,
+                              stats.sampleCount()));
+    output.append(fmt::format("{0}_sum{{{1}}} {2:.32g}\n", metric_name, tags, stats.sampleSum()));
+    output.append(fmt::format("{0}_count{{{1}}} {2}\n", metric_name, tags, stats.sampleCount()));
 
-  return metric_type_tracker.size();
-}
+    return output;
+  };
+
+  uint64_t metric_name_count = 0;
+  metric_name_count += process_type(counters, generate_counter_and_gauge_output, "counter");
+  metric_name_count += process_type(gauges, generate_counter_and_gauge_output, "gauge");
+  metric_name_count += process_type(histograms, generate_histogram_output, "histogram");
+
+  return metric_name_count;
+} // namespace Server
 
 std::string
 StatsHandler::statsAsJson(const std::map<std::string, uint64_t>& all_stats,

diff --git a/test/mocks/stats/mocks.h b/test/mocks/stats/mocks.h
@@ -110,12 +110,25 @@ template <class BaseClass> class MockMetric : public BaseClass {
 
   void setTags(const TagVector& tags) {
     tag_pool_.clear();
+    tag_names_and_values_.clear();
     tags_ = tags;
     for (const Tag& tag : tags) {
       tag_names_and_values_.push_back(tag_pool_.add(tag.name_));
       tag_names_and_values_.push_back(tag_pool_.add(tag.value_));
     }
   }
+
+  void setTags(const Stats::StatNameTagVector& tags) {
+    tag_pool_.clear();
+    tag_names_and_values_.clear();
+    tags_.clear();
+    for (const StatNameTag& tag : tags) {
+      tag_names_and_values_.push_back(tag.first);
+      tag_names_and_values_.push_back(tag.second);
+      tags_.push_back(Tag{symbol_table_->toString(tag.first), symbol_table_->toString(tag.second)});
+    }
+  }
+
   void addTag(const Tag& tag) {
     tags_.emplace_back(tag);
     tag_names_and_values_.push_back(tag_pool_.add(tag.name_));