diff --git a/source/common/config/well_known_names.cc b/source/common/config/well_known_names.cc
index e8fc767c41a3b..6d9ef5308e40b 100644
--- a/source/common/config/well_known_names.cc
+++ b/source/common/config/well_known_names.cc
@@ -1,8 +1,31 @@
#include "common/config/well_known_names.h"
+#include "absl/strings/str_replace.h"
+
namespace Envoy {
namespace Config {
+namespace {
+
+// Expands the <ADDRESS>, <CIPHER>, <NAME> and <ROUTE_CONFIG_NAME> tokens used by
+// the regexes declared below into their full RE2 sub-patterns, which keeps those
+// declarations readable and free of duplication. Other text is left untouched.
+std::string expandRegex(const std::string& regex) {
+ return absl::StrReplaceAll(
+ regex, {// Regex to look for either IPv4 or IPv6 addresses plus port number after underscore.
+ {"<ADDRESS>", R"((?:(?:\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|\[[a-fA-F_\d]+\])_\d+))"},
+ // Cipher names can contain alphanumerics with dashes and
+ // underscores.
+ {"<CIPHER>", R"([\w-]+)"},
+ // A generic name can contain any character except dots.
+ {"<NAME>", R"([^\.]+)"},
+ // Route names may contain dots in addition to alphanumerics and
+ // dashes with underscores.
+ {"<ROUTE_CONFIG_NAME>", R"([\w-\.]+)"}});
+}
+
+} // namespace
+
TagNameValues::TagNameValues() {
// Note: the default regexes are defined below in the order that they will typically be matched
// (see the TagExtractor class definition for an explanation of the iterative matching process).
@@ -24,107 +47,101 @@ TagNameValues::TagNameValues() {
// - Typical * notation will be used to denote an arbitrary set of characters.
// *_rq(_<response_code>)
- addRegex(RESPONSE_CODE, "_rq(_(\\d{3}))$", "_rq_");
+ addRe2(RESPONSE_CODE, R"(_rq(_(\d{3}))$)", "_rq_");
// *_rq_(<response_code_class>)xx
- addRegex(RESPONSE_CODE_CLASS, "_rq_(\\d)xx$", "_rq_");
+ addRe2(RESPONSE_CODE_CLASS, R"(_rq_((\d))xx$)", "_rq_");
// http.[<stat_prefix>.]dynamodb.table.[<table_name>.]capacity.[<operation_name>.](__partition_id=<last_seven_characters_from_partition_id>)
- addRegex(DYNAMO_PARTITION_ID,
- "^http(?=\\.).*?\\.dynamodb\\.table(?=\\.).*?\\."
- "capacity(?=\\.).*?(\\.__partition_id=(\\w{7}))$",
- ".dynamodb.table.");
+ addRe2(DYNAMO_PARTITION_ID,
+ R"(^http\.<NAME>\.dynamodb\.table\.<NAME>\.capacity\.<NAME>(\.__partition_id=(\w{7}))$)",
+ ".dynamodb.table.");
- // http.[<stat_prefix>.]dynamodb.operation.(<operation_name>.)<base_stat> or
+ // http.[<stat_prefix>.]dynamodb.operation.(<operation_name>.)* or
// http.[<stat_prefix>.]dynamodb.table.[<table_name>.]capacity.(<operation_name>.)[<partition_id>]
- addRegex(DYNAMO_OPERATION,
- "^http(?=\\.).*?\\.dynamodb.(?:operation|table(?="
- "\\.).*?\\.capacity)(\\.(.*?))(?:\\.|$)",
- ".dynamodb.");
+ addRe2(DYNAMO_OPERATION,
+ R"(^http\.<NAME>\.dynamodb.(?:operation|table\.<NAME>\.capacity)(\.(<NAME>))(?:\.|$))",
+ ".dynamodb.");
- // mongo.[<stat_prefix>.]collection.[<collection>.]callsite.(<callsite>.)query.<base_stat>
- addRegex(MONGO_CALLSITE,
- R"(^mongo(?=\.).*?\.collection(?=\.).*?\.callsite\.((.*?)\.).*?query.\w+?$)",
- ".collection.");
+ // mongo.[<stat_prefix>.]collection.[<collection>.]callsite.(<callsite>.)query.*
+ addRe2(MONGO_CALLSITE, R"(^mongo\.<NAME>\.collection\.<NAME>\.callsite\.((<NAME>)\.)query\.)",
+ ".collection.");
- // http.[<stat_prefix>.]dynamodb.table.(<table_name>.)<base_stat> or
+ // http.[<stat_prefix>.]dynamodb.table.(<table_name>.)* or
// http.[<stat_prefix>.]dynamodb.error.(<table_name>.)*
- addRegex(DYNAMO_TABLE, R"(^http(?=\.).*?\.dynamodb.(?:table|error)\.((.*?)\.))", ".dynamodb.");
+ addRe2(DYNAMO_TABLE, R"(^http\.<NAME>\.dynamodb.(?:table|error)\.((<NAME>)\.))", ".dynamodb.");
- // mongo.[<stat_prefix>.]collection.(<collection>.)query.<base_stat>
- addRegex(MONGO_COLLECTION, R"(^mongo(?=\.).*?\.collection\.((.*?)\.).*?query.\w+?$)",
- ".collection.");
+ // mongo.[<stat_prefix>.]collection.(<collection>.)query.*
+ addRe2(MONGO_COLLECTION, R"(^mongo\.<NAME>\.collection\.((<NAME>)\.).*?query\.)", ".collection.");
- // mongo.[<stat_prefix>.]cmd.(<cmd>.)<base_stat>
- addRegex(MONGO_CMD, R"(^mongo(?=\.).*?\.cmd\.((.*?)\.)\w+?$)", ".cmd.");
+ // mongo.[<stat_prefix>.]cmd.(<cmd>.)*
+ addRe2(MONGO_CMD, R"(^mongo\.<NAME>\.cmd\.((<NAME>)\.))", ".cmd.");
- // cluster.[<route_target_cluster>.]grpc.[<grpc_service>.](<grpc_method>.)<base_stat>
- addRegex(GRPC_BRIDGE_METHOD, R"(^cluster(?=\.).*?\.grpc(?=\.).*\.((.*?)\.)\w+?$)", ".grpc.");
+ // cluster.[<route_target_cluster>.]grpc.[<grpc_service>.](<grpc_method>.)*
+ addRe2(GRPC_BRIDGE_METHOD, R"(^cluster\.<NAME>\.grpc\.<NAME>\.((<NAME>)\.))", ".grpc.");
- // http.[<stat_prefix>.]user_agent.(<user_agent>.)<base_stat>
- addRegex(HTTP_USER_AGENT, R"(^http(?=\.).*?\.user_agent\.((.*?)\.)\w+?$)", ".user_agent.");
+ // http.[<stat_prefix>.]user_agent.(<user_agent>.)*
+ addRe2(HTTP_USER_AGENT, R"(^http\.<NAME>\.user_agent\.((<NAME>)\.))", ".user_agent.");
- // vhost.[<virtual host name>.]vcluster.(<virtual_cluster_name>.)<base_stat>
- addRegex(VIRTUAL_CLUSTER, R"(^vhost(?=\.).*?\.vcluster\.((.*?)\.)\w+?$)", ".vcluster.");
+ // vhost.[<virtual host name>.]vcluster.(<virtual_cluster_name>.)*
+ addRe2(VIRTUAL_CLUSTER, R"(^vhost\.<NAME>\.vcluster\.((<NAME>)\.))", ".vcluster.");
- // http.[<stat_prefix>.]fault.(<downstream_cluster>.)<base_stat>
- addRegex(FAULT_DOWNSTREAM_CLUSTER, R"(^http(?=\.).*?\.fault\.((.*?)\.)\w+?$)", ".fault.");
+ // http.[<stat_prefix>.]fault.(<downstream_cluster>.)*
+ addRe2(FAULT_DOWNSTREAM_CLUSTER, R"(^http\.<NAME>\.fault\.((<NAME>)\.))", ".fault.");
// listener.[<address>.]ssl.cipher.(<cipher>)
- addRegex(SSL_CIPHER, R"(^listener(?=\.).*?\.ssl\.cipher(\.(.*?))$)");
+ addRe2(SSL_CIPHER, R"(^listener\..*?\.ssl\.cipher(\.(<CIPHER>))$)");
// cluster.[<cluster_name>.]ssl.ciphers.(<cipher>)
- addRegex(SSL_CIPHER_SUITE, R"(^cluster(?=\.).*?\.ssl\.ciphers(\.(.*?))$)", ".ssl.ciphers.");
+ addRe2(SSL_CIPHER_SUITE, R"(^cluster\.<NAME>\.ssl\.ciphers(\.(<CIPHER>))$)", ".ssl.ciphers.");
// cluster.[<route_target_cluster>.]grpc.(<grpc_service>.)*
- addRegex(GRPC_BRIDGE_SERVICE, R"(^cluster(?=\.).*?\.grpc\.((.*?)\.))", ".grpc.");
+ addRe2(GRPC_BRIDGE_SERVICE, R"(^cluster\.<NAME>\.grpc\.((<NAME>)\.))", ".grpc.");
- // tcp.(<stat_prefix>.)<base_stat>
- addRegex(TCP_PREFIX, R"(^tcp\.((.*?)\.)\w+?$)");
+ // tcp.(<stat_prefix>.)*
+ addRe2(TCP_PREFIX, R"(^tcp\.((<NAME>)\.))");
- // udp.(<stat_prefix>.)<base_stat>
- addRegex(UDP_PREFIX, R"(^udp\.((.*?)\.)\w+?$)");
+ // udp.(<stat_prefix>.)*
+ addRe2(UDP_PREFIX, R"(^udp\.((<NAME>)\.))");
- // auth.clientssl.(<stat_prefix>.)<base_stat>
- addRegex(CLIENTSSL_PREFIX, R"(^auth\.clientssl\.((.*?)\.)\w+?$)");
+ // auth.clientssl.(<stat_prefix>.)*
+ addRe2(CLIENTSSL_PREFIX, R"(^auth\.clientssl\.((<NAME>)\.))");
- // ratelimit.(<stat_prefix>.)<base_stat>
- addRegex(RATELIMIT_PREFIX, R"(^ratelimit\.((.*?)\.)\w+?$)");
+ // ratelimit.(<stat_prefix>.)*
+ addRe2(RATELIMIT_PREFIX, R"(^ratelimit\.((<NAME>)\.))");
// cluster.(<cluster_name>.)*
- addRe2(CLUSTER_NAME, "^cluster\\.(([^\\.]+)\\.).*");
+ addRe2(CLUSTER_NAME, R"(^cluster\.((<NAME>)\.))");
// listener.[<address>.]http.(<stat_prefix>.)*
- addRegex(HTTP_CONN_MANAGER_PREFIX, R"(^listener(?=\.).*?\.http\.((.*?)\.))", ".http.");
+ // The <ADDRESS> part can be anything here (.*?) for the sake of a simpler
+ // internal state of the regex which performs better.
+ addRe2(HTTP_CONN_MANAGER_PREFIX, R"(^listener\..*?\.http\.((<NAME>)\.))", ".http.");
// http.(<stat_prefix>.)*
- addRegex(HTTP_CONN_MANAGER_PREFIX, "^http\\.((.*?)\\.)");
+ addRe2(HTTP_CONN_MANAGER_PREFIX, R"(^http\.((<NAME>)\.))");
// listener.(<address>.)*
- addRegex(LISTENER_ADDRESS,
- R"(^listener\.(((?:[_.[:digit:]]*|[_\[\]aAbBcCdDeEfF[:digit:]]*))\.))");
+ addRe2(LISTENER_ADDRESS, R"(^listener\.((<ADDRESS>)\.))");
// vhost.(<virtual host name>.)*
- addRegex(VIRTUAL_HOST, "^vhost\\.((.*?)\\.)");
+ addRe2(VIRTUAL_HOST, R"(^vhost\.((<NAME>)\.))");
// mongo.(<stat_prefix>.)*
- addRegex(MONGO_PREFIX, "^mongo\\.((.*?)\\.)");
+ addRe2(MONGO_PREFIX, R"(^mongo\.((<NAME>)\.))");
// http.[<stat_prefix>.]rds.(<route_config_name>.)<base_stat>
- addRegex(RDS_ROUTE_CONFIG, R"(^http(?=\.).*?\.rds\.((.*?)\.)\w+?$)", ".rds.");
+ // Note: <ROUTE_CONFIG_NAME> can contain dots thus we have to maintain full
+ // match.
+ addRe2(RDS_ROUTE_CONFIG, R"(^http\.<NAME>\.rds\.((<ROUTE_CONFIG_NAME>)\.)\w+?$)", ".rds.");
// listener_manager.(worker_<id>.)*
- addRegex(WORKER_ID, R"(^listener_manager\.((worker_\d+)\.))", "listener_manager.worker_");
-}
-
-void TagNameValues::addRegex(const std::string& name, const std::string& regex,
- const std::string& substr) {
- descriptor_vec_.emplace_back(Descriptor{name, regex, substr, Regex::Type::StdRegex});
+ addRe2(WORKER_ID, R"(^listener_manager\.((worker_\d+)\.))", "listener_manager.worker_");
}
void TagNameValues::addRe2(const std::string& name, const std::string& regex,
const std::string& substr) {
- descriptor_vec_.emplace_back(Descriptor{name, regex, substr, Regex::Type::Re2});
+ descriptor_vec_.emplace_back(Descriptor{name, expandRegex(regex), substr, Regex::Type::Re2}); // regex is run through expandRegex() before being stored as an Re2 descriptor
}
} // namespace Config
diff --git a/source/common/config/well_known_names.h b/source/common/config/well_known_names.h
index 97ce58fd7265c..918360aff1f60 100644
--- a/source/common/config/well_known_names.h
+++ b/source/common/config/well_known_names.h
@@ -129,7 +129,6 @@ class TagNameValues {
const std::vector& descriptorVec() const { return descriptor_vec_; }
private:
- void addRegex(const std::string& name, const std::string& regex, const std::string& substr = "");
void addRe2(const std::string& name, const std::string& regex, const std::string& substr = "");
// Collection of tag descriptors.
diff --git a/source/common/stats/tag_extractor_impl.cc b/source/common/stats/tag_extractor_impl.cc
index 6aefbdf6cd258..5e735d4ab77fe 100644
--- a/source/common/stats/tag_extractor_impl.cc
+++ b/source/common/stats/tag_extractor_impl.cc
@@ -26,7 +26,9 @@ bool regexStartsWithDot(absl::string_view regex) {
TagExtractorImplBase::TagExtractorImplBase(absl::string_view name, absl::string_view regex,
absl::string_view substr)
- : name_(name), prefix_(std::string(extractRegexPrefix(regex))), substr_(substr) {}
+ : name_(name), prefix_(std::string(extractRegexPrefix(regex))), substr_(substr) {
+ PERF_TAG_INIT; // Sets up counters_ when ENVOY_PERF_ANNOTATION is defined; expands to nothing otherwise.
+}
std::string TagExtractorImplBase::extractRegexPrefix(absl::string_view regex) {
std::string prefix;
@@ -90,6 +92,7 @@ bool TagExtractorStdRegexImpl::extractTag(absl::string_view stat_name, std::vect
if (substrMismatch(stat_name)) {
PERF_RECORD(perf, "re-skip", name_);
+ PERF_TAG_INC(skipped_);
return false;
}
@@ -113,9 +116,11 @@ bool TagExtractorStdRegexImpl::extractTag(absl::string_view stat_name, std::vect
std::string::size_type end = remove_subexpr.second - stat_name.begin();
remove_characters.insert(start, end);
PERF_RECORD(perf, "re-match", name_);
+ PERF_TAG_INC(matched_);
return true;
}
PERF_RECORD(perf, "re-miss", name_);
+ PERF_TAG_INC(missed_);
return false;
}
@@ -129,6 +134,7 @@ bool TagExtractorRe2Impl::extractTag(absl::string_view stat_name, std::vector
#include
+#ifdef ENVOY_PERF_ANNOTATION
+#include <iostream>
+#include <memory>
+#endif
+
#include "envoy/stats/tag_extractor.h"
#include "common/common/regex.h"
@@ -14,6 +18,29 @@
namespace Envoy {
namespace Stats {
+// To check if a tag extractor is actually used you can run
+// bazel test //test/... --test_output=streamed --define=perf_annotation=enabled
+#ifdef ENVOY_PERF_ANNOTATION
+
+// Per-extractor tallies of extractTag() outcomes; all start at zero.
+struct Counters {
+ uint32_t skipped_{};
+ uint32_t matched_{};
+ uint32_t missed_{};
+};
+
+// Declares the counters member inside a tag extractor class.
+// NOTE(review): held through a pointer presumably so that const member
+// functions can still bump the tallies - confirm against extractTag().
+#define PERF_TAG_COUNTERS std::unique_ptr<Counters> counters_
+
+// Allocates the counters; intended for use in the constructor body.
+#define PERF_TAG_INIT counters_ = std::make_unique<Counters>()
+// Increments one of the Counters fields, e.g. PERF_TAG_INC(matched_).
+#define PERF_TAG_INC(member) ++(counters_->member)
+
+#else
+
+// Without ENVOY_PERF_ANNOTATION every helper expands to nothing.
+#define PERF_TAG_COUNTERS
+#define PERF_TAG_INIT
+#define PERF_TAG_INC(member)
+
+#endif
+
class TagExtractorImplBase : public TagExtractor {
public:
/**
@@ -32,6 +59,13 @@ class TagExtractorImplBase : public TagExtractor {
TagExtractorImplBase(absl::string_view name, absl::string_view regex,
absl::string_view substr = "");
+#ifdef ENVOY_PERF_ANNOTATION
+ // Prints this extractor's skipped/matched/missed tallies to stdout when it is destroyed.
+ ~TagExtractorImplBase() override {
+ const std::string report =
+ fmt::format("TagStats for {} tag extractor: skipped {}, matched {}, missing {}", name_,
+ counters_->skipped_, counters_->matched_, counters_->missed_);
+ std::cout << report << std::endl;
+ }
+#endif
std::string name() const override { return name_; }
absl::string_view prefixToken() const override { return prefix_; }
@@ -62,6 +96,8 @@ class TagExtractorImplBase : public TagExtractor {
const std::string name_;
const std::string prefix_;
const std::string substr_;
+
+ PERF_TAG_COUNTERS;
};
class TagExtractorStdRegexImpl : public TagExtractorImplBase {
diff --git a/test/common/stats/BUILD b/test/common/stats/BUILD
index ab9daa0d66435..53f7d41fc4466 100644
--- a/test/common/stats/BUILD
+++ b/test/common/stats/BUILD
@@ -231,6 +231,25 @@ envoy_cc_test(
],
)
+# Benchmark binary built from tag_extractor_impl_speed_test.cc.
+envoy_cc_benchmark_binary(
+ name = "tag_extractor_impl_benchmark",
+ srcs = [
+ "tag_extractor_impl_speed_test.cc",
+ ],
+ external_deps = [
+ "benchmark",
+ ],
+ deps = [
+ "//source/common/stats:tag_producer_lib",
+ "@envoy_api//envoy/config/metrics/v3:pkg_cc_proto",
+ ],
+)
+
+# Wraps the tag_extractor_impl_benchmark binary above via envoy_benchmark_test.
+envoy_benchmark_test(
+ name = "tag_extractor_impl_benchmark_test",
+ benchmark_binary = "tag_extractor_impl_benchmark",
+)
+
envoy_cc_test(
name = "thread_local_store_test",
srcs = ["thread_local_store_test.cc"],
diff --git a/test/common/stats/tag_extractor_impl_speed_test.cc b/test/common/stats/tag_extractor_impl_speed_test.cc
new file mode 100644
index 0000000000000..e6a8603d73b28
--- /dev/null
+++ b/test/common/stats/tag_extractor_impl_speed_test.cc
@@ -0,0 +1,110 @@
+// Note: this should be run with --compilation_mode=opt
+// Running ./bazel-out/k8-opt/bin/test/common/stats/tag_extractor_impl_benchmark
+// Run on (24 X 4300 MHz CPU s)
+// CPU Caches:
+// L1 Data 32 KiB (x12)
+// L1 Instruction 32 KiB (x12)
+// L2 Unified 1024 KiB (x12)
+// L3 Unified 16896 KiB (x1)
+// Load Average: 0.94, 0.75, 0.88
+// ***WARNING*** CPU scaling is enabled, the benchmark real time
+// measurements may be noisy and will incur extra overhead.
+// ------------------------------------------------------------
+// Benchmark Time CPU Iterations
+// ------------------------------------------------------------
+// BM_ExtractTags/0 1759 ns 1757 ns 397721
+// BM_ExtractTags/1 498 ns 497 ns 1386765
+// BM_ExtractTags/2 814 ns 813 ns 789388
+// BM_ExtractTags/3 621 ns 620 ns 1109055
+// BM_ExtractTags/4 1320 ns 1318 ns 536701
+// BM_ExtractTags/5 882 ns 880 ns 817115
+// BM_ExtractTags/6 327 ns 327 ns 2171259
+// BM_ExtractTags/7 572 ns 571 ns 1205250
+// BM_ExtractTags/8 1238 ns 1236 ns 558481
+// BM_ExtractTags/9 1669 ns 1667 ns 414483
+// BM_ExtractTags/10 310 ns 310 ns 2237065
+// BM_ExtractTags/11 476 ns 476 ns 1465925
+// BM_ExtractTags/12 1102 ns 1100 ns 631707
+// BM_ExtractTags/13 1307 ns 1305 ns 513760
+// BM_ExtractTags/14 1583 ns 1581 ns 447159
+// BM_ExtractTags/15 957 ns 956 ns 729726
+// BM_ExtractTags/16 822 ns 821 ns 869110
+// BM_ExtractTags/17 821 ns 820 ns 839293
+// BM_ExtractTags/18 783 ns 782 ns 898442
+// BM_ExtractTags/19 330 ns 329 ns 2098821
+// BM_ExtractTags/20 342 ns 342 ns 2044062
+// BM_ExtractTags/21 389 ns 389 ns 1785110
+// BM_ExtractTags/22 847 ns 846 ns 831652
+// BM_ExtractTags/23 2022 ns 2019 ns 353368
+// BM_ExtractTags/24 306 ns 305 ns 2226702
+// BM_ExtractTags/25 277 ns 277 ns 2516796
+// BM_ExtractTags/26 494 ns 494 ns 1363306
+
+#include "envoy/config/metrics/v3/stats.pb.h"
+
+#include "common/common/assert.h"
+#include "common/config/well_known_names.h"
+#include "common/stats/tag_producer_impl.h"
+
+#include "benchmark/benchmark.h"
+
+namespace Envoy {
+namespace Stats {
+namespace {
+
+// A benchmark case: the stat name fed to the tag producer and the number of
+// tags that must come back for it.
+using Params = std::tuple<std::string, uint32_t>;
+
+const std::vector<Params> params = {
+ {"listener.127.0.0.1_3012.http.http_prefix.downstream_rq_5xx", 3},
+ {"cluster.ratelimit.upstream_rq_timeout", 1},
+ {"listener.[__1]_0.ssl.cipher.AES256-SHA", 2},
+ {"cluster.ratelimit.ssl.ciphers.ECDHE-RSA-AES128-GCM-SHA256", 2},
+ {"listener.[2001_0db8_85a3_0000_0000_8a2e_0370_7334]_3543.ssl.cipher.AES256-SHA", 2},
+ {"listener.127.0.0.1_0.ssl.cipher.AES256-SHA", 2},
+ {"mongo.mongo_filter.op_reply", 1},
+ {"mongo.mongo_filter.cmd.foo_cmd.reply_size", 2},
+ {"mongo.mongo_filter.collection.bar_collection.query.multi_get", 2},
+ {"mongo.mongo_filter.collection.bar_collection.callsite.baz_callsite.query.scatter_get", 3},
+ {"ratelimit.foo_ratelimiter.over_limit", 1},
+ {"http.egress_dynamodb_iad.downstream_cx_total", 1},
+ {"http.egress_dynamodb_iad.dynamodb.operation.Query.upstream_rq_time", 2},
+ {"http.egress_dynamodb_iad.dynamodb.table.bar_table.upstream_rq_time", 2},
+ {"http.egress_dynamodb_iad.dynamodb.table.bar_table.capacity.Query.__partition_id=ABC1234", 4},
+ {"cluster.grpc_cluster.grpc.grpc_service_1.grpc_method_1.success", 3},
+ {"vhost.vhost_1.vcluster.vcluster_1.upstream_rq_2xx", 3},
+ {"vhost.vhost_1.vcluster.vcluster_1.upstream_rq_200", 3},
+ {"http.egress_dynamodb_iad.user_agent.ios.downstream_cx_total", 2},
+ {"auth.clientssl.clientssl_prefix.auth_ip_allowlist", 1},
+ {"tcp.tcp_prefix.downstream_flow_control_resumed_reading_total", 1},
+ {"udp.udp_prefix-with-dashes.downstream_flow_control_resumed_reading_total", 1},
+ {"http.fault_connection_manager.fault.fault_cluster.aborts_injected", 2},
+ {"http.rds_connection_manager.rds.route_config.123.update_success", 2},
+ {"listener_manager.worker_123.dispatcher.loop_duration_us", 1},
+ {"mongo_mongo_mongo_mongo.this_is_rather_long_string_which "
+ "does_not_match_and_consumes_a_lot_in_case_of_backtracking_imposed_by_greedy_pattern",
+ 0},
+ {"another_long_but_matching_string_which_may_consume_resources_if_missing_end_of_line_lock_rq_"
+ "2xx",
+ 1},
+};
+
+// Benchmarks produceTags() for the params entry selected by the benchmark
+// argument and checks on every iteration that the expected number of tags was
+// produced. The tag producer is built once, outside the timed loop.
+// NOLINTNEXTLINE(readability-identifier-naming)
+void BM_ExtractTags(benchmark::State& state) {
+ TagProducerImpl tag_extractors{envoy::config::metrics::v3::StatsConfig()};
+ const auto idx = state.range(0);
+ const auto& p = params[idx];
+ absl::string_view str = std::get<0>(p);
+ const uint32_t tags_size = std::get<1>(p);
+
+ for (auto _ : state) {
+ UNREFERENCED_PARAMETER(_);
+ TagVector tags;
+ tag_extractors.produceTags(str, tags);
+ RELEASE_ASSERT(tags.size() == tags_size, "");
+ }
+}
+// The argument range is derived from the table so that adding or removing an
+// entry in params cannot leave the benchmark indexing out of bounds.
+BENCHMARK(BM_ExtractTags)->DenseRange(0, static_cast<int64_t>(params.size()) - 1, 1);
+
+} // namespace
+} // namespace Stats
+} // namespace Envoy