diff --git a/source/common/config/well_known_names.cc b/source/common/config/well_known_names.cc
index e8fc767c41a3b..6d9ef5308e40b 100644
--- a/source/common/config/well_known_names.cc
+++ b/source/common/config/well_known_names.cc
@@ -1,8 +1,33 @@
 #include "common/config/well_known_names.h"
 
+#include "absl/strings/str_replace.h"
+
 namespace Envoy {
 namespace Config {
 
+namespace {
+
+// To allow for more readable regular expressions to be declared below, and to
+// reduce duplication, define a few common pattern substitutions for regex
+// segments.
+// Every occurrence of a placeholder token in `regex` is replaced by the
+// fragment listed for it; all other text is returned unchanged.
+std::string expandRegex(const std::string& regex) {
+  return absl::StrReplaceAll(
+      regex, {// Regex to look for either IPv4 or IPv6 addresses plus port number after underscore.
+              {"<ADDRESS>", R"((?:(?:\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|\[[a-fA-F_\d]+\])_\d+))"},
+              // Cipher names can contain alphanumerics with dashes and
+              // underscores.
+              {"<CIPHER>", R"([\w-]+)"},
+              // A generic name can contain any character except dots.
+              {"<NAME>", R"([^\.]+)"},
+              // Route names may contain dots in addition to alphanumerics and
+              // dashes with underscores.
+              {"<ROUTE_CONFIG_NAME>", R"([\w-\.]+)"}});
+}
+
+} // namespace
+
 TagNameValues::TagNameValues() {
   // Note: the default regexes are defined below in the order that they will typically be matched
   // (see the TagExtractor class definition for an explanation of the iterative matching process).
@@ -24,107 +49,102 @@ TagNameValues::TagNameValues() {
   // - Typical * notation will be used to denote an arbitrary set of characters.
 
   // *_rq(_<response_code>)
-  addRegex(RESPONSE_CODE, "_rq(_(\\d{3}))$", "_rq_");
+  addRe2(RESPONSE_CODE, R"(_rq(_(\d{3}))$)", "_rq_");
 
   // *_rq_(<response_code_class>)xx
-  addRegex(RESPONSE_CODE_CLASS, "_rq_(\\d)xx$", "_rq_");
+  addRe2(RESPONSE_CODE_CLASS, R"(_rq_((\d))xx$)", "_rq_");
 
   // http.[<stat_prefix>.]dynamodb.table.[<table_name>.]capacity.[<operation_name>.](__partition_id=<last_seven_characters_from_partition_id>)
-  addRegex(DYNAMO_PARTITION_ID,
-           "^http(?=\\.).*?\\.dynamodb\\.table(?=\\.).*?\\."
-           "capacity(?=\\.).*?(\\.__partition_id=(\\w{7}))$",
-           ".dynamodb.table.");
+  addRe2(DYNAMO_PARTITION_ID,
+         R"(^http\.<NAME>\.dynamodb\.table\.<NAME>\.capacity\.<NAME>(\.__partition_id=(\w{7}))$)",
+         ".dynamodb.table.");
 
-  // http.[<stat_prefix>.]dynamodb.operation.(<operation_name>.)<base_stat> or
+  // http.[<stat_prefix>.]dynamodb.operation.(<operation_name>.)* or
   // http.[<stat_prefix>.]dynamodb.table.[<table_name>.]capacity.(<operation_name>.)[<partition_id>]
-  addRegex(DYNAMO_OPERATION,
-           "^http(?=\\.).*?\\.dynamodb.(?:operation|table(?="
-           "\\.).*?\\.capacity)(\\.(.*?))(?:\\.|$)",
-           ".dynamodb.");
+  addRe2(DYNAMO_OPERATION,
+         R"(^http\.<NAME>\.dynamodb\.(?:operation|table\.<NAME>\.capacity)(\.(<NAME>))(?:\.|$))",
+         ".dynamodb.");
 
-  // mongo.[<stat_prefix>.]collection.[<collection>.]callsite.(<callsite>.)query.<base_stat>
-  addRegex(MONGO_CALLSITE,
-           R"(^mongo(?=\.).*?\.collection(?=\.).*?\.callsite\.((.*?)\.).*?query.\w+?$)",
-           ".collection.");
+  // mongo.[<stat_prefix>.]collection.[<collection>.]callsite.(<callsite>.)query.*
+  addRe2(MONGO_CALLSITE, R"(^mongo\.<NAME>\.collection\.<NAME>\.callsite\.((<NAME>)\.)query\.)",
+         ".collection.");
 
-  // http.[<stat_prefix>.]dynamodb.table.(<table_name>.) or
+  // http.[<stat_prefix>.]dynamodb.table.(<table_name>.)* or
   // http.[<stat_prefix>.]dynamodb.error.(<table_name>.)*
-  addRegex(DYNAMO_TABLE, R"(^http(?=\.).*?\.dynamodb.(?:table|error)\.((.*?)\.))", ".dynamodb.");
+  addRe2(DYNAMO_TABLE, R"(^http\.<NAME>\.dynamodb\.(?:table|error)\.((<NAME>)\.))", ".dynamodb.");
 
-  // mongo.[<stat_prefix>.]collection.(<collection>.)query.<base_stat>
-  addRegex(MONGO_COLLECTION, R"(^mongo(?=\.).*?\.collection\.((.*?)\.).*?query.\w+?$)",
-           ".collection.");
+  // mongo.[<stat_prefix>.]collection.(<collection>.)query.*
+  addRe2(MONGO_COLLECTION, R"(^mongo\.<NAME>\.collection\.((<NAME>)\.).*?query\.)", ".collection.");
 
-  // mongo.[<stat_prefix>.]cmd.(<cmd>.)<base_stat>
-  addRegex(MONGO_CMD, R"(^mongo(?=\.).*?\.cmd\.((.*?)\.)\w+?$)", ".cmd.");
+  // mongo.[<stat_prefix>.]cmd.(<cmd>.)*
+  addRe2(MONGO_CMD, R"(^mongo\.<NAME>\.cmd\.((<NAME>)\.))", ".cmd.");
 
-  // cluster.[<route_target_cluster>.]grpc.[<grpc_service>.](<grpc_method>.)<base_stat>
-  addRegex(GRPC_BRIDGE_METHOD, R"(^cluster(?=\.).*?\.grpc(?=\.).*\.((.*?)\.)\w+?$)", ".grpc.");
+  // cluster.[<route_target_cluster>.]grpc.[<grpc_service>.](<grpc_method>.)*
+  addRe2(GRPC_BRIDGE_METHOD, R"(^cluster\.<NAME>\.grpc\.<NAME>\.((<NAME>)\.))", ".grpc.");
 
-  // http.[<stat_prefix>.]user_agent.(<user_agent>.)<base_stat>
-  addRegex(HTTP_USER_AGENT, R"(^http(?=\.).*?\.user_agent\.((.*?)\.)\w+?$)", ".user_agent.");
+  // http.[<stat_prefix>.]user_agent.(<user_agent>.)*
+  addRe2(HTTP_USER_AGENT, R"(^http\.<NAME>\.user_agent\.((<NAME>)\.))", ".user_agent.");
 
-  // vhost.[<virtual host name>.]vcluster.(<virtual_cluster_name>.)<base_stat>
-  addRegex(VIRTUAL_CLUSTER, R"(^vhost(?=\.).*?\.vcluster\.((.*?)\.)\w+?$)", ".vcluster.");
+  // vhost.[<virtual host name>.]vcluster.(<virtual_cluster_name>.)*
+  addRe2(VIRTUAL_CLUSTER, R"(^vhost\.<NAME>\.vcluster\.((<NAME>)\.))", ".vcluster.");
 
-  // http.[<stat_prefix>.]fault.(<downstream_cluster>.)<base_stat>
-  addRegex(FAULT_DOWNSTREAM_CLUSTER, R"(^http(?=\.).*?\.fault\.((.*?)\.)\w+?$)", ".fault.");
+  // http.[<stat_prefix>.]fault.(<downstream_cluster>.)*
+  addRe2(FAULT_DOWNSTREAM_CLUSTER, R"(^http\.<NAME>\.fault\.((<NAME>)\.))", ".fault.");
 
   // listener.[<address>.]ssl.cipher.(<cipher>)
-  addRegex(SSL_CIPHER, R"(^listener(?=\.).*?\.ssl\.cipher(\.(.*?))$)");
+  addRe2(SSL_CIPHER, R"(^listener\..*?\.ssl\.cipher(\.(<CIPHER>))$)");
 
   // cluster.[<cluster_name>.]ssl.ciphers.(<cipher>)
-  addRegex(SSL_CIPHER_SUITE, R"(^cluster(?=\.).*?\.ssl\.ciphers(\.(.*?))$)", ".ssl.ciphers.");
+  addRe2(SSL_CIPHER_SUITE, R"(^cluster\.<NAME>\.ssl\.ciphers(\.(<CIPHER>))$)", ".ssl.ciphers.");
 
   // cluster.[<route_target_cluster>.]grpc.(<grpc_service>.)*
-  addRegex(GRPC_BRIDGE_SERVICE, R"(^cluster(?=\.).*?\.grpc\.((.*?)\.))", ".grpc.");
+  addRe2(GRPC_BRIDGE_SERVICE, R"(^cluster\.<NAME>\.grpc\.((<NAME>)\.))", ".grpc.");
 
-  // tcp.(<stat_prefix>.)<base_stat>
-  addRegex(TCP_PREFIX, R"(^tcp\.((.*?)\.)\w+?$)");
+  // tcp.(<stat_prefix>.)*
+  addRe2(TCP_PREFIX, R"(^tcp\.((<NAME>)\.))");
 
-  // udp.(<stat_prefix>.)<base_stat>
-  addRegex(UDP_PREFIX, R"(^udp\.((.*?)\.)\w+?$)");
+  // udp.(<stat_prefix>.)*
+  addRe2(UDP_PREFIX, R"(^udp\.((<NAME>)\.))");
 
-  // auth.clientssl.(<stat_prefix>.)<base_stat>
-  addRegex(CLIENTSSL_PREFIX, R"(^auth\.clientssl\.((.*?)\.)\w+?$)");
+  // auth.clientssl.(<stat_prefix>.)*
+  addRe2(CLIENTSSL_PREFIX, R"(^auth\.clientssl\.((<NAME>)\.))");
 
-  // ratelimit.(<stat_prefix>.)<base_stat>
-  addRegex(RATELIMIT_PREFIX, R"(^ratelimit\.((.*?)\.)\w+?$)");
+  // ratelimit.(<stat_prefix>.)*
+  addRe2(RATELIMIT_PREFIX, R"(^ratelimit\.((<NAME>)\.))");
 
   // cluster.(<cluster_name>.)*
-  addRe2(CLUSTER_NAME, "^cluster\\.(([^\\.]+)\\.).*");
+  addRe2(CLUSTER_NAME, R"(^cluster\.((<NAME>)\.))");
 
   // listener.[<address>.]http.(<stat_prefix>.)*
-  addRegex(HTTP_CONN_MANAGER_PREFIX, R"(^listener(?=\.).*?\.http\.((.*?)\.))", ".http.");
+  // The <address> part can be anything here (.*?) for the sake of a simpler
+  // internal state of the regex which performs better.
+  addRe2(HTTP_CONN_MANAGER_PREFIX, R"(^listener\..*?\.http\.((<NAME>)\.))", ".http.");
 
   // http.(<stat_prefix>.)*
-  addRegex(HTTP_CONN_MANAGER_PREFIX, "^http\\.((.*?)\\.)");
+  addRe2(HTTP_CONN_MANAGER_PREFIX, R"(^http\.((<NAME>)\.))");
 
   // listener.(<address>.)*
-  addRegex(LISTENER_ADDRESS,
-           R"(^listener\.(((?:[_.[:digit:]]*|[_\[\]aAbBcCdDeEfF[:digit:]]*))\.))");
+  addRe2(LISTENER_ADDRESS, R"(^listener\.((<ADDRESS>)\.))");
 
   // vhost.(<virtual host name>.)*
-  addRegex(VIRTUAL_HOST, "^vhost\\.((.*?)\\.)");
+  addRe2(VIRTUAL_HOST, R"(^vhost\.((<NAME>)\.))");
 
   // mongo.(<stat_prefix>.)*
-  addRegex(MONGO_PREFIX, "^mongo\\.((.*?)\\.)");
+  addRe2(MONGO_PREFIX, R"(^mongo\.((<NAME>)\.))");
 
   // http.[<stat_prefix>.]rds.(<route_config_name>.)<base_stat>
-  addRegex(RDS_ROUTE_CONFIG, R"(^http(?=\.).*?\.rds\.((.*?)\.)\w+?$)", ".rds.");
+  // Note: <route_config_name> can contain dots thus we have to maintain full
+  // match.
+  addRe2(RDS_ROUTE_CONFIG, R"(^http\.<NAME>\.rds\.((<ROUTE_CONFIG_NAME>)\.)\w+?$)", ".rds.");
 
   // listener_manager.(worker_<id>.)*
-  addRegex(WORKER_ID, R"(^listener_manager\.((worker_\d+)\.))", "listener_manager.worker_");
-}
-
-void TagNameValues::addRegex(const std::string& name, const std::string& regex,
-                             const std::string& substr) {
-  descriptor_vec_.emplace_back(Descriptor{name, regex, substr, Regex::Type::StdRegex});
+  addRe2(WORKER_ID, R"(^listener_manager\.((worker_\d+)\.))", "listener_manager.worker_");
 }
 
 void TagNameValues::addRe2(const std::string& name, const std::string& regex,
                            const std::string& substr) {
-  descriptor_vec_.emplace_back(Descriptor{name, regex, substr, Regex::Type::Re2});
+  // Placeholder tokens are expanded here, so the stored descriptor holds plain RE2 syntax.
+  descriptor_vec_.emplace_back(Descriptor{name, expandRegex(regex), substr, Regex::Type::Re2});
 }
 
 } // namespace Config
diff --git a/source/common/config/well_known_names.h b/source/common/config/well_known_names.h
index 97ce58fd7265c..918360aff1f60 100644
--- a/source/common/config/well_known_names.h
+++ b/source/common/config/well_known_names.h
@@ -129,7 +129,6 @@ class TagNameValues {
   const std::vector<Descriptor>& descriptorVec() const { return descriptor_vec_; }
 
 private:
-  void addRegex(const std::string& name, const std::string& regex, const std::string& substr = "");
   void addRe2(const std::string& name, const std::string& regex, const std::string& substr = "");
 
   // Collection of tag descriptors.
diff --git a/source/common/stats/tag_extractor_impl.cc b/source/common/stats/tag_extractor_impl.cc index 6aefbdf6cd258..5e735d4ab77fe 100644 --- a/source/common/stats/tag_extractor_impl.cc +++ b/source/common/stats/tag_extractor_impl.cc @@ -26,7 +26,9 @@ bool regexStartsWithDot(absl::string_view regex) { TagExtractorImplBase::TagExtractorImplBase(absl::string_view name, absl::string_view regex, absl::string_view substr) - : name_(name), prefix_(std::string(extractRegexPrefix(regex))), substr_(substr) {} + : name_(name), prefix_(std::string(extractRegexPrefix(regex))), substr_(substr) { + PERF_TAG_INIT; +} std::string TagExtractorImplBase::extractRegexPrefix(absl::string_view regex) { std::string prefix; @@ -90,6 +92,7 @@ bool TagExtractorStdRegexImpl::extractTag(absl::string_view stat_name, std::vect if (substrMismatch(stat_name)) { PERF_RECORD(perf, "re-skip", name_); + PERF_TAG_INC(skipped_); return false; } @@ -113,9 +116,11 @@ bool TagExtractorStdRegexImpl::extractTag(absl::string_view stat_name, std::vect std::string::size_type end = remove_subexpr.second - stat_name.begin(); remove_characters.insert(start, end); PERF_RECORD(perf, "re-match", name_); + PERF_TAG_INC(matched_); return true; } PERF_RECORD(perf, "re-miss", name_); + PERF_TAG_INC(missed_); return false; } @@ -129,6 +134,7 @@ bool TagExtractorRe2Impl::extractTag(absl::string_view stat_name, std::vector #include +#ifdef ENVOY_PERF_ANNOTATION +#include +#endif + #include "envoy/stats/tag_extractor.h" #include "common/common/regex.h" @@ -14,6 +18,29 @@ namespace Envoy { namespace Stats { +// To check if a tag extractor is actually used you can run +// bazel test //test/... 
--test_output=streamed --define=perf_annotation=enabled
+#ifdef ENVOY_PERF_ANNOTATION
+
+struct Counters {
+  uint32_t skipped_{}; // Bumped when extractTag() bails out on a substr mismatch.
+  uint32_t matched_{}; // Bumped when extractTag() matches and returns true.
+  uint32_t missed_{};  // Bumped when extractTag() runs the regex and returns false.
+};
+
+#define PERF_TAG_COUNTERS std::unique_ptr<Counters> counters_
+
+#define PERF_TAG_INIT counters_ = std::make_unique<Counters>()
+#define PERF_TAG_INC(member) ++(counters_->member)
+
+#else
+
+#define PERF_TAG_COUNTERS
+#define PERF_TAG_INIT
+#define PERF_TAG_INC(member)
+
+#endif
+
 class TagExtractorImplBase : public TagExtractor {
 public:
   /**
@@ -32,6 +59,14 @@ class TagExtractorImplBase : public TagExtractor {
   TagExtractorImplBase(absl::string_view name, absl::string_view regex,
                        absl::string_view substr = "");
 
+#ifdef ENVOY_PERF_ANNOTATION
+  // Dumps this extractor's skipped/matched/missed counters to stdout when it is destroyed.
+  ~TagExtractorImplBase() override {
+    std::cout << fmt::format("TagStats for {} tag extractor: skipped {}, matched {}, missed {}",
+                             name_, counters_->skipped_, counters_->matched_, counters_->missed_)
+              << std::endl;
+  }
+#endif
   std::string name() const override { return name_; }
   absl::string_view prefixToken() const override { return prefix_; }
 
@@ -62,6 +97,8 @@ class TagExtractorImplBase : public TagExtractor {
   const std::string name_;
   const std::string prefix_;
   const std::string substr_;
+
+  PERF_TAG_COUNTERS;
 };
 
 class TagExtractorStdRegexImpl : public TagExtractorImplBase {
diff --git a/test/common/stats/BUILD b/test/common/stats/BUILD
index ab9daa0d66435..53f7d41fc4466 100644
--- a/test/common/stats/BUILD
+++ b/test/common/stats/BUILD
@@ -231,6 +231,25 @@ envoy_cc_test(
     ],
 )
 
+envoy_cc_benchmark_binary(
+    name = "tag_extractor_impl_benchmark",
+    srcs = [
+        "tag_extractor_impl_speed_test.cc",
+    ],
+    external_deps = [
+        "benchmark",
+    ],
+    deps = [
+        "//source/common/stats:tag_producer_lib",
+        "@envoy_api//envoy/config/metrics/v3:pkg_cc_proto",
+    ],
+)
+
+envoy_benchmark_test(
+    name = "tag_extractor_impl_benchmark_test",
+    benchmark_binary = "tag_extractor_impl_benchmark",
+)
+
 envoy_cc_test(
     name = "thread_local_store_test",
     srcs = 
["thread_local_store_test.cc"],
diff --git a/test/common/stats/tag_extractor_impl_speed_test.cc b/test/common/stats/tag_extractor_impl_speed_test.cc
new file mode 100644
index 0000000000000..e6a8603d73b28
--- /dev/null
+++ b/test/common/stats/tag_extractor_impl_speed_test.cc
@@ -0,0 +1,123 @@
+// Note: this should be run with --compilation_mode=opt
+// Running ./bazel-out/k8-opt/bin/test/common/stats/tag_extractor_impl_benchmark
+// Run on (24 X 4300 MHz CPU s)
+// CPU Caches:
+//   L1 Data 32 KiB (x12)
+//   L1 Instruction 32 KiB (x12)
+//   L2 Unified 1024 KiB (x12)
+//   L3 Unified 16896 KiB (x1)
+// Load Average: 0.94, 0.75, 0.88
+// ***WARNING*** CPU scaling is enabled, the benchmark real time
+// measurements may be noisy and will incur extra overhead.
+// ------------------------------------------------------------
+// Benchmark                  Time             CPU   Iterations
+// ------------------------------------------------------------
+// BM_ExtractTags/0        1759 ns         1757 ns       397721
+// BM_ExtractTags/1         498 ns          497 ns      1386765
+// BM_ExtractTags/2         814 ns          813 ns       789388
+// BM_ExtractTags/3         621 ns          620 ns      1109055
+// BM_ExtractTags/4        1320 ns         1318 ns       536701
+// BM_ExtractTags/5         882 ns          880 ns       817115
+// BM_ExtractTags/6         327 ns          327 ns      2171259
+// BM_ExtractTags/7         572 ns          571 ns      1205250
+// BM_ExtractTags/8        1238 ns         1236 ns       558481
+// BM_ExtractTags/9        1669 ns         1667 ns       414483
+// BM_ExtractTags/10        310 ns          310 ns      2237065
+// BM_ExtractTags/11        476 ns          476 ns      1465925
+// BM_ExtractTags/12       1102 ns         1100 ns       631707
+// BM_ExtractTags/13       1307 ns         1305 ns       513760
+// BM_ExtractTags/14       1583 ns         1581 ns       447159
+// BM_ExtractTags/15        957 ns          956 ns       729726
+// BM_ExtractTags/16        822 ns          821 ns       869110
+// BM_ExtractTags/17        821 ns          820 ns       839293
+// BM_ExtractTags/18        783 ns          782 ns       898442
+// BM_ExtractTags/19        330 ns          329 ns      2098821
+// BM_ExtractTags/20        342 ns          342 ns      2044062
+// BM_ExtractTags/21        389 ns          389 ns      1785110
+// BM_ExtractTags/22        847 ns          846 ns       831652
+// BM_ExtractTags/23       2022 ns         2019 ns       353368
+// BM_ExtractTags/24        306 ns          305 ns      2226702
+// BM_ExtractTags/25        277 ns          277 ns      2516796
+// BM_ExtractTags/26        494 ns          494 ns      1363306
+
+#include <cstdint>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include "envoy/config/metrics/v3/stats.pb.h"
+
+#include "common/common/assert.h"
+#include "common/config/well_known_names.h"
+#include "common/stats/tag_producer_impl.h"
+
+#include "benchmark/benchmark.h"
+
+namespace Envoy {
+namespace Stats {
+namespace {
+
+// A stat name paired with the number of tags produceTags() must yield for it.
+using Params = std::tuple<std::string, uint32_t>;
+
+// Benchmark inputs: BM_ExtractTags/<i> in the results above is measured on params[i].
+// The last two entries are long names that exercise the no-match and late-match paths.
+const std::vector<Params> params = {
+    {"listener.127.0.0.1_3012.http.http_prefix.downstream_rq_5xx", 3},
+    {"cluster.ratelimit.upstream_rq_timeout", 1},
+    {"listener.[__1]_0.ssl.cipher.AES256-SHA", 2},
+    {"cluster.ratelimit.ssl.ciphers.ECDHE-RSA-AES128-GCM-SHA256", 2},
+    {"listener.[2001_0db8_85a3_0000_0000_8a2e_0370_7334]_3543.ssl.cipher.AES256-SHA", 2},
+    {"listener.127.0.0.1_0.ssl.cipher.AES256-SHA", 2},
+    {"mongo.mongo_filter.op_reply", 1},
+    {"mongo.mongo_filter.cmd.foo_cmd.reply_size", 2},
+    {"mongo.mongo_filter.collection.bar_collection.query.multi_get", 2},
+    {"mongo.mongo_filter.collection.bar_collection.callsite.baz_callsite.query.scatter_get", 3},
+    {"ratelimit.foo_ratelimiter.over_limit", 1},
+    {"http.egress_dynamodb_iad.downstream_cx_total", 1},
+    {"http.egress_dynamodb_iad.dynamodb.operation.Query.upstream_rq_time", 2},
+    {"http.egress_dynamodb_iad.dynamodb.table.bar_table.upstream_rq_time", 2},
+    {"http.egress_dynamodb_iad.dynamodb.table.bar_table.capacity.Query.__partition_id=ABC1234", 4},
+    {"cluster.grpc_cluster.grpc.grpc_service_1.grpc_method_1.success", 3},
+    {"vhost.vhost_1.vcluster.vcluster_1.upstream_rq_2xx", 3},
+    {"vhost.vhost_1.vcluster.vcluster_1.upstream_rq_200", 3},
+    {"http.egress_dynamodb_iad.user_agent.ios.downstream_cx_total", 2},
+    {"auth.clientssl.clientssl_prefix.auth_ip_allowlist", 1},
+    {"tcp.tcp_prefix.downstream_flow_control_resumed_reading_total", 1},
+    {"udp.udp_prefix-with-dashes.downstream_flow_control_resumed_reading_total", 1},
+    {"http.fault_connection_manager.fault.fault_cluster.aborts_injected", 2},
+    {"http.rds_connection_manager.rds.route_config.123.update_success", 2},
+    {"listener_manager.worker_123.dispatcher.loop_duration_us", 1},
+    {"mongo_mongo_mongo_mongo.this_is_rather_long_string_which "
+     "does_not_match_and_consumes_a_lot_in_case_of_backtracking_imposed_by_greedy_pattern",
+     0},
+    {"another_long_but_matching_string_which_may_consume_resources_if_missing_end_of_line_lock_rq_"
+     "2xx",
+     1},
+};
+
+// Times produceTags() on params[state.range(0)] using a producer built from a default-constructed
+// StatsConfig, and asserts the expected tag count on every iteration.
+// NOLINTNEXTLINE(readability-identifier-naming)
+void BM_ExtractTags(benchmark::State& state) {
+  TagProducerImpl tag_extractors{envoy::config::metrics::v3::StatsConfig()};
+  const auto idx = state.range(0);
+  // The registered range is hard-coded below, so fail loudly if it runs past the table.
+  RELEASE_ASSERT(idx >= 0 && static_cast<size_t>(idx) < params.size(), "index out of range");
+  const auto& p = params[idx];
+  absl::string_view str = std::get<0>(p);
+  const uint32_t tags_size = std::get<1>(p);
+
+  for (auto _ : state) {
+    UNREFERENCED_PARAMETER(_);
+    TagVector tags;
+    tag_extractors.produceTags(str, tags);
+    RELEASE_ASSERT(tags.size() == tags_size, "");
+  }
+}
+// Keep the upper bound equal to params.size() - 1 when entries are added or removed.
+BENCHMARK(BM_ExtractTags)->DenseRange(0, 26, 1);
+
+} // namespace
+} // namespace Stats
+} // namespace Envoy