-
Notifications
You must be signed in to change notification settings - Fork 5.5k
stats: convert tag extractor regexs to Re2 #14519
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 12 commits
1215e11
f242af4
b6865ad
230baaf
baee204
12d7cc4
c5561f6
bdf4545
b224e44
f2fb755
78aefc7
75b8697
4c5e2d6
6f66047
b04be99
95aa344
a833dcc
cc4d575
9288659
1ecbac0
f7c9045
d4d9553
7e4d7f2
618a203
0ad2460
910cb04
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,8 +1,23 @@ | ||
| #include "common/config/well_known_names.h" | ||
|
|
||
| #include "absl/strings/str_replace.h" | ||
|
|
||
| namespace Envoy { | ||
| namespace Config { | ||
|
|
||
| namespace { | ||
|
|
||
| std::string expandRegex(const std::string& regex) { | ||
| return absl::StrReplaceAll( | ||
| regex, {{"<ADDRESS>", | ||
| R"((?:\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}_\d+|\[[_aAbBcCdDeEfF[:digit:]]+\]_\d+))"}, | ||
|
htuch marked this conversation as resolved.
Outdated
|
||
| {"<CIPHER>", R"([0-9A-Za-z_-]+)"}, | ||
|
htuch marked this conversation as resolved.
Outdated
|
||
| {"<NAME>", R"([^\.]+)"}, | ||
| {"<ROUTE_CONFIG_NAME>", R"([\w-\.]+)"}}); | ||
| } | ||
|
|
||
| } // namespace | ||
|
|
||
| TagNameValues::TagNameValues() { | ||
| // Note: the default regexes are defined below in the order that they will typically be matched | ||
| // (see the TagExtractor class definition for an explanation of the iterative matching process). | ||
|
|
@@ -24,107 +39,102 @@ TagNameValues::TagNameValues() { | |
| // - Typical * notation will be used to denote an arbitrary set of characters. | ||
|
|
||
| // *_rq(_<response_code>) | ||
| addRegex(RESPONSE_CODE, "_rq(_(\\d{3}))$", "_rq_"); | ||
| addRe2(RESPONSE_CODE, R"(_rq(_(\d{3}))$)", "_rq_"); | ||
|
|
||
| // *_rq_(<response_code_class>)xx | ||
| addRegex(RESPONSE_CODE_CLASS, "_rq_(\\d)xx$", "_rq_"); | ||
| addRe2(RESPONSE_CODE_CLASS, R"(_rq_((\d))xx$)", "_rq_"); | ||
|
|
||
| // http.[<stat_prefix>.]dynamodb.table.[<table_name>.]capacity.[<operation_name>.](__partition_id=<last_seven_characters_from_partition_id>) | ||
| addRegex(DYNAMO_PARTITION_ID, | ||
| "^http(?=\\.).*?\\.dynamodb\\.table(?=\\.).*?\\." | ||
| "capacity(?=\\.).*?(\\.__partition_id=(\\w{7}))$", | ||
| ".dynamodb.table."); | ||
| addRe2(DYNAMO_PARTITION_ID, | ||
| R"(^http\.<NAME>\.dynamodb\.table\.<NAME>\.capacity\.<NAME>(\.__partition_id=(\w{7}))$)", | ||
| ".dynamodb.table."); | ||
|
|
||
| // http.[<stat_prefix>.]dynamodb.operation.(<operation_name>.)<base_stat> or | ||
| // http.[<stat_prefix>.]dynamodb.table.[<table_name>.]capacity.(<operation_name>.)[<partition_id>] | ||
| addRegex(DYNAMO_OPERATION, | ||
| "^http(?=\\.).*?\\.dynamodb.(?:operation|table(?=" | ||
| "\\.).*?\\.capacity)(\\.(.*?))(?:\\.|$)", | ||
| ".dynamodb."); | ||
| addRe2(DYNAMO_OPERATION, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think maybe the two options listed above are a holdover from when the stat extraction was serial and this regex would have to run either before or after the previous one. I wonder if this could be simplified now that every extractor runs on the original stat-name, rather than being chained. This is not related to the RE2 conversion and I'm fine to leave this as a TODO. But I'm bringing it up now because it might make it easier to inspect the RE pattern. |
||
| R"(^http\.<NAME>\.dynamodb.(?:operation|table\.<NAME>\.capacity)(\.(<NAME>))(?:\.|$))", | ||
| ".dynamodb."); | ||
|
|
||
| // mongo.[<stat_prefix>.]collection.[<collection>.]callsite.(<callsite>.)query.<base_stat> | ||
| addRegex(MONGO_CALLSITE, | ||
| R"(^mongo(?=\.).*?\.collection(?=\.).*?\.callsite\.((.*?)\.).*?query.\w+?$)", | ||
| ".collection."); | ||
| addRe2(MONGO_CALLSITE, R"(^mongo\.<NAME>\.collection\.<NAME>\.callsite\.((<NAME>)\.)query\.)", | ||
| ".collection."); | ||
|
|
||
| // http.[<stat_prefix>.]dynamodb.table.(<table_name>.) or | ||
| // http.[<stat_prefix>.]dynamodb.error.(<table_name>.)* | ||
| addRegex(DYNAMO_TABLE, R"(^http(?=\.).*?\.dynamodb.(?:table|error)\.((.*?)\.))", ".dynamodb."); | ||
| addRe2(DYNAMO_TABLE, R"(^http\.<NAME>\.dynamodb.(?:table|error)\.((<NAME>)\.))", ".dynamodb."); | ||
|
|
||
| // mongo.[<stat_prefix>.]collection.(<collection>.)query.<base_stat> | ||
| addRegex(MONGO_COLLECTION, R"(^mongo(?=\.).*?\.collection\.((.*?)\.).*?query.\w+?$)", | ||
| ".collection."); | ||
| addRe2(MONGO_COLLECTION, R"(^mongo\.<NAME>\.collection\.((<NAME>)\.).*?query\.)", ".collection."); | ||
|
|
||
| // mongo.[<stat_prefix>.]cmd.(<cmd>.)<base_stat> | ||
| addRegex(MONGO_CMD, R"(^mongo(?=\.).*?\.cmd\.((.*?)\.)\w+?$)", ".cmd."); | ||
|
htuch marked this conversation as resolved.
|
||
| addRe2(MONGO_CMD, R"(^mongo\.<NAME>\.cmd\.((<NAME>)\.))", ".cmd."); | ||
|
|
||
| // cluster.[<route_target_cluster>.]grpc.[<grpc_service>.](<grpc_method>.)<base_stat> | ||
| addRegex(GRPC_BRIDGE_METHOD, R"(^cluster(?=\.).*?\.grpc(?=\.).*\.((.*?)\.)\w+?$)", ".grpc."); | ||
| // cluster.[<route_target_cluster>.]grpc.<grpc_service>.(<grpc_method>.)* | ||
| addRe2(GRPC_BRIDGE_METHOD, R"(^cluster\.<NAME>\.grpc\.<NAME>\.((<NAME>)\.))", ".grpc."); | ||
|
|
||
| // http.[<stat_prefix>.]user_agent.(<user_agent>.)<base_stat> | ||
| addRegex(HTTP_USER_AGENT, R"(^http(?=\.).*?\.user_agent\.((.*?)\.)\w+?$)", ".user_agent."); | ||
| // http.[<stat_prefix>.]user_agent.(<user_agent>.)* | ||
| addRe2(HTTP_USER_AGENT, R"(^http\.<NAME>\.user_agent\.((<NAME>)\.))", ".user_agent."); | ||
|
|
||
| // vhost.[<virtual host name>.]vcluster.(<virtual_cluster_name>.)<base_stat> | ||
| addRegex(VIRTUAL_CLUSTER, R"(^vhost(?=\.).*?\.vcluster\.((.*?)\.)\w+?$)", ".vcluster."); | ||
|
htuch marked this conversation as resolved.
|
||
| // vhost.[<virtual host name>.]vcluster.(<virtual_cluster_name>.)* | ||
| addRe2(VIRTUAL_CLUSTER, R"(^vhost\.<NAME>\.vcluster\.((<NAME>)\.))", ".vcluster."); | ||
|
|
||
| // http.[<stat_prefix>.]fault.(<downstream_cluster>.)<base_stat> | ||
| addRegex(FAULT_DOWNSTREAM_CLUSTER, R"(^http(?=\.).*?\.fault\.((.*?)\.)\w+?$)", ".fault."); | ||
| // http.[<stat_prefix>.]fault.(<downstream_cluster>.)* | ||
| addRe2(FAULT_DOWNSTREAM_CLUSTER, R"(^http\.<NAME>\.fault\.((<NAME>)\.))", ".fault."); | ||
|
|
||
| // listener.[<address>.]ssl.cipher.(<cipher>) | ||
| addRegex(SSL_CIPHER, R"(^listener(?=\.).*?\.ssl\.cipher(\.(.*?))$)"); | ||
| addRe2(SSL_CIPHER, R"(^listener\..*?\.ssl\.cipher(\.(<CIPHER>))$)"); | ||
|
|
||
| // cluster.[<cluster_name>.]ssl.ciphers.(<cipher>) | ||
| addRegex(SSL_CIPHER_SUITE, R"(^cluster(?=\.).*?\.ssl\.ciphers(\.(.*?))$)", ".ssl.ciphers."); | ||
| addRe2(SSL_CIPHER_SUITE, R"(^cluster\.<NAME>\.ssl\.ciphers(\.(<CIPHER>))$)", ".ssl.ciphers."); | ||
|
|
||
| // cluster.[<route_target_cluster>.]grpc.(<grpc_service>.)* | ||
| addRegex(GRPC_BRIDGE_SERVICE, R"(^cluster(?=\.).*?\.grpc\.((.*?)\.))", ".grpc."); | ||
| addRe2(GRPC_BRIDGE_SERVICE, R"(^cluster\.<NAME>\.grpc\.((<NAME>)\.))", ".grpc."); | ||
|
|
||
| // tcp.(<stat_prefix>.)<base_stat> | ||
| addRegex(TCP_PREFIX, R"(^tcp\.((.*?)\.)\w+?$)"); | ||
| addRe2(TCP_PREFIX, R"(^tcp\.((<NAME>)\.))"); | ||
|
|
||
| // udp.(<stat_prefix>.)<base_stat> | ||
| addRegex(UDP_PREFIX, R"(^udp\.((.*?)\.)\w+?$)"); | ||
| addRe2(UDP_PREFIX, R"(^udp\.((<NAME>)\.))"); | ||
|
|
||
| // auth.clientssl.(<stat_prefix>.)<base_stat> | ||
| addRegex(CLIENTSSL_PREFIX, R"(^auth\.clientssl\.((.*?)\.)\w+?$)"); | ||
| // auth.clientssl.(<stat_prefix>.)* | ||
| addRe2(CLIENTSSL_PREFIX, R"(^auth\.clientssl\.((<NAME>)\.))"); | ||
|
|
||
| // ratelimit.(<stat_prefix>.)<base_stat> | ||
| addRegex(RATELIMIT_PREFIX, R"(^ratelimit\.((.*?)\.)\w+?$)"); | ||
| // ratelimit.(<stat_prefix>.)* | ||
| addRe2(RATELIMIT_PREFIX, R"(^ratelimit\.((<NAME>)\.))"); | ||
|
|
||
| // cluster.(<cluster_name>.)* | ||
| addRe2(CLUSTER_NAME, "^cluster\\.(([^\\.]+)\\.).*"); | ||
| addRe2(CLUSTER_NAME, R"(^cluster\.((<NAME>)\.))"); | ||
|
|
||
| // listener.[<address>.]http.(<stat_prefix>.)* | ||
| addRegex(HTTP_CONN_MANAGER_PREFIX, R"(^listener(?=\.).*?\.http\.((.*?)\.))", ".http."); | ||
| addRe2(HTTP_CONN_MANAGER_PREFIX, R"(^listener\..*?\.http\.((<NAME>)\.))", ".http."); | ||
|
jmarantz marked this conversation as resolved.
|
||
|
|
||
| // http.(<stat_prefix>.)* | ||
| addRegex(HTTP_CONN_MANAGER_PREFIX, "^http\\.((.*?)\\.)"); | ||
| addRe2(HTTP_CONN_MANAGER_PREFIX, R"(^http\.((<NAME>)\.))"); | ||
|
|
||
| // listener.(<address>.)* | ||
| addRegex(LISTENER_ADDRESS, | ||
| R"(^listener\.(((?:[_.[:digit:]]*|[_\[\]aAbBcCdDeEfF[:digit:]]*))\.))"); | ||
| addRe2(LISTENER_ADDRESS, R"(^listener\.((<ADDRESS>)\.))"); | ||
|
|
||
| // vhost.(<virtual host name>.)* | ||
| addRegex(VIRTUAL_HOST, "^vhost\\.((.*?)\\.)"); | ||
| addRe2(VIRTUAL_HOST, R"(^vhost\.((<NAME>)\.))"); | ||
|
|
||
| // mongo.(<stat_prefix>.)* | ||
| addRegex(MONGO_PREFIX, "^mongo\\.((.*?)\\.)"); | ||
| addRe2(MONGO_PREFIX, R"(^mongo\.((<NAME>)\.))"); | ||
|
|
||
| // http.[<stat_prefix>.]rds.(<route_config_name>.)<base_stat> | ||
| addRegex(RDS_ROUTE_CONFIG, R"(^http(?=\.).*?\.rds\.((.*?)\.)\w+?$)", ".rds."); | ||
| addRe2(RDS_ROUTE_CONFIG, R"(^http\.<NAME>\.rds\.((<ROUTE_CONFIG_NAME>)\.)\w+?$)", ".rds."); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Out of curiosity, why not drop the base_stat at the end in this case?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
|
||
| // listener_manager.(worker_<id>.)* | ||
| addRegex(WORKER_ID, R"(^listener_manager\.((worker_\d+)\.))", "listener_manager.worker_"); | ||
| addRe2(WORKER_ID, R"(^listener_manager\.((worker_\d+)\.))", "listener_manager.worker_"); | ||
| } | ||
|
|
||
| void TagNameValues::addRegex(const std::string& name, const std::string& regex, | ||
| const std::string& substr) { | ||
| descriptor_vec_.emplace_back(Descriptor{name, regex, substr, Regex::Type::StdRegex}); | ||
| descriptor_vec_.emplace_back(Descriptor{name, expandRegex(regex), substr, Regex::Type::StdRegex}); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: I think TagNameValues::addRegex is not called now. Delete? |
||
| } | ||
|
|
||
| void TagNameValues::addRe2(const std::string& name, const std::string& regex, | ||
| const std::string& substr) { | ||
| descriptor_vec_.emplace_back(Descriptor{name, regex, substr, Regex::Type::Re2}); | ||
| descriptor_vec_.emplace_back(Descriptor{name, expandRegex(regex), substr, Regex::Type::Re2}); | ||
| } | ||
|
|
||
| } // namespace Config | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,110 @@ | ||
| // Note: this should be run with --compilation_mode=opt | ||
| // Running ./bazel-out/k8-opt/bin/test/common/stats/tag_extractor_impl_benchmark | ||
| // Run on (24 X 4300 MHz CPU s) | ||
| // CPU Caches: | ||
| // L1 Data 32 KiB (x12) | ||
| // L1 Instruction 32 KiB (x12) | ||
| // L2 Unified 1024 KiB (x12) | ||
| // L3 Unified 16896 KiB (x1) | ||
| // Load Average: 0.94, 0.75, 0.88 | ||
| // ***WARNING*** CPU scaling is enabled, the benchmark real time | ||
| // measurements may be noisy and will incur extra overhead. | ||
| // ------------------------------------------------------------ | ||
| // Benchmark Time CPU Iterations | ||
| // ------------------------------------------------------------ | ||
| // BM_ExtractTags/0 1759 ns 1757 ns 397721 | ||
| // BM_ExtractTags/1 498 ns 497 ns 1386765 | ||
| // BM_ExtractTags/2 814 ns 813 ns 789388 | ||
| // BM_ExtractTags/3 621 ns 620 ns 1109055 | ||
| // BM_ExtractTags/4 1320 ns 1318 ns 536701 | ||
| // BM_ExtractTags/5 882 ns 880 ns 817115 | ||
| // BM_ExtractTags/6 327 ns 327 ns 2171259 | ||
| // BM_ExtractTags/7 572 ns 571 ns 1205250 | ||
| // BM_ExtractTags/8 1238 ns 1236 ns 558481 | ||
| // BM_ExtractTags/9 1669 ns 1667 ns 414483 | ||
| // BM_ExtractTags/10 310 ns 310 ns 2237065 | ||
| // BM_ExtractTags/11 476 ns 476 ns 1465925 | ||
| // BM_ExtractTags/12 1102 ns 1100 ns 631707 | ||
| // BM_ExtractTags/13 1307 ns 1305 ns 513760 | ||
| // BM_ExtractTags/14 1583 ns 1581 ns 447159 | ||
| // BM_ExtractTags/15 957 ns 956 ns 729726 | ||
| // BM_ExtractTags/16 822 ns 821 ns 869110 | ||
| // BM_ExtractTags/17 821 ns 820 ns 839293 | ||
| // BM_ExtractTags/18 783 ns 782 ns 898442 | ||
| // BM_ExtractTags/19 330 ns 329 ns 2098821 | ||
| // BM_ExtractTags/20 342 ns 342 ns 2044062 | ||
| // BM_ExtractTags/21 389 ns 389 ns 1785110 | ||
| // BM_ExtractTags/22 847 ns 846 ns 831652 | ||
| // BM_ExtractTags/23 2022 ns 2019 ns 353368 | ||
| // BM_ExtractTags/24 306 ns 305 ns 2226702 | ||
| // BM_ExtractTags/25 277 ns 277 ns 2516796 | ||
| // BM_ExtractTags/26 494 ns 494 ns 1363306 | ||
|
|
||
| #include "envoy/config/metrics/v3/stats.pb.h" | ||
|
|
||
| #include "common/common/assert.h" | ||
| #include "common/config/well_known_names.h" | ||
| #include "common/stats/tag_producer_impl.h" | ||
|
|
||
| #include "benchmark/benchmark.h" | ||
|
|
||
| namespace Envoy { | ||
| namespace Stats { | ||
| namespace { | ||
|
|
||
| using Params = std::tuple<std::string, uint32_t>; | ||
|
|
||
| const std::vector<Params> params = { | ||
| {"listener.127.0.0.1_3012.http.http_prefix.downstream_rq_5xx", 3}, | ||
| {"cluster.ratelimit.upstream_rq_timeout", 1}, | ||
| {"listener.[__1]_0.ssl.cipher.AES256-SHA", 2}, | ||
| {"cluster.ratelimit.ssl.ciphers.ECDHE-RSA-AES128-GCM-SHA256", 2}, | ||
| {"listener.[2001_0db8_85a3_0000_0000_8a2e_0370_7334]_3543.ssl.cipher.AES256-SHA", 2}, | ||
| {"listener.127.0.0.1_0.ssl.cipher.AES256-SHA", 2}, | ||
| {"mongo.mongo_filter.op_reply", 1}, | ||
| {"mongo.mongo_filter.cmd.foo_cmd.reply_size", 2}, | ||
| {"mongo.mongo_filter.collection.bar_collection.query.multi_get", 2}, | ||
| {"mongo.mongo_filter.collection.bar_collection.callsite.baz_callsite.query.scatter_get", 3}, | ||
| {"ratelimit.foo_ratelimiter.over_limit", 1}, | ||
| {"http.egress_dynamodb_iad.downstream_cx_total", 1}, | ||
| {"http.egress_dynamodb_iad.dynamodb.operation.Query.upstream_rq_time", 2}, | ||
| {"http.egress_dynamodb_iad.dynamodb.table.bar_table.upstream_rq_time", 2}, | ||
| {"http.egress_dynamodb_iad.dynamodb.table.bar_table.capacity.Query.__partition_id=ABC1234", 4}, | ||
| {"cluster.grpc_cluster.grpc.grpc_service_1.grpc_method_1.success", 3}, | ||
| {"vhost.vhost_1.vcluster.vcluster_1.upstream_rq_2xx", 3}, | ||
| {"vhost.vhost_1.vcluster.vcluster_1.upstream_rq_200", 3}, | ||
| {"http.egress_dynamodb_iad.user_agent.ios.downstream_cx_total", 2}, | ||
| {"auth.clientssl.clientssl_prefix.auth_ip_allowlist", 1}, | ||
| {"tcp.tcp_prefix.downstream_flow_control_resumed_reading_total", 1}, | ||
| {"udp.udp_prefix-with-dashes.downstream_flow_control_resumed_reading_total", 1}, | ||
| {"http.fault_connection_manager.fault.fault_cluster.aborts_injected", 2}, | ||
| {"http.rds_connection_manager.rds.route_config.123.update_success", 2}, | ||
| {"listener_manager.worker_123.dispatcher.loop_duration_us", 1}, | ||
| {"mongo_mongo_mongo_mongo.this_is_rather_long_string_which " | ||
| "does_not_match_and_consumes_a_lot_in_case_of_backtracking_imposed_by_greedy_pattern", | ||
| 0}, | ||
| {"another_long_but_matching_string_which_may_consume_resources_if_missing_end_of_line_lock_rq_" | ||
| "2xx", | ||
| 1}, | ||
| }; | ||
|
|
||
| // NOLINTNEXTLINE(readability-identifier-naming) | ||
| void BM_ExtractTags(benchmark::State& state) { | ||
| TagProducerImpl tag_extractors{envoy::config::metrics::v3::StatsConfig()}; | ||
| const auto idx = state.range(0); | ||
| const auto& p = params[idx]; | ||
| absl::string_view str = std::get<0>(p); | ||
| const uint32_t tags_size = std::get<1>(p); | ||
|
|
||
| for (auto _ : state) { | ||
|
jmarantz marked this conversation as resolved.
|
||
| UNREFERENCED_PARAMETER(_); | ||
| TagVector tags; | ||
| tag_extractors.produceTags(str, tags); | ||
| RELEASE_ASSERT(tags.size() == tags_size, ""); | ||
| } | ||
| } | ||
| BENCHMARK(BM_ExtractTags)->DenseRange(0, 26, 1); | ||
|
|
||
| } // namespace | ||
| } // namespace Stats | ||
| } // namespace Envoy | ||
Uh oh!
There was an error while loading. Please reload this page.