Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
1215e11
stats: convert tag extractor regexs to Re2
rojkov Dec 18, 2020
f242af4
extend tag values to include dash and simplify regex for stat prefixes
rojkov Dec 28, 2020
b6865ad
drop optional matches since stat name is not muated until all extract…
rojkov Dec 28, 2020
230baaf
fix clang_tidy check
rojkov Dec 28, 2020
baee204
add reference results for the benchmark
rojkov Dec 28, 2020
12d7cc4
fix check_spelling_pedantic.py
rojkov Dec 28, 2020
c5561f6
fix clang_tidy check once again
rojkov Dec 28, 2020
bdf4545
update regex for tag values
rojkov Dec 29, 2020
b224e44
make regexes expandable with configurable patterns for sections
rojkov Dec 29, 2020
f2fb755
Merge remote-tracking branch 'upstream/master' into re2
rojkov Dec 29, 2020
78aefc7
simplify regex expansion
rojkov Dec 30, 2020
75b8697
Merge remote-tracking branch 'upstream/master' into re2
rojkov Dec 30, 2020
4c5e2d6
revert tag_extractor test to old tag values
rojkov Dec 31, 2020
6f66047
removed unused TagNameValues::addRegex()
rojkov Dec 31, 2020
b04be99
restore square brackets in comment
rojkov Dec 31, 2020
95aa344
Merge remote-tracking branch 'upstream/master' into re2
rojkov Dec 31, 2020
a833dcc
add comments to regexes
rojkov Jan 5, 2021
cc4d575
Merge remote-tracking branch 'upstream/master' into re2
rojkov Jan 5, 2021
9288659
add a comment explaining HTTP_CONN_MANAGER_PREFIX regex
rojkov Jan 5, 2021
1ecbac0
log tag extractor usage
rojkov Jan 8, 2021
f7c9045
put perf debug output under ifdef
rojkov Jan 8, 2021
d4d9553
make output more greppable with TagStats
rojkov Jan 11, 2021
7e4d7f2
Make expandRegex()'s comment more descriptive
rojkov Jan 11, 2021
618a203
amend comments and unify usage of symbol classes in regexes
rojkov Jan 12, 2021
0ad2460
simplify macros
rojkov Jan 12, 2021
910cb04
put destructor under explicit ifdef
rojkov Jan 14, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 79 additions & 45 deletions source/common/config/well_known_names.cc
Original file line number Diff line number Diff line change
@@ -1,8 +1,37 @@
#include "common/config/well_known_names.h"

#include "absl/strings/str_replace.h"

namespace Envoy {
namespace Config {

namespace {

const std::string& wordRegex() { CONSTRUCT_ON_FIRST_USE(std::string, R"([^\.]+)"); }

std::string expandRegex(const std::string& regex) {
Comment thread
jmarantz marked this conversation as resolved.
return absl::StrReplaceAll(
regex, {{"<ADDRESS>",
R"((?:\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}_\d+|\[[_aAbBcCdDeEfF[:digit:]]+\]_\d+))"},
Comment thread
htuch marked this conversation as resolved.
Outdated
{"<CALLSITE>", wordRegex()},
Comment thread
jmarantz marked this conversation as resolved.
Outdated
{"<CIPHER>", R"([0-9A-Za-z_-]+)"},
Comment thread
htuch marked this conversation as resolved.
Outdated
{"<CLUSTER_NAME>", wordRegex()},
{"<CMD>", wordRegex()},
{"<COLLECTION>", wordRegex()},
{"<DOWNSTREAM_CLUSTER>", wordRegex()},
{"<GRPC_METHOD>", wordRegex()},
{"<GRPC_SERVICE>", wordRegex()},
{"<OPERATION_NAME>", wordRegex()},
{"<ROUTE_CONFIG_NAME>", R"([\w-\.]+)"},
{"<STAT_PREFIX>", wordRegex()},
{"<TABLE_NAME>", wordRegex()},
{"<USER_AGENT>", wordRegex()},
{"<VIRTUAL_CLUSTER_NAME>", wordRegex()},
{"<VIRTUAL_HOST_NAME>", wordRegex()}});
}

} // namespace

TagNameValues::TagNameValues() {
// Note: the default regexes are defined below in the order that they will typically be matched
// (see the TagExtractor class definition for an explanation of the iterative matching process).
Expand All @@ -24,107 +53,112 @@ TagNameValues::TagNameValues() {
// - Typical * notation will be used to denote an arbitrary set of characters.

// *_rq(_<response_code>)
addRegex(RESPONSE_CODE, "_rq(_(\\d{3}))$", "_rq_");
addRe2(RESPONSE_CODE, R"(_rq(_(\d{3}))$)", "_rq_");

// *_rq_(<response_code_class>)xx
addRegex(RESPONSE_CODE_CLASS, "_rq_(\\d)xx$", "_rq_");
addRe2(RESPONSE_CODE_CLASS, R"(_rq_((\d))xx$)", "_rq_");

// http.[<stat_prefix>.]dynamodb.table.[<table_name>.]capacity.[<operation_name>.](__partition_id=<last_seven_characters_from_partition_id>)
addRegex(DYNAMO_PARTITION_ID,
"^http(?=\\.).*?\\.dynamodb\\.table(?=\\.).*?\\."
"capacity(?=\\.).*?(\\.__partition_id=(\\w{7}))$",
".dynamodb.table.");
addRe2(
DYNAMO_PARTITION_ID,
R"(^http\.<STAT_PREFIX>\.dynamodb\.table\.<TABLE_NAME>\.capacity\.<OPERATION_NAME>(\.__partition_id=(\w{7}))$)",
".dynamodb.table.");

// http.[<stat_prefix>.]dynamodb.operation.(<operation_name>.)<base_stat> or
// http.[<stat_prefix>.]dynamodb.table.[<table_name>.]capacity.(<operation_name>.)[<partition_id>]
addRegex(DYNAMO_OPERATION,
"^http(?=\\.).*?\\.dynamodb.(?:operation|table(?="
"\\.).*?\\.capacity)(\\.(.*?))(?:\\.|$)",
".dynamodb.");
addRe2(
DYNAMO_OPERATION,
R"(^http\.<STAT_PREFIX>\.dynamodb.(?:operation|table\.<TABLE_NAME>\.capacity)(\.(<OPERATION_NAME>))(?:\.|$))",
".dynamodb.");

// mongo.[<stat_prefix>.]collection.[<collection>.]callsite.(<callsite>.)query.<base_stat>
addRegex(MONGO_CALLSITE,
R"(^mongo(?=\.).*?\.collection(?=\.).*?\.callsite\.((.*?)\.).*?query.\w+?$)",
".collection.");
addRe2(MONGO_CALLSITE,
R"(^mongo\.<STAT_PREFIX>\.collection\.<COLLECTION>\.callsite\.((<CALLSITE>)\.)query\.)",
".collection.");

// http.[<stat_prefix>.]dynamodb.table.(<table_name>.) or
// http.[<stat_prefix>.]dynamodb.error.(<table_name>.)*
addRegex(DYNAMO_TABLE, R"(^http(?=\.).*?\.dynamodb.(?:table|error)\.((.*?)\.))", ".dynamodb.");
addRe2(DYNAMO_TABLE, R"(^http\.<STAT_PREFIX>\.dynamodb.(?:table|error)\.((<TABLE_NAME>)\.))",
".dynamodb.");

// mongo.[<stat_prefix>.]collection.(<collection>.)query.<base_stat>
addRegex(MONGO_COLLECTION, R"(^mongo(?=\.).*?\.collection\.((.*?)\.).*?query.\w+?$)",
".collection.");
addRe2(MONGO_COLLECTION, R"(^mongo\.<STAT_PREFIX>\.collection\.((<COLLECTION>)\.).*?query\.)",
".collection.");

// mongo.[<stat_prefix>.]cmd.(<cmd>.)<base_stat>
addRegex(MONGO_CMD, R"(^mongo(?=\.).*?\.cmd\.((.*?)\.)\w+?$)", ".cmd.");
Comment thread
htuch marked this conversation as resolved.
addRe2(MONGO_CMD, R"(^mongo\.<STAT_PREFIX>\.cmd\.((<CMD>)\.))", ".cmd.");

// cluster.[<route_target_cluster>.]grpc.[<grpc_service>.](<grpc_method>.)<base_stat>
addRegex(GRPC_BRIDGE_METHOD, R"(^cluster(?=\.).*?\.grpc(?=\.).*\.((.*?)\.)\w+?$)", ".grpc.");
// cluster.[<route_target_cluster>.]grpc.<grpc_service>.(<grpc_method>.)*
addRe2(GRPC_BRIDGE_METHOD,
R"(^cluster\.<CLUSTER_NAME>\.grpc\.<GRPC_SERVICE>\.((<GRPC_METHOD>)\.))", ".grpc.");

// http.[<stat_prefix>.]user_agent.(<user_agent>.)<base_stat>
addRegex(HTTP_USER_AGENT, R"(^http(?=\.).*?\.user_agent\.((.*?)\.)\w+?$)", ".user_agent.");
// http.[<stat_prefix>.]user_agent.(<user_agent>.)*
addRe2(HTTP_USER_AGENT, R"(^http\.<STAT_PREFIX>\.user_agent\.((<USER_AGENT>)\.))",
".user_agent.");

// vhost.[<virtual host name>.]vcluster.(<virtual_cluster_name>.)<base_stat>
addRegex(VIRTUAL_CLUSTER, R"(^vhost(?=\.).*?\.vcluster\.((.*?)\.)\w+?$)", ".vcluster.");
Comment thread
htuch marked this conversation as resolved.
// vhost.[<virtual host name>.]vcluster.(<virtual_cluster_name>.)*
addRe2(VIRTUAL_CLUSTER, R"(^vhost\.<VIRTUAL_HOST_NAME>\.vcluster\.((<VIRTUAL_CLUSTER_NAME>)\.))",
".vcluster.");

// http.[<stat_prefix>.]fault.(<downstream_cluster>.)<base_stat>
addRegex(FAULT_DOWNSTREAM_CLUSTER, R"(^http(?=\.).*?\.fault\.((.*?)\.)\w+?$)", ".fault.");
// http.[<stat_prefix>.]fault.(<downstream_cluster>.)*
addRe2(FAULT_DOWNSTREAM_CLUSTER, R"(^http\.<STAT_PREFIX>\.fault\.((<DOWNSTREAM_CLUSTER>)\.))",
".fault.");

// listener.[<address>.]ssl.cipher.(<cipher>)
addRegex(SSL_CIPHER, R"(^listener(?=\.).*?\.ssl\.cipher(\.(.*?))$)");
addRe2(SSL_CIPHER, R"(^listener\..*?\.ssl\.cipher(\.(<CIPHER>))$)");

// cluster.[<cluster_name>.]ssl.ciphers.(<cipher>)
addRegex(SSL_CIPHER_SUITE, R"(^cluster(?=\.).*?\.ssl\.ciphers(\.(.*?))$)", ".ssl.ciphers.");
addRe2(SSL_CIPHER_SUITE, R"(^cluster\.<CLUSTER_NAME>\.ssl\.ciphers(\.(<CIPHER>))$)",
".ssl.ciphers.");

// cluster.[<route_target_cluster>.]grpc.(<grpc_service>.)*
addRegex(GRPC_BRIDGE_SERVICE, R"(^cluster(?=\.).*?\.grpc\.((.*?)\.))", ".grpc.");
addRe2(GRPC_BRIDGE_SERVICE, R"(^cluster\.<CLUSTER_NAME>\.grpc\.((<GRPC_SERVICE>)\.))", ".grpc.");

// tcp.(<stat_prefix>.)<base_stat>
addRegex(TCP_PREFIX, R"(^tcp\.((.*?)\.)\w+?$)");
addRe2(TCP_PREFIX, R"(^tcp\.((<STAT_PREFIX>)\.))");

// udp.(<stat_prefix>.)<base_stat>
addRegex(UDP_PREFIX, R"(^udp\.((.*?)\.)\w+?$)");
addRe2(UDP_PREFIX, R"(^udp\.((<STAT_PREFIX>)\.))");

// auth.clientssl.(<stat_prefix>.)<base_stat>
addRegex(CLIENTSSL_PREFIX, R"(^auth\.clientssl\.((.*?)\.)\w+?$)");
// auth.clientssl.(<stat_prefix>.)*
addRe2(CLIENTSSL_PREFIX, R"(^auth\.clientssl\.((<STAT_PREFIX>)\.))");

// ratelimit.(<stat_prefix>.)<base_stat>
addRegex(RATELIMIT_PREFIX, R"(^ratelimit\.((.*?)\.)\w+?$)");
// ratelimit.(<stat_prefix>.)*
addRe2(RATELIMIT_PREFIX, R"(^ratelimit\.((<STAT_PREFIX>)\.))");

// cluster.(<cluster_name>.)*
addRe2(CLUSTER_NAME, "^cluster\\.(([^\\.]+)\\.).*");
addRe2(CLUSTER_NAME, R"(^cluster\.((<CLUSTER_NAME>)\.))");

// listener.[<address>.]http.(<stat_prefix>.)*
addRegex(HTTP_CONN_MANAGER_PREFIX, R"(^listener(?=\.).*?\.http\.((.*?)\.))", ".http.");
addRe2(HTTP_CONN_MANAGER_PREFIX, R"(^listener\..*?\.http\.((<STAT_PREFIX>)\.))", ".http.");

// http.(<stat_prefix>.)*
addRegex(HTTP_CONN_MANAGER_PREFIX, "^http\\.((.*?)\\.)");
addRe2(HTTP_CONN_MANAGER_PREFIX, R"(^http\.((<STAT_PREFIX>)\.))");

// listener.(<address>.)*
addRegex(LISTENER_ADDRESS,
R"(^listener\.(((?:[_.[:digit:]]*|[_\[\]aAbBcCdDeEfF[:digit:]]*))\.))");
addRe2(LISTENER_ADDRESS, R"(^listener\.((<ADDRESS>)\.))");

// vhost.(<virtual host name>.)*
addRegex(VIRTUAL_HOST, "^vhost\\.((.*?)\\.)");
addRe2(VIRTUAL_HOST, R"(^vhost\.((<VIRTUAL_HOST_NAME>)\.))");

// mongo.(<stat_prefix>.)*
addRegex(MONGO_PREFIX, "^mongo\\.((.*?)\\.)");
addRe2(MONGO_PREFIX, R"(^mongo\.((<STAT_PREFIX>)\.))");

// http.[<stat_prefix>.]rds.(<route_config_name>.)<base_stat>
addRegex(RDS_ROUTE_CONFIG, R"(^http(?=\.).*?\.rds\.((.*?)\.)\w+?$)", ".rds.");
addRe2(RDS_ROUTE_CONFIG, R"(^http\.<STAT_PREFIX>\.rds\.((<ROUTE_CONFIG_NAME>)\.)\w+?$)", ".rds.");

// listener_manager.(worker_<id>.)*
addRegex(WORKER_ID, R"(^listener_manager\.((worker_\d+)\.))", "listener_manager.worker_");
addRe2(WORKER_ID, R"(^listener_manager\.((worker_\d+)\.))", "listener_manager.worker_");
}

void TagNameValues::addRegex(const std::string& name, const std::string& regex,
const std::string& substr) {
descriptor_vec_.emplace_back(Descriptor{name, regex, substr, Regex::Type::StdRegex});
descriptor_vec_.emplace_back(Descriptor{name, expandRegex(regex), substr, Regex::Type::StdRegex});

@jmarantz jmarantz Dec 30, 2020

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: I think TagNameValues::addRegex is not called now. Delete?

}

void TagNameValues::addRe2(const std::string& name, const std::string& regex,
const std::string& substr) {
descriptor_vec_.emplace_back(Descriptor{name, regex, substr, Regex::Type::Re2});
descriptor_vec_.emplace_back(Descriptor{name, expandRegex(regex), substr, Regex::Type::Re2});
}

} // namespace Config
Expand Down
4 changes: 2 additions & 2 deletions source/common/stats/tag_extractor_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,8 @@ bool TagExtractorRe2Impl::extractTag(absl::string_view stat_name, std::vector<Ta
re2::StringPiece remove_subexpr, value_subexpr;

// The regex must match and contain one or more subexpressions (all after the first are ignored).
if (re2::RE2::FullMatch(re2::StringPiece(stat_name.data(), stat_name.size()), regex_,
&remove_subexpr, &value_subexpr) &&
if (re2::RE2::PartialMatch(re2::StringPiece(stat_name.data(), stat_name.size()), regex_,
&remove_subexpr, &value_subexpr) &&
!remove_subexpr.empty()) {

// value_subexpr is the optional second submatch. It is usually inside the first submatch
Expand Down
19 changes: 19 additions & 0 deletions test/common/stats/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,25 @@ envoy_cc_test(
],
)

envoy_cc_benchmark_binary(
name = "tag_extractor_impl_benchmark",
srcs = [
"tag_extractor_impl_speed_test.cc",
],
external_deps = [
"benchmark",
],
deps = [
"//source/common/stats:tag_producer_lib",
"@envoy_api//envoy/config/metrics/v3:pkg_cc_proto",
],
)

envoy_benchmark_test(
name = "tag_extractor_impl_benchmark_test",
benchmark_binary = "tag_extractor_impl_benchmark",
)

envoy_cc_test(
name = "thread_local_store_test",
srcs = ["thread_local_store_test.cc"],
Expand Down
110 changes: 110 additions & 0 deletions test/common/stats/tag_extractor_impl_speed_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
// Note: this should be run with --compilation_mode=opt
// Running ./bazel-out/k8-opt/bin/test/common/stats/tag_extractor_impl_benchmark
// Run on (24 X 4300 MHz CPU s)
// CPU Caches:
// L1 Data 32 KiB (x12)
// L1 Instruction 32 KiB (x12)
// L2 Unified 1024 KiB (x12)
// L3 Unified 16896 KiB (x1)
// Load Average: 0.94, 0.75, 0.88
// ***WARNING*** CPU scaling is enabled, the benchmark real time
// measurements may be noisy and will incur extra overhead.
// ------------------------------------------------------------
// Benchmark Time CPU Iterations
// ------------------------------------------------------------
// BM_ExtractTags/0 1796 ns 1793 ns 401011
// BM_ExtractTags/1 513 ns 512 ns 1364401
// BM_ExtractTags/2 794 ns 793 ns 899533
// BM_ExtractTags/3 646 ns 645 ns 1091666
// BM_ExtractTags/4 1315 ns 1314 ns 523727
// BM_ExtractTags/5 854 ns 853 ns 830341
// BM_ExtractTags/6 336 ns 336 ns 2085144
// BM_ExtractTags/7 574 ns 573 ns 1205468
// BM_ExtractTags/8 1264 ns 1262 ns 542151
// BM_ExtractTags/9 1724 ns 1721 ns 408224
// BM_ExtractTags/10 316 ns 316 ns 2203384
// BM_ExtractTags/11 522 ns 521 ns 1383972
// BM_ExtractTags/12 1159 ns 1157 ns 591665
// BM_ExtractTags/13 1348 ns 1346 ns 518341
// BM_ExtractTags/14 1605 ns 1603 ns 445517
// BM_ExtractTags/15 959 ns 958 ns 720045
// BM_ExtractTags/16 820 ns 818 ns 857287
// BM_ExtractTags/17 806 ns 805 ns 833224
// BM_ExtractTags/18 843 ns 842 ns 830836
// BM_ExtractTags/19 343 ns 343 ns 2088403
// BM_ExtractTags/20 351 ns 350 ns 1988046
// BM_ExtractTags/21 404 ns 404 ns 1711767
// BM_ExtractTags/22 874 ns 873 ns 802379
// BM_ExtractTags/23 2041 ns 2038 ns 334105
// BM_ExtractTags/24 316 ns 316 ns 2216693
// BM_ExtractTags/25 298 ns 298 ns 2302454
// BM_ExtractTags/26 506 ns 506 ns 1384901

#include "envoy/config/metrics/v3/stats.pb.h"

#include "common/common/assert.h"
#include "common/config/well_known_names.h"
#include "common/stats/tag_producer_impl.h"

#include "benchmark/benchmark.h"

namespace Envoy {
namespace Stats {
namespace {

using Params = std::tuple<std::string, uint32_t>;

const std::vector<Params> params = {
{"listener.127.0.0.1_3012.http.http_prefix.downstream_rq_5xx", 3},
{"cluster.ratelimit.upstream_rq_timeout", 1},
{"listener.[__1]_0.ssl.cipher.AES256-SHA", 2},
{"cluster.ratelimit.ssl.ciphers.ECDHE-RSA-AES128-GCM-SHA256", 2},
{"listener.[2001_0db8_85a3_0000_0000_8a2e_0370_7334]_3543.ssl.cipher.AES256-SHA", 2},
{"listener.127.0.0.1_0.ssl.cipher.AES256-SHA", 2},
{"mongo.mongo_filter.op_reply", 1},
{"mongo.mongo_filter.cmd.foo_cmd.reply_size", 2},
{"mongo.mongo_filter.collection.bar_collection.query.multi_get", 2},
{"mongo.mongo_filter.collection.bar_collection.callsite.baz_callsite.query.scatter_get", 3},
{"ratelimit.foo_ratelimiter.over_limit", 1},
{"http.egress_dynamodb_iad.downstream_cx_total", 1},
{"http.egress_dynamodb_iad.dynamodb.operation.Query.upstream_rq_time", 2},
{"http.egress_dynamodb_iad.dynamodb.table.bar_table.upstream_rq_time", 2},
{"http.egress_dynamodb_iad.dynamodb.table.bar_table.capacity.Query.__partition_id=ABC1234", 4},
{"cluster.grpc_cluster.grpc.grpc_service_1.grpc_method_1.success", 3},
{"vhost.vhost_1.vcluster.vcluster_1.upstream_rq_2xx", 3},
{"vhost.vhost_1.vcluster.vcluster_1.upstream_rq_200", 3},
{"http.egress_dynamodb_iad.user_agent.ios.downstream_cx_total", 2},
{"auth.clientssl.clientssl_prefix.auth_ip_allowlist", 1},
{"tcp.tcp_prefix.downstream_flow_control_resumed_reading_total", 1},
{"udp.udp_prefix-with-dashes.downstream_flow_control_resumed_reading_total", 1},
{"http.fault_connection_manager.fault.fault_cluster.aborts_injected", 2},
{"http.rds_connection_manager.rds.route_config.123.update_success", 2},
{"listener_manager.worker_123.dispatcher.loop_duration_us", 1},
{"mongo_mongo_mongo_mongo.this_is_rather_long_string_which "
"does_not_match_and_consumes_a_lot_in_case_of_backtracking_imposed_by_greedy_pattern",
0},
{"another_long_but_matching_string_which_may_consume_resources_if_missing_end_of_line_lock_rq_"
"2xx",
1},
};

// NOLINTNEXTLINE(readability-identifier-naming)
void BM_ExtractTags(benchmark::State& state) {
TagProducerImpl tag_extractors{envoy::config::metrics::v3::StatsConfig()};
const auto idx = state.range(0);
const auto& p = params[idx];
absl::string_view str = std::get<0>(p);
const uint32_t tags_size = std::get<1>(p);

for (auto _ : state) {
Comment thread
jmarantz marked this conversation as resolved.
UNREFERENCED_PARAMETER(_);
TagVector tags;
tag_extractors.produceTags(str, tags);
RELEASE_ASSERT(tags.size() == tags_size, "");
}
}
BENCHMARK(BM_ExtractTags)->DenseRange(0, 26, 1);

} // namespace
} // namespace Stats
} // namespace Envoy
Loading