-
Notifications
You must be signed in to change notification settings - Fork 5.5k
stats: Guard regex lookups with substring searches where possible #2693
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
39cdb0e
216ace1
fd2b507
f83184b
380a869
f3b82c3
cabcc61
9628651
1524112
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,55 +24,60 @@ TagNameValues::TagNameValues() { | |
| // - Typical * notation will be used to denote an arbitrary set of characters. | ||
|
|
||
| // *_rq(_<response_code>) | ||
| addRegex(RESPONSE_CODE, "_rq(_(\\d{3}))$"); | ||
| addRegex(RESPONSE_CODE, "_rq(_(\\d{3}))$", "_rq_"); | ||
|
|
||
| // *_rq_(<response_code_class>)xx | ||
| addRegex(RESPONSE_CODE_CLASS, "_rq_(\\d)xx$"); | ||
| addRegex(RESPONSE_CODE_CLASS, "_rq_(\\d)xx$", "_rq_"); | ||
|
|
||
| // http.[<stat_prefix>.]dynamodb.table.[<table_name>.]capacity.[<operation_name>.](__partition_id=<last_seven_characters_from_partition_id>) | ||
| addRegex(DYNAMO_PARTITION_ID, "^http(?=\\.).*?\\.dynamodb\\.table(?=\\.).*?\\." | ||
| "capacity(?=\\.).*?(\\.__partition_id=(\\w{7}))" | ||
| "$"); | ||
| addRegex(DYNAMO_PARTITION_ID, | ||
| "^http(?=\\.).*?\\.dynamodb\\.table(?=\\.).*?\\." | ||
| "capacity(?=\\.).*?(\\.__partition_id=(\\w{7}))$", | ||
| ".dynamodb.table."); | ||
|
|
||
| // http.[<stat_prefix>.]dynamodb.operation.(<operation_name>.)<base_stat> or | ||
| // http.[<stat_prefix>.]dynamodb.table.[<table_name>.]capacity.(<operation_name>.)[<partition_id>] | ||
| addRegex(DYNAMO_OPERATION, "^http(?=\\.).*?\\.dynamodb.(?:operation|table(?=" | ||
| "\\.).*?\\.capacity)(\\.(.*?))(?:\\.|$)"); | ||
| addRegex(DYNAMO_OPERATION, | ||
| "^http(?=\\.).*?\\.dynamodb.(?:operation|table(?=" | ||
| "\\.).*?\\.capacity)(\\.(.*?))(?:\\.|$)", | ||
| ".dynamodb."); | ||
|
|
||
| // mongo.[<stat_prefix>.]collection.[<collection>.]callsite.(<callsite>.)query.<base_stat> | ||
| addRegex(MONGO_CALLSITE, | ||
| "^mongo(?=\\.).*?\\.collection(?=\\.).*?\\.callsite\\.((.*?)\\.).*?query.\\w+?$"); | ||
| "^mongo(?=\\.).*?\\.collection(?=\\.).*?\\.callsite\\.((.*?)\\.).*?query.\\w+?$", | ||
| ".collection."); | ||
|
|
||
| // http.[<stat_prefix>.]dynamodb.table.(<table_name>.) or | ||
| // http.[<stat_prefix>.]dynamodb.error.(<table_name>.)* | ||
| addRegex(DYNAMO_TABLE, "^http(?=\\.).*?\\.dynamodb.(?:table|error)\\.((.*?)\\.)"); | ||
| addRegex(DYNAMO_TABLE, "^http(?=\\.).*?\\.dynamodb.(?:table|error)\\.((.*?)\\.)", ".dynamodb"); | ||
|
|
||
| // mongo.[<stat_prefix>.]collection.(<collection>.)query.<base_stat> | ||
| addRegex(MONGO_COLLECTION, "^mongo(?=\\.).*?\\.collection\\.((.*?)\\.).*?query.\\w+?$"); | ||
| addRegex(MONGO_COLLECTION, "^mongo(?=\\.).*?\\.collection\\.((.*?)\\.).*?query.\\w+?$", | ||
| ".collection."); | ||
|
|
||
| // mongo.[<stat_prefix>.]cmd.(<cmd>.)<base_stat> | ||
| addRegex(MONGO_CMD, "^mongo(?=\\.).*?\\.cmd\\.((.*?)\\.)\\w+?$"); | ||
| addRegex(MONGO_CMD, "^mongo(?=\\.).*?\\.cmd\\.((.*?)\\.)\\w+?$", ".cmd."); | ||
|
|
||
| // cluster.[<route_target_cluster>.]grpc.[<grpc_service>.](<grpc_method>.)<base_stat> | ||
| addRegex(GRPC_BRIDGE_METHOD, "^cluster(?=\\.).*?\\.grpc(?=\\.).*\\.((.*?)\\.)\\w+?$"); | ||
| addRegex(GRPC_BRIDGE_METHOD, "^cluster(?=\\.).*?\\.grpc(?=\\.).*\\.((.*?)\\.)\\w+?$", ".grpc."); | ||
|
|
||
| // http.[<stat_prefix>.]user_agent.(<user_agent>.)<base_stat> | ||
| addRegex(HTTP_USER_AGENT, "^http(?=\\.).*?\\.user_agent\\.((.*?)\\.)\\w+?$"); | ||
| addRegex(HTTP_USER_AGENT, "^http(?=\\.).*?\\.user_agent\\.((.*?)\\.)\\w+?$", ".user_agent."); | ||
|
|
||
| // vhost.[<virtual host name>.]vcluster.(<virtual_cluster_name>.)<base_stat> | ||
| addRegex(VIRTUAL_CLUSTER, "^vhost(?=\\.).*?\\.vcluster\\.((.*?)\\.)\\w+?$"); | ||
| addRegex(VIRTUAL_CLUSTER, "^vhost(?=\\.).*?\\.vcluster\\.((.*?)\\.)\\w+?$", ".vcluster."); | ||
|
|
||
| // http.[<stat_prefix>.]fault.(<downstream_cluster>.)<base_stat> | ||
| addRegex(FAULT_DOWNSTREAM_CLUSTER, "^http(?=\\.).*?\\.fault\\.((.*?)\\.)\\w+?$"); | ||
| addRegex(FAULT_DOWNSTREAM_CLUSTER, "^http(?=\\.).*?\\.fault\\.((.*?)\\.)\\w+?$", ".fault."); | ||
|
|
||
| // listener.[<address>.]ssl.cipher.(<cipher>) | ||
| addRegex(SSL_CIPHER, "^listener(?=\\.).*?\\.ssl\\.cipher(\\.(.*?))$"); | ||
|
|
||
| // cluster.[<cluster_name>.]ssl.ciphers.(<cipher>) | ||
| addRegex(SSL_CIPHER_SUITE, "^cluster(?=\\.).*?\\.ssl\\.ciphers(\\.(.*?))$"); | ||
| addRegex(SSL_CIPHER_SUITE, "^cluster(?=\\.).*?\\.ssl\\.ciphers(\\.(.*?))$", "ssl.ciphers"); | ||
|
|
||
| // cluster.[<route_target_cluster>.]grpc.(<grpc_service>.)* | ||
| addRegex(GRPC_BRIDGE_SERVICE, "^cluster(?=\\.).*?\\.grpc\\.((.*?)\\.)"); | ||
| addRegex(GRPC_BRIDGE_SERVICE, "^cluster(?=\\.).*?\\.grpc\\.((.*?)\\.)", "grpc"); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as above, why not
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto |
||
|
|
||
| // tcp.(<stat_prefix>.)<base_stat> | ||
| addRegex(TCP_PREFIX, "^tcp\\.((.*?)\\.)\\w+?$"); | ||
|
|
@@ -87,7 +92,7 @@ TagNameValues::TagNameValues() { | |
| addRegex(CLUSTER_NAME, "^cluster\\.((.*?)\\.)"); | ||
|
|
||
| // http.(<stat_prefix>.)* or listener.[<address>.]http.(<stat_prefix>.)* | ||
| addRegex(HTTP_CONN_MANAGER_PREFIX, "^(?:|listener(?=\\.).*?\\.)http\\.((.*?)\\.)"); | ||
| addRegex(HTTP_CONN_MANAGER_PREFIX, "^(?:|listener(?=\\.).*?\\.)http\\.((.*?)\\.)", "http."); | ||
|
|
||
| // listener.(<address>.)* | ||
| addRegex(LISTENER_ADDRESS, | ||
|
|
@@ -100,8 +105,9 @@ TagNameValues::TagNameValues() { | |
| addRegex(MONGO_PREFIX, "^mongo\\.((.*?)\\.)"); | ||
| } | ||
|
|
||
| void TagNameValues::addRegex(const std::string& name, const std::string& regex) { | ||
| descriptor_vec_.emplace_back(Descriptor(name, regex)); | ||
| void TagNameValues::addRegex(const std::string& name, const std::string& regex, | ||
| const std::string& substr) { | ||
| descriptor_vec_.emplace_back(Descriptor(name, regex, substr)); | ||
| } | ||
|
|
||
| } // namespace Config | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -69,8 +69,9 @@ std::string Utility::sanitizeStatsName(const std::string& name) { | |
| return stats_name; | ||
| } | ||
|
|
||
| TagExtractorImpl::TagExtractorImpl(const std::string& name, const std::string& regex) | ||
| : name_(name), prefix_(std::string(extractRegexPrefix(regex))), | ||
| TagExtractorImpl::TagExtractorImpl(const std::string& name, const std::string& regex, | ||
| const std::string& substr) | ||
| : name_(name), prefix_(std::string(extractRegexPrefix(regex))), substr_(substr), | ||
| regex_(RegexUtil::parseRegex(regex)) {} | ||
|
|
||
| std::string TagExtractorImpl::extractRegexPrefix(absl::string_view regex) { | ||
|
|
@@ -93,7 +94,8 @@ std::string TagExtractorImpl::extractRegexPrefix(absl::string_view regex) { | |
| } | ||
|
|
||
| TagExtractorPtr TagExtractorImpl::createTagExtractor(const std::string& name, | ||
| const std::string& regex) { | ||
| const std::string& regex, | ||
| const std::string& substr) { | ||
|
|
||
| if (name.empty()) { | ||
| throw EnvoyException("tag_name cannot be empty"); | ||
|
|
@@ -103,13 +105,18 @@ TagExtractorPtr TagExtractorImpl::createTagExtractor(const std::string& name, | |
| throw EnvoyException(fmt::format( | ||
| "No regex specified for tag specifier and no default regex for name: '{}'", name)); | ||
| } | ||
| return TagExtractorPtr{new TagExtractorImpl(name, regex)}; | ||
| return TagExtractorPtr{new TagExtractorImpl(name, regex, substr)}; | ||
| } | ||
|
|
||
| bool TagExtractorImpl::extractTag(const std::string& stat_name, std::vector<Tag>& tags, | ||
| IntervalSet<size_t>& remove_characters) const { | ||
|
|
||
| PERF_OPERATION(perf); | ||
|
|
||
| if (!substr_.empty() && stat_name.find(substr_) == std::string::npos) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add a simple test case to isolate this new portion of the logic and ensure it works as expected. The code is already covered by the regex test cases, but adding a separate test will isolate the functionality from the tests that check the regexes, themselves. See tests in https://github.com/envoyproxy/envoy/blob/master/test/integration/stats_integration_test.cc and https://github.com/envoyproxy/envoy/blob/master/test/common/stats/stats_impl_test.cc#L88 for other tests that test specific parts of the extraction process.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done. Broke out substrMismatch as a separate method to make it easier to test. |
||
| PERF_RECORD(perf, "re-skip-substr", name_); | ||
| return false; | ||
| } | ||
|
|
||
| std::smatch match; | ||
| // The regex must match and contain one or more subexpressions (all after the first are ignored). | ||
| if (std::regex_search(stat_name, match, regex_) && match.size() > 1) { | ||
|
|
@@ -181,7 +188,8 @@ int TagProducerImpl::addExtractorsMatching(absl::string_view name) { | |
| int num_found = 0; | ||
| for (const auto& desc : Config::TagNames::get().descriptorVec()) { | ||
| if (desc.name_ == name) { | ||
| addExtractor(Stats::TagExtractorImpl::createTagExtractor(desc.name_, desc.regex_)); | ||
| addExtractor( | ||
| Stats::TagExtractorImpl::createTagExtractor(desc.name_, desc.regex_, desc.substr_)); | ||
| ++num_found; | ||
| } | ||
| } | ||
|
|
@@ -241,7 +249,8 @@ TagProducerImpl::addDefaultExtractors(const envoy::config::metrics::v2::StatsCon | |
| if (!config.has_use_all_default_tags() || config.use_all_default_tags().value()) { | ||
| for (const auto& desc : Config::TagNames::get().descriptorVec()) { | ||
| names.emplace(desc.name_); | ||
| addExtractor(Stats::TagExtractorImpl::createTagExtractor(desc.name_, desc.regex_)); | ||
| addExtractor( | ||
| Stats::TagExtractorImpl::createTagExtractor(desc.name_, desc.regex_, desc.substr_)); | ||
| } | ||
| } | ||
| return names; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -34,9 +34,11 @@ class TagExtractorImpl : public TagExtractor { | |
| * @param regex regex expression. | ||
| * @return TagExtractorPtr newly constructed TagExtractor. | ||
| */ | ||
| static TagExtractorPtr createTagExtractor(const std::string& name, const std::string& regex); | ||
| static TagExtractorPtr createTagExtractor(const std::string& name, const std::string& regex, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please add docs here and elsewhere that explain the prefix and what it does.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Once that's done, LGTM.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done; thanks for the review, and good catch! |
||
| const std::string& substr = ""); | ||
|
|
||
| TagExtractorImpl(const std::string& name, const std::string& regex); | ||
| TagExtractorImpl(const std::string& name, const std::string& regex, | ||
| const std::string& substr = ""); | ||
| std::string name() const override { return name_; } | ||
| bool extractTag(const std::string& tag_extracted_name, std::vector<Tag>& tags, | ||
| IntervalSet<size_t>& remove_characters) const override; | ||
|
|
@@ -53,6 +55,7 @@ class TagExtractorImpl : public TagExtractor { | |
|
|
||
| const std::string name_; | ||
| const std::string prefix_; | ||
| const std::string substr_; | ||
| const std::regex regex_; | ||
| }; | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why not
".ssl.ciphers."like in the other examples?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That one was an omission, because I had some trouble with ".http." below and I wound up backing off the inclusion of the "." in some places where it was OK.
However I managed to sort out my issues with ".http." by exploiting the fact that I can now have two separate regexes for the same tag.