diff --git a/source/common/config/well_known_names.cc b/source/common/config/well_known_names.cc index fc6ee0f41edbb..40d3ed42e4c31 100644 --- a/source/common/config/well_known_names.cc +++ b/source/common/config/well_known_names.cc @@ -24,55 +24,60 @@ TagNameValues::TagNameValues() { // - Typical * notation will be used to denote an arbitrary set of characters. // *_rq(_) - addRegex(RESPONSE_CODE, "_rq(_(\\d{3}))$"); + addRegex(RESPONSE_CODE, "_rq(_(\\d{3}))$", "_rq_"); // *_rq_()xx - addRegex(RESPONSE_CODE_CLASS, "_rq_(\\d)xx$"); + addRegex(RESPONSE_CODE_CLASS, "_rq_(\\d)xx$", "_rq_"); // http.[.]dynamodb.table.[.]capacity.[.](__partition_id=) - addRegex(DYNAMO_PARTITION_ID, "^http(?=\\.).*?\\.dynamodb\\.table(?=\\.).*?\\." - "capacity(?=\\.).*?(\\.__partition_id=(\\w{7}))" - "$"); + addRegex(DYNAMO_PARTITION_ID, + "^http(?=\\.).*?\\.dynamodb\\.table(?=\\.).*?\\." + "capacity(?=\\.).*?(\\.__partition_id=(\\w{7}))$", + ".dynamodb.table."); // http.[.]dynamodb.operation.(.) or // http.[.]dynamodb.table.[.]capacity.(.)[] - addRegex(DYNAMO_OPERATION, "^http(?=\\.).*?\\.dynamodb.(?:operation|table(?=" - "\\.).*?\\.capacity)(\\.(.*?))(?:\\.|$)"); + addRegex(DYNAMO_OPERATION, + "^http(?=\\.).*?\\.dynamodb.(?:operation|table(?=" + "\\.).*?\\.capacity)(\\.(.*?))(?:\\.|$)", + ".dynamodb."); // mongo.[.]collection.[.]callsite.(.)query. addRegex(MONGO_CALLSITE, - "^mongo(?=\\.).*?\\.collection(?=\\.).*?\\.callsite\\.((.*?)\\.).*?query.\\w+?$"); + "^mongo(?=\\.).*?\\.collection(?=\\.).*?\\.callsite\\.((.*?)\\.).*?query.\\w+?$", + ".collection."); // http.[.]dynamodb.table.(.) or // http.[.]dynamodb.error.(.)* - addRegex(DYNAMO_TABLE, "^http(?=\\.).*?\\.dynamodb.(?:table|error)\\.((.*?)\\.)"); + addRegex(DYNAMO_TABLE, "^http(?=\\.).*?\\.dynamodb.(?:table|error)\\.((.*?)\\.)", ".dynamodb."); // mongo.[.]collection.(.)query. - addRegex(MONGO_COLLECTION, "^mongo(?=\\.).*?\\.collection\\.((.*?)\\.).*?query.\\w+?$"); + addRegex(MONGO_COLLECTION, "^mongo(?=\\.).*?\\.collection\\.((.*?)\\.).*?query.\\w+?$", + ".collection."); // mongo.[.]cmd.(.) - addRegex(MONGO_CMD, "^mongo(?=\\.).*?\\.cmd\\.((.*?)\\.)\\w+?$"); + addRegex(MONGO_CMD, "^mongo(?=\\.).*?\\.cmd\\.((.*?)\\.)\\w+?$", ".cmd."); // cluster.[.]grpc.[.](.) - addRegex(GRPC_BRIDGE_METHOD, "^cluster(?=\\.).*?\\.grpc(?=\\.).*\\.((.*?)\\.)\\w+?$"); + addRegex(GRPC_BRIDGE_METHOD, "^cluster(?=\\.).*?\\.grpc(?=\\.).*\\.((.*?)\\.)\\w+?$", ".grpc."); // http.[.]user_agent.(.) - addRegex(HTTP_USER_AGENT, "^http(?=\\.).*?\\.user_agent\\.((.*?)\\.)\\w+?$"); + addRegex(HTTP_USER_AGENT, "^http(?=\\.).*?\\.user_agent\\.((.*?)\\.)\\w+?$", ".user_agent."); // vhost.[.]vcluster.(.) - addRegex(VIRTUAL_CLUSTER, "^vhost(?=\\.).*?\\.vcluster\\.((.*?)\\.)\\w+?$"); + addRegex(VIRTUAL_CLUSTER, "^vhost(?=\\.).*?\\.vcluster\\.((.*?)\\.)\\w+?$", ".vcluster."); // http.[.]fault.(.) - addRegex(FAULT_DOWNSTREAM_CLUSTER, "^http(?=\\.).*?\\.fault\\.((.*?)\\.)\\w+?$"); + addRegex(FAULT_DOWNSTREAM_CLUSTER, "^http(?=\\.).*?\\.fault\\.((.*?)\\.)\\w+?$", ".fault."); // listener.[
.]ssl.cipher.() addRegex(SSL_CIPHER, "^listener(?=\\.).*?\\.ssl\\.cipher(\\.(.*?))$"); // cluster.[.]ssl.ciphers.() - addRegex(SSL_CIPHER_SUITE, "^cluster(?=\\.).*?\\.ssl\\.ciphers(\\.(.*?))$"); + addRegex(SSL_CIPHER_SUITE, "^cluster(?=\\.).*?\\.ssl\\.ciphers(\\.(.*?))$", ".ssl.ciphers."); // cluster.[.]grpc.(.)* - addRegex(GRPC_BRIDGE_SERVICE, "^cluster(?=\\.).*?\\.grpc\\.((.*?)\\.)"); + addRegex(GRPC_BRIDGE_SERVICE, "^cluster(?=\\.).*?\\.grpc\\.((.*?)\\.)", ".grpc."); // tcp.(.) addRegex(TCP_PREFIX, "^tcp\\.((.*?)\\.)\\w+?$"); @@ -86,8 +91,11 @@ TagNameValues::TagNameValues() { // cluster.(.)* addRegex(CLUSTER_NAME, "^cluster\\.((.*?)\\.)"); - // http.(.)* or listener.[
.]http.(.)* - addRegex(HTTP_CONN_MANAGER_PREFIX, "^(?:|listener(?=\\.).*?\\.)http\\.((.*?)\\.)"); + // listener.[
.]http.(.)* + addRegex(HTTP_CONN_MANAGER_PREFIX, "^listener(?=\\.).*?\\.http\\.((.*?)\\.)", ".http."); + + // http.(.)* + addRegex(HTTP_CONN_MANAGER_PREFIX, "^http\\.((.*?)\\.)"); // listener.(
.)* addRegex(LISTENER_ADDRESS, @@ -100,8 +108,9 @@ TagNameValues::TagNameValues() { addRegex(MONGO_PREFIX, "^mongo\\.((.*?)\\.)"); } -void TagNameValues::addRegex(const std::string& name, const std::string& regex) { - descriptor_vec_.emplace_back(Descriptor(name, regex)); +void TagNameValues::addRegex(const std::string& name, const std::string& regex, + const std::string& substr) { + descriptor_vec_.emplace_back(Descriptor(name, regex, substr)); } } // namespace Config diff --git a/source/common/config/well_known_names.h b/source/common/config/well_known_names.h index 99dd2ee1c2d27..1ef78c0fb6caf 100644 --- a/source/common/config/well_known_names.h +++ b/source/common/config/well_known_names.h @@ -230,9 +230,11 @@ class TagNameValues { * tags, such as "_rq_(\\d)xx$", will probably stay as regexes. */ struct Descriptor { - Descriptor(const std::string& name, const std::string& regex) : name_(name), regex_(regex) {} + Descriptor(const std::string& name, const std::string& regex, const std::string& substr = "") + : name_(name), regex_(regex), substr_(substr) {} const std::string name_; const std::string regex_; + const std::string substr_; }; // Cluster name tag @@ -289,7 +291,7 @@ class TagNameValues { const std::vector& descriptorVec() const { return descriptor_vec_; } private: - void addRegex(const std::string& name, const std::string& regex); + void addRegex(const std::string& name, const std::string& regex, const std::string& substr = ""); // Collection of tag descriptors. std::vector descriptor_vec_; diff --git a/source/common/stats/stats_impl.cc b/source/common/stats/stats_impl.cc index 5bc04df0025f8..03b88d6b625d1 100644 --- a/source/common/stats/stats_impl.cc +++ b/source/common/stats/stats_impl.cc @@ -69,8 +69,9 @@ std::string Utility::sanitizeStatsName(const std::string& name) { return stats_name; } -TagExtractorImpl::TagExtractorImpl(const std::string& name, const std::string& regex) - : name_(name), prefix_(std::string(extractRegexPrefix(regex))), +TagExtractorImpl::TagExtractorImpl(const std::string& name, const std::string& regex, + const std::string& substr) + : name_(name), prefix_(std::string(extractRegexPrefix(regex))), substr_(substr), regex_(RegexUtil::parseRegex(regex)) {} std::string TagExtractorImpl::extractRegexPrefix(absl::string_view regex) { @@ -93,7 +94,8 @@ std::string TagExtractorImpl::extractRegexPrefix(absl::string_view regex) { } TagExtractorPtr TagExtractorImpl::createTagExtractor(const std::string& name, - const std::string& regex) { + const std::string& regex, + const std::string& substr) { if (name.empty()) { throw EnvoyException("tag_name cannot be empty"); @@ -103,13 +105,22 @@ TagExtractorPtr TagExtractorImpl::createTagExtractor(const std::string& name, throw EnvoyException(fmt::format( "No regex specified for tag specifier and no default regex for name: '{}'", name)); } - return TagExtractorPtr{new TagExtractorImpl(name, regex)}; + return TagExtractorPtr{new TagExtractorImpl(name, regex, substr)}; +} + +bool TagExtractorImpl::substrMismatch(const std::string& stat_name) const { + return !substr_.empty() && stat_name.find(substr_) == std::string::npos; } bool TagExtractorImpl::extractTag(const std::string& stat_name, std::vector& tags, IntervalSet& remove_characters) const { - PERF_OPERATION(perf); + + if (substrMismatch(stat_name)) { + PERF_RECORD(perf, "re-skip-substr", name_); + return false; + } + std::smatch match; // The regex must match and contain one or more subexpressions (all after the first are ignored). if (std::regex_search(stat_name, match, regex_) && match.size() > 1) { @@ -181,15 +192,11 @@ int TagProducerImpl::addExtractorsMatching(absl::string_view name) { int num_found = 0; for (const auto& desc : Config::TagNames::get().descriptorVec()) { if (desc.name_ == name) { - addExtractor(Stats::TagExtractorImpl::createTagExtractor(desc.name_, desc.regex_)); + addExtractor( + Stats::TagExtractorImpl::createTagExtractor(desc.name_, desc.regex_, desc.substr_)); ++num_found; } } - // TODO(jmarantz): Changing the default tag regexes so that more than one regex can - // yield the same tag, on the theory that this will reduce regex backtracking. At the - // moment, this doesn't happen, so this flow isn't well tested. When we start exploiting - // this, and it's tested, we can simply remove this assert. - ASSERT(num_found <= 1); return num_found; } @@ -241,7 +248,8 @@ TagProducerImpl::addDefaultExtractors(const envoy::config::metrics::v2::StatsCon if (!config.has_use_all_default_tags() || config.use_all_default_tags().value()) { for (const auto& desc : Config::TagNames::get().descriptorVec()) { names.emplace(desc.name_); - addExtractor(Stats::TagExtractorImpl::createTagExtractor(desc.name_, desc.regex_)); + addExtractor( + Stats::TagExtractorImpl::createTagExtractor(desc.name_, desc.regex_, desc.substr_)); } } return names; diff --git a/source/common/stats/stats_impl.h b/source/common/stats/stats_impl.h index 3b825473d9d8e..85629d3449ea8 100644 --- a/source/common/stats/stats_impl.h +++ b/source/common/stats/stats_impl.h @@ -32,16 +32,28 @@ class TagExtractorImpl : public TagExtractor { * Creates a tag extractor from the regex provided. name and regex must be non-empty. * @param name name for tag extractor. * @param regex regex expression. + * @param substr a substring that -- if provided -- must be present in a stat name + * in order to match the regex. This is an optional performance tweak + * to avoid large numbers of failed regex lookups. * @return TagExtractorPtr newly constructed TagExtractor. */ - static TagExtractorPtr createTagExtractor(const std::string& name, const std::string& regex); + static TagExtractorPtr createTagExtractor(const std::string& name, const std::string& regex, + const std::string& substr = ""); - TagExtractorImpl(const std::string& name, const std::string& regex); + TagExtractorImpl(const std::string& name, const std::string& regex, + const std::string& substr = ""); std::string name() const override { return name_; } bool extractTag(const std::string& tag_extracted_name, std::vector& tags, IntervalSet& remove_characters) const override; absl::string_view prefixToken() const override { return prefix_; } + /** + * @param stat_name The stat name + * @return bool indicates whether tag extraction should be skipped for this stat_name due + * to a substring mismatch. + */ + bool substrMismatch(const std::string& stat_name) const; + private: /** * Examines a regex string, looking for the pattern: ^alphanumerics_with_underscores\. @@ -53,6 +65,7 @@ class TagExtractorImpl : public TagExtractor { const std::string name_; const std::string prefix_; + const std::string substr_; const std::regex regex_; }; diff --git a/test/common/stats/stats_impl_test.cc b/test/common/stats/stats_impl_test.cc index 0518fe31ae079..70032a59a1b83 100644 --- a/test/common/stats/stats_impl_test.cc +++ b/test/common/stats/stats_impl_test.cc @@ -111,6 +111,18 @@ TEST(TagExtractorTest, SingleSubexpression) { EXPECT_EQ("listner_port", tags.at(0).name_); } +TEST(TagExtractorTest, substrMismatch) { + TagExtractorImpl tag_extractor("listner_port", "^listener\\.(\\d+?\\.)\\.foo\\.", ".foo."); + EXPECT_TRUE(tag_extractor.substrMismatch("listener.80.downstream_cx_total")); + EXPECT_FALSE(tag_extractor.substrMismatch("listener.80.downstream_cx_total.foo.bar")); +} + +TEST(TagExtractorTest, noSubstrMismatch) { + TagExtractorImpl tag_extractor("listner_port", "^listener\\.(\\d+?\\.)\\.foo\\."); + EXPECT_FALSE(tag_extractor.substrMismatch("listener.80.downstream_cx_total")); + EXPECT_FALSE(tag_extractor.substrMismatch("listener.80.downstream_cx_total.foo.bar")); +} + TEST(TagExtractorTest, EmptyName) { EXPECT_THROW_WITH_MESSAGE(TagExtractorImpl::createTagExtractor("", "^listener\\.(\\d+?\\.)"), EnvoyException, "tag_name cannot be empty");