Skip to content
Merged
51 changes: 30 additions & 21 deletions source/common/config/well_known_names.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,55 +24,60 @@ TagNameValues::TagNameValues() {
// - Typical * notation will be used to denote an arbitrary set of characters.

// *_rq(_<response_code>)
addRegex(RESPONSE_CODE, "_rq(_(\\d{3}))$");
addRegex(RESPONSE_CODE, "_rq(_(\\d{3}))$", "_rq_");

// *_rq_(<response_code_class>)xx
addRegex(RESPONSE_CODE_CLASS, "_rq_(\\d)xx$");
addRegex(RESPONSE_CODE_CLASS, "_rq_(\\d)xx$", "_rq_");

// http.[<stat_prefix>.]dynamodb.table.[<table_name>.]capacity.[<operation_name>.](__partition_id=<last_seven_characters_from_partition_id>)
addRegex(DYNAMO_PARTITION_ID, "^http(?=\\.).*?\\.dynamodb\\.table(?=\\.).*?\\."
"capacity(?=\\.).*?(\\.__partition_id=(\\w{7}))"
"$");
addRegex(DYNAMO_PARTITION_ID,
"^http(?=\\.).*?\\.dynamodb\\.table(?=\\.).*?\\."
"capacity(?=\\.).*?(\\.__partition_id=(\\w{7}))$",
".dynamodb.table.");

// http.[<stat_prefix>.]dynamodb.operation.(<operation_name>.)<base_stat> or
// http.[<stat_prefix>.]dynamodb.table.[<table_name>.]capacity.(<operation_name>.)[<partition_id>]
addRegex(DYNAMO_OPERATION, "^http(?=\\.).*?\\.dynamodb.(?:operation|table(?="
"\\.).*?\\.capacity)(\\.(.*?))(?:\\.|$)");
addRegex(DYNAMO_OPERATION,
"^http(?=\\.).*?\\.dynamodb.(?:operation|table(?="
"\\.).*?\\.capacity)(\\.(.*?))(?:\\.|$)",
".dynamodb.");

// mongo.[<stat_prefix>.]collection.[<collection>.]callsite.(<callsite>.)query.<base_stat>
addRegex(MONGO_CALLSITE,
"^mongo(?=\\.).*?\\.collection(?=\\.).*?\\.callsite\\.((.*?)\\.).*?query.\\w+?$");
"^mongo(?=\\.).*?\\.collection(?=\\.).*?\\.callsite\\.((.*?)\\.).*?query.\\w+?$",
".collection.");

// http.[<stat_prefix>.]dynamodb.table.(<table_name>.) or
// http.[<stat_prefix>.]dynamodb.error.(<table_name>.)*
addRegex(DYNAMO_TABLE, "^http(?=\\.).*?\\.dynamodb.(?:table|error)\\.((.*?)\\.)");
addRegex(DYNAMO_TABLE, "^http(?=\\.).*?\\.dynamodb.(?:table|error)\\.((.*?)\\.)", ".dynamodb.");

// mongo.[<stat_prefix>.]collection.(<collection>.)query.<base_stat>
addRegex(MONGO_COLLECTION, "^mongo(?=\\.).*?\\.collection\\.((.*?)\\.).*?query.\\w+?$");
addRegex(MONGO_COLLECTION, "^mongo(?=\\.).*?\\.collection\\.((.*?)\\.).*?query.\\w+?$",
".collection.");

// mongo.[<stat_prefix>.]cmd.(<cmd>.)<base_stat>
addRegex(MONGO_CMD, "^mongo(?=\\.).*?\\.cmd\\.((.*?)\\.)\\w+?$");
addRegex(MONGO_CMD, "^mongo(?=\\.).*?\\.cmd\\.((.*?)\\.)\\w+?$", ".cmd.");

// cluster.[<route_target_cluster>.]grpc.[<grpc_service>.](<grpc_method>.)<base_stat>
addRegex(GRPC_BRIDGE_METHOD, "^cluster(?=\\.).*?\\.grpc(?=\\.).*\\.((.*?)\\.)\\w+?$");
addRegex(GRPC_BRIDGE_METHOD, "^cluster(?=\\.).*?\\.grpc(?=\\.).*\\.((.*?)\\.)\\w+?$", ".grpc.");

// http.[<stat_prefix>.]user_agent.(<user_agent>.)<base_stat>
addRegex(HTTP_USER_AGENT, "^http(?=\\.).*?\\.user_agent\\.((.*?)\\.)\\w+?$");
addRegex(HTTP_USER_AGENT, "^http(?=\\.).*?\\.user_agent\\.((.*?)\\.)\\w+?$", ".user_agent.");

// vhost.[<virtual host name>.]vcluster.(<virtual_cluster_name>.)<base_stat>
addRegex(VIRTUAL_CLUSTER, "^vhost(?=\\.).*?\\.vcluster\\.((.*?)\\.)\\w+?$");
addRegex(VIRTUAL_CLUSTER, "^vhost(?=\\.).*?\\.vcluster\\.((.*?)\\.)\\w+?$", ".vcluster.");

// http.[<stat_prefix>.]fault.(<downstream_cluster>.)<base_stat>
addRegex(FAULT_DOWNSTREAM_CLUSTER, "^http(?=\\.).*?\\.fault\\.((.*?)\\.)\\w+?$");
addRegex(FAULT_DOWNSTREAM_CLUSTER, "^http(?=\\.).*?\\.fault\\.((.*?)\\.)\\w+?$", ".fault.");

// listener.[<address>.]ssl.cipher.(<cipher>)
addRegex(SSL_CIPHER, "^listener(?=\\.).*?\\.ssl\\.cipher(\\.(.*?))$");

// cluster.[<cluster_name>.]ssl.ciphers.(<cipher>)
addRegex(SSL_CIPHER_SUITE, "^cluster(?=\\.).*?\\.ssl\\.ciphers(\\.(.*?))$");
addRegex(SSL_CIPHER_SUITE, "^cluster(?=\\.).*?\\.ssl\\.ciphers(\\.(.*?))$", ".ssl.ciphers.");

// cluster.[<route_target_cluster>.]grpc.(<grpc_service>.)*
addRegex(GRPC_BRIDGE_SERVICE, "^cluster(?=\\.).*?\\.grpc\\.((.*?)\\.)");
addRegex(GRPC_BRIDGE_SERVICE, "^cluster(?=\\.).*?\\.grpc\\.((.*?)\\.)", ".grpc.");

// tcp.(<stat_prefix>.)<base_stat>
addRegex(TCP_PREFIX, "^tcp\\.((.*?)\\.)\\w+?$");
Expand All @@ -86,8 +91,11 @@ TagNameValues::TagNameValues() {
// cluster.(<cluster_name>.)*
addRegex(CLUSTER_NAME, "^cluster\\.((.*?)\\.)");

// http.(<stat_prefix>.)* or listener.[<address>.]http.(<stat_prefix>.)*
addRegex(HTTP_CONN_MANAGER_PREFIX, "^(?:|listener(?=\\.).*?\\.)http\\.((.*?)\\.)");
// listener.[<address>.]http.(<stat_prefix>.)*
addRegex(HTTP_CONN_MANAGER_PREFIX, "^listener(?=\\.).*?\\.http\\.((.*?)\\.)", ".http.");

// http.(<stat_prefix>.)*
addRegex(HTTP_CONN_MANAGER_PREFIX, "^http\\.((.*?)\\.)");

// listener.(<address>.)*
addRegex(LISTENER_ADDRESS,
Expand All @@ -100,8 +108,9 @@ TagNameValues::TagNameValues() {
addRegex(MONGO_PREFIX, "^mongo\\.((.*?)\\.)");
}

void TagNameValues::addRegex(const std::string& name, const std::string& regex) {
descriptor_vec_.emplace_back(Descriptor(name, regex));
void TagNameValues::addRegex(const std::string& name, const std::string& regex,
const std::string& substr) {
descriptor_vec_.emplace_back(Descriptor(name, regex, substr));
}

} // namespace Config
Expand Down
6 changes: 4 additions & 2 deletions source/common/config/well_known_names.h
Original file line number Diff line number Diff line change
Expand Up @@ -230,9 +230,11 @@ class TagNameValues {
* tags, such as "_rq_(\\d)xx$", will probably stay as regexes.
*/
struct Descriptor {
Descriptor(const std::string& name, const std::string& regex) : name_(name), regex_(regex) {}
Descriptor(const std::string& name, const std::string& regex, const std::string& substr = "")
: name_(name), regex_(regex), substr_(substr) {}
const std::string name_;
const std::string regex_;
const std::string substr_;
};

// Cluster name tag
Expand Down Expand Up @@ -289,7 +291,7 @@ class TagNameValues {
const std::vector<Descriptor>& descriptorVec() const { return descriptor_vec_; }

private:
void addRegex(const std::string& name, const std::string& regex);
void addRegex(const std::string& name, const std::string& regex, const std::string& substr = "");

// Collection of tag descriptors.
std::vector<Descriptor> descriptor_vec_;
Expand Down
32 changes: 20 additions & 12 deletions source/common/stats/stats_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,9 @@ std::string Utility::sanitizeStatsName(const std::string& name) {
return stats_name;
}

TagExtractorImpl::TagExtractorImpl(const std::string& name, const std::string& regex)
: name_(name), prefix_(std::string(extractRegexPrefix(regex))),
TagExtractorImpl::TagExtractorImpl(const std::string& name, const std::string& regex,
const std::string& substr)
: name_(name), prefix_(std::string(extractRegexPrefix(regex))), substr_(substr),
regex_(RegexUtil::parseRegex(regex)) {}

std::string TagExtractorImpl::extractRegexPrefix(absl::string_view regex) {
Expand All @@ -93,7 +94,8 @@ std::string TagExtractorImpl::extractRegexPrefix(absl::string_view regex) {
}

TagExtractorPtr TagExtractorImpl::createTagExtractor(const std::string& name,
const std::string& regex) {
const std::string& regex,
const std::string& substr) {

if (name.empty()) {
throw EnvoyException("tag_name cannot be empty");
Expand All @@ -103,13 +105,22 @@ TagExtractorPtr TagExtractorImpl::createTagExtractor(const std::string& name,
throw EnvoyException(fmt::format(
"No regex specified for tag specifier and no default regex for name: '{}'", name));
}
return TagExtractorPtr{new TagExtractorImpl(name, regex)};
return TagExtractorPtr{new TagExtractorImpl(name, regex, substr)};
}

bool TagExtractorImpl::substrMismatch(const std::string& stat_name) const {
return !substr_.empty() && stat_name.find(substr_) == std::string::npos;
}

bool TagExtractorImpl::extractTag(const std::string& stat_name, std::vector<Tag>& tags,
IntervalSet<size_t>& remove_characters) const {

PERF_OPERATION(perf);

if (substrMismatch(stat_name)) {
PERF_RECORD(perf, "re-skip-substr", name_);
return false;
}

std::smatch match;
// The regex must match and contain one or more subexpressions (all after the first are ignored).
if (std::regex_search(stat_name, match, regex_) && match.size() > 1) {
Expand Down Expand Up @@ -181,15 +192,11 @@ int TagProducerImpl::addExtractorsMatching(absl::string_view name) {
int num_found = 0;
for (const auto& desc : Config::TagNames::get().descriptorVec()) {
if (desc.name_ == name) {
addExtractor(Stats::TagExtractorImpl::createTagExtractor(desc.name_, desc.regex_));
addExtractor(
Stats::TagExtractorImpl::createTagExtractor(desc.name_, desc.regex_, desc.substr_));
++num_found;
}
}
// TODO(jmarantz): Changing the default tag regexes so that more than one regex can
// yield the same tag, on the theory that this will reduce regex backtracking. At the
// moment, this doesn't happen, so this flow isn't well tested. When we start exploiting
// this, and it's tested, we can simply remove this assert.
ASSERT(num_found <= 1);
return num_found;
}

Expand Down Expand Up @@ -241,7 +248,8 @@ TagProducerImpl::addDefaultExtractors(const envoy::config::metrics::v2::StatsCon
if (!config.has_use_all_default_tags() || config.use_all_default_tags().value()) {
for (const auto& desc : Config::TagNames::get().descriptorVec()) {
names.emplace(desc.name_);
addExtractor(Stats::TagExtractorImpl::createTagExtractor(desc.name_, desc.regex_));
addExtractor(
Stats::TagExtractorImpl::createTagExtractor(desc.name_, desc.regex_, desc.substr_));
}
}
return names;
Expand Down
17 changes: 15 additions & 2 deletions source/common/stats/stats_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,28 @@ class TagExtractorImpl : public TagExtractor {
* Creates a tag extractor from the regex provided. name and regex must be non-empty.
* @param name name for tag extractor.
* @param regex regex expression.
* @param substr a substring that -- if provided -- must be present in a stat name
* in order to match the regex. This is an optional performance tweak
* to avoid large numbers of failed regex lookups.
* @return TagExtractorPtr newly constructed TagExtractor.
*/
static TagExtractorPtr createTagExtractor(const std::string& name, const std::string& regex);
static TagExtractorPtr createTagExtractor(const std::string& name, const std::string& regex,
const std::string& substr = "");

TagExtractorImpl(const std::string& name, const std::string& regex);
TagExtractorImpl(const std::string& name, const std::string& regex,
const std::string& substr = "");
std::string name() const override { return name_; }
bool extractTag(const std::string& tag_extracted_name, std::vector<Tag>& tags,
IntervalSet<size_t>& remove_characters) const override;
absl::string_view prefixToken() const override { return prefix_; }

/**
* @param stat_name The stat name
* @return bool indicates whether tag extraction should be skipped for this stat_name due
* to a substring mismatch.
*/
bool substrMismatch(const std::string& stat_name) const;

private:
/**
* Examines a regex string, looking for the pattern: ^alphanumerics_with_underscores\.
Expand All @@ -53,6 +65,7 @@ class TagExtractorImpl : public TagExtractor {

const std::string name_;
const std::string prefix_;
const std::string substr_;
const std::regex regex_;
};

Expand Down
12 changes: 12 additions & 0 deletions test/common/stats/stats_impl_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,18 @@ TEST(TagExtractorTest, SingleSubexpression) {
EXPECT_EQ("listner_port", tags.at(0).name_);
}

TEST(TagExtractorTest, substrMismatch) {
TagExtractorImpl tag_extractor("listner_port", "^listener\\.(\\d+?\\.)\\.foo\\.", ".foo.");
EXPECT_TRUE(tag_extractor.substrMismatch("listener.80.downstream_cx_total"));
EXPECT_FALSE(tag_extractor.substrMismatch("listener.80.downstream_cx_total.foo.bar"));
}

TEST(TagExtractorTest, noSubstrMismatch) {
TagExtractorImpl tag_extractor("listner_port", "^listener\\.(\\d+?\\.)\\.foo\\.");
EXPECT_FALSE(tag_extractor.substrMismatch("listener.80.downstream_cx_total"));
EXPECT_FALSE(tag_extractor.substrMismatch("listener.80.downstream_cx_total.foo.bar"));
}

TEST(TagExtractorTest, EmptyName) {
EXPECT_THROW_WITH_MESSAGE(TagExtractorImpl::createTagExtractor("", "^listener\\.(\\d+?\\.)"),
EnvoyException, "tag_name cannot be empty");
Expand Down