Skip to content
Merged
51 changes: 30 additions & 21 deletions source/common/config/well_known_names.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,55 +24,60 @@ TagNameValues::TagNameValues() {
// - Typical * notation will be used to denote an arbitrary set of characters.

// *_rq(_<response_code>)
addRegex(RESPONSE_CODE, "_rq(_(\\d{3}))$");
addRegex(RESPONSE_CODE, "_rq(_(\\d{3}))$", "_rq_");

// *_rq_(<response_code_class>)xx
addRegex(RESPONSE_CODE_CLASS, "_rq_(\\d)xx$");
addRegex(RESPONSE_CODE_CLASS, "_rq_(\\d)xx$", "_rq_");

// http.[<stat_prefix>.]dynamodb.table.[<table_name>.]capacity.[<operation_name>.](__partition_id=<last_seven_characters_from_partition_id>)
addRegex(DYNAMO_PARTITION_ID, "^http(?=\\.).*?\\.dynamodb\\.table(?=\\.).*?\\."
"capacity(?=\\.).*?(\\.__partition_id=(\\w{7}))"
"$");
addRegex(DYNAMO_PARTITION_ID,
"^http(?=\\.).*?\\.dynamodb\\.table(?=\\.).*?\\."
"capacity(?=\\.).*?(\\.__partition_id=(\\w{7}))$",
".dynamodb.table.");

// http.[<stat_prefix>.]dynamodb.operation.(<operation_name>.)<base_stat> or
// http.[<stat_prefix>.]dynamodb.table.[<table_name>.]capacity.(<operation_name>.)[<partition_id>]
addRegex(DYNAMO_OPERATION, "^http(?=\\.).*?\\.dynamodb.(?:operation|table(?="
"\\.).*?\\.capacity)(\\.(.*?))(?:\\.|$)");
addRegex(DYNAMO_OPERATION,
"^http(?=\\.).*?\\.dynamodb.(?:operation|table(?="
"\\.).*?\\.capacity)(\\.(.*?))(?:\\.|$)",
".dynamodb.");

// mongo.[<stat_prefix>.]collection.[<collection>.]callsite.(<callsite>.)query.<base_stat>
addRegex(MONGO_CALLSITE,
"^mongo(?=\\.).*?\\.collection(?=\\.).*?\\.callsite\\.((.*?)\\.).*?query.\\w+?$");
"^mongo(?=\\.).*?\\.collection(?=\\.).*?\\.callsite\\.((.*?)\\.).*?query.\\w+?$",
".collection.");

// http.[<stat_prefix>.]dynamodb.table.(<table_name>.) or
// http.[<stat_prefix>.]dynamodb.error.(<table_name>.)*
addRegex(DYNAMO_TABLE, "^http(?=\\.).*?\\.dynamodb.(?:table|error)\\.((.*?)\\.)");
addRegex(DYNAMO_TABLE, "^http(?=\\.).*?\\.dynamodb.(?:table|error)\\.((.*?)\\.)", ".dynamodb.");

// mongo.[<stat_prefix>.]collection.(<collection>.)query.<base_stat>
addRegex(MONGO_COLLECTION, "^mongo(?=\\.).*?\\.collection\\.((.*?)\\.).*?query.\\w+?$");
addRegex(MONGO_COLLECTION, "^mongo(?=\\.).*?\\.collection\\.((.*?)\\.).*?query.\\w+?$",
".collection.");

// mongo.[<stat_prefix>.]cmd.(<cmd>.)<base_stat>
addRegex(MONGO_CMD, "^mongo(?=\\.).*?\\.cmd\\.((.*?)\\.)\\w+?$");
addRegex(MONGO_CMD, "^mongo(?=\\.).*?\\.cmd\\.((.*?)\\.)\\w+?$", ".cmd.");

// cluster.[<route_target_cluster>.]grpc.[<grpc_service>.](<grpc_method>.)<base_stat>
addRegex(GRPC_BRIDGE_METHOD, "^cluster(?=\\.).*?\\.grpc(?=\\.).*\\.((.*?)\\.)\\w+?$");
addRegex(GRPC_BRIDGE_METHOD, "^cluster(?=\\.).*?\\.grpc(?=\\.).*\\.((.*?)\\.)\\w+?$", ".grpc.");

// http.[<stat_prefix>.]user_agent.(<user_agent>.)<base_stat>
addRegex(HTTP_USER_AGENT, "^http(?=\\.).*?\\.user_agent\\.((.*?)\\.)\\w+?$");
addRegex(HTTP_USER_AGENT, "^http(?=\\.).*?\\.user_agent\\.((.*?)\\.)\\w+?$", ".user_agent.");

// vhost.[<virtual host name>.]vcluster.(<virtual_cluster_name>.)<base_stat>
addRegex(VIRTUAL_CLUSTER, "^vhost(?=\\.).*?\\.vcluster\\.((.*?)\\.)\\w+?$");
addRegex(VIRTUAL_CLUSTER, "^vhost(?=\\.).*?\\.vcluster\\.((.*?)\\.)\\w+?$", ".vcluster.");

// http.[<stat_prefix>.]fault.(<downstream_cluster>.)<base_stat>
addRegex(FAULT_DOWNSTREAM_CLUSTER, "^http(?=\\.).*?\\.fault\\.((.*?)\\.)\\w+?$");
addRegex(FAULT_DOWNSTREAM_CLUSTER, "^http(?=\\.).*?\\.fault\\.((.*?)\\.)\\w+?$", ".fault.");

// listener.[<address>.]ssl.cipher.(<cipher>)
addRegex(SSL_CIPHER, "^listener(?=\\.).*?\\.ssl\\.cipher(\\.(.*?))$");

// cluster.[<cluster_name>.]ssl.ciphers.(<cipher>)
addRegex(SSL_CIPHER_SUITE, "^cluster(?=\\.).*?\\.ssl\\.ciphers(\\.(.*?))$");
addRegex(SSL_CIPHER_SUITE, "^cluster(?=\\.).*?\\.ssl\\.ciphers(\\.(.*?))$", ".ssl.ciphers.");

// cluster.[<route_target_cluster>.]grpc.(<grpc_service>.)*
addRegex(GRPC_BRIDGE_SERVICE, "^cluster(?=\\.).*?\\.grpc\\.((.*?)\\.)");
addRegex(GRPC_BRIDGE_SERVICE, "^cluster(?=\\.).*?\\.grpc\\.((.*?)\\.)", ".grpc.");

// tcp.(<stat_prefix>.)<base_stat>
addRegex(TCP_PREFIX, "^tcp\\.((.*?)\\.)\\w+?$");
Expand All @@ -86,8 +91,11 @@ TagNameValues::TagNameValues() {
// cluster.(<cluster_name>.)*
addRegex(CLUSTER_NAME, "^cluster\\.((.*?)\\.)");

// http.(<stat_prefix>.)* or listener.[<address>.]http.(<stat_prefix>.)*
addRegex(HTTP_CONN_MANAGER_PREFIX, "^(?:|listener(?=\\.).*?\\.)http\\.((.*?)\\.)");
// listener.[<address>.]http.(<stat_prefix>.)*
addRegex(HTTP_CONN_MANAGER_PREFIX, "^listener(?=\\.).*?\\.http\\.((.*?)\\.)", ".http.");

// http.(<stat_prefix>.)*
addRegex(HTTP_CONN_MANAGER_PREFIX, "^http\\.((.*?)\\.)");

// listener.(<address>.)*
addRegex(LISTENER_ADDRESS,
Expand All @@ -100,8 +108,9 @@ TagNameValues::TagNameValues() {
addRegex(MONGO_PREFIX, "^mongo\\.((.*?)\\.)");
}

void TagNameValues::addRegex(const std::string& name, const std::string& regex) {
descriptor_vec_.emplace_back(Descriptor(name, regex));
void TagNameValues::addRegex(const std::string& name, const std::string& regex,
const std::string& substr) {
descriptor_vec_.emplace_back(Descriptor(name, regex, substr));
}

} // namespace Config
Expand Down
6 changes: 4 additions & 2 deletions source/common/config/well_known_names.h
Original file line number Diff line number Diff line change
Expand Up @@ -230,9 +230,11 @@ class TagNameValues {
* tags, such as "_rq_(\\d)xx$", will probably stay as regexes.
*/
struct Descriptor {
Descriptor(const std::string& name, const std::string& regex) : name_(name), regex_(regex) {}
Descriptor(const std::string& name, const std::string& regex, const std::string& substr = "")
: name_(name), regex_(regex), substr_(substr) {}
const std::string name_;
const std::string regex_;
const std::string substr_;
};

// Cluster name tag
Expand Down Expand Up @@ -289,7 +291,7 @@ class TagNameValues {
const std::vector<Descriptor>& descriptorVec() const { return descriptor_vec_; }

private:
void addRegex(const std::string& name, const std::string& regex);
void addRegex(const std::string& name, const std::string& regex, const std::string& substr = "");

// Collection of tag descriptors.
std::vector<Descriptor> descriptor_vec_;
Expand Down
32 changes: 20 additions & 12 deletions source/common/stats/stats_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,9 @@ std::string Utility::sanitizeStatsName(const std::string& name) {
return stats_name;
}

TagExtractorImpl::TagExtractorImpl(const std::string& name, const std::string& regex)
: name_(name), prefix_(std::string(extractRegexPrefix(regex))),
TagExtractorImpl::TagExtractorImpl(const std::string& name, const std::string& regex,
const std::string& substr)
: name_(name), prefix_(std::string(extractRegexPrefix(regex))), substr_(substr),
regex_(RegexUtil::parseRegex(regex)) {}

std::string TagExtractorImpl::extractRegexPrefix(absl::string_view regex) {
Expand All @@ -93,7 +94,8 @@ std::string TagExtractorImpl::extractRegexPrefix(absl::string_view regex) {
}

TagExtractorPtr TagExtractorImpl::createTagExtractor(const std::string& name,
const std::string& regex) {
const std::string& regex,
const std::string& substr) {

if (name.empty()) {
throw EnvoyException("tag_name cannot be empty");
Expand All @@ -103,13 +105,22 @@ TagExtractorPtr TagExtractorImpl::createTagExtractor(const std::string& name,
throw EnvoyException(fmt::format(
"No regex specified for tag specifier and no default regex for name: '{}'", name));
}
return TagExtractorPtr{new TagExtractorImpl(name, regex)};
return TagExtractorPtr{new TagExtractorImpl(name, regex, substr)};
}

bool TagExtractorImpl::substrMismatch(const std::string& stat_name) const {
return !substr_.empty() && stat_name.find(substr_) == std::string::npos;
}

bool TagExtractorImpl::extractTag(const std::string& stat_name, std::vector<Tag>& tags,
IntervalSet<size_t>& remove_characters) const {

PERF_OPERATION(perf);

if (substrMismatch(stat_name)) {
PERF_RECORD(perf, "re-skip-substr", name_);
return false;
}

std::smatch match;
// The regex must match and contain one or more subexpressions (all after the first are ignored).
if (std::regex_search(stat_name, match, regex_) && match.size() > 1) {
Expand Down Expand Up @@ -181,15 +192,11 @@ int TagProducerImpl::addExtractorsMatching(absl::string_view name) {
int num_found = 0;
for (const auto& desc : Config::TagNames::get().descriptorVec()) {
if (desc.name_ == name) {
addExtractor(Stats::TagExtractorImpl::createTagExtractor(desc.name_, desc.regex_));
addExtractor(
Stats::TagExtractorImpl::createTagExtractor(desc.name_, desc.regex_, desc.substr_));
++num_found;
}
}
// TODO(jmarantz): Changing the default tag regexes so that more than one regex can
// yield the same tag, on the theory that this will reduce regex backtracking. At the
// moment, this doesn't happen, so this flow isn't well tested. When we start exploiting
// this, and it's tested, we can simply remove this assert.
ASSERT(num_found <= 1);
return num_found;
}

Expand Down Expand Up @@ -241,7 +248,8 @@ TagProducerImpl::addDefaultExtractors(const envoy::config::metrics::v2::StatsCon
if (!config.has_use_all_default_tags() || config.use_all_default_tags().value()) {
for (const auto& desc : Config::TagNames::get().descriptorVec()) {
names.emplace(desc.name_);
addExtractor(Stats::TagExtractorImpl::createTagExtractor(desc.name_, desc.regex_));
addExtractor(
Stats::TagExtractorImpl::createTagExtractor(desc.name_, desc.regex_, desc.substr_));
}
}
return names;
Expand Down
17 changes: 15 additions & 2 deletions source/common/stats/stats_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,28 @@ class TagExtractorImpl : public TagExtractor {
* Creates a tag extractor from the regex provided. name and regex must be non-empty.
* @param name name for tag extractor.
* @param regex regex expression.
* @param substr a substring that -- if provided -- must be present in a stat name
* in order to match the regex. This is an optional performance tweak
* to avoid large numbers of failed regex lookups.
* @return TagExtractorPtr newly constructed TagExtractor.
*/
static TagExtractorPtr createTagExtractor(const std::string& name, const std::string& regex);
static TagExtractorPtr createTagExtractor(const std::string& name, const std::string& regex,

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add docs here and elsewhere that explain the prefix and what it does.

@mrice32 mrice32 Mar 7, 2018

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Once that's done, LGTM.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done; thanks for the review, and good catch!

const std::string& substr = "");

TagExtractorImpl(const std::string& name, const std::string& regex);
TagExtractorImpl(const std::string& name, const std::string& regex,
const std::string& substr = "");
std::string name() const override { return name_; }
bool extractTag(const std::string& tag_extracted_name, std::vector<Tag>& tags,
IntervalSet<size_t>& remove_characters) const override;
absl::string_view prefixToken() const override { return prefix_; }

/**
* @param stat_name The stat name
* @return bool indicates whether tag extraction should be skipped for this stat_name due
* to a subdstring mismatch.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: s/subdstring/substring/

*/
bool substrMismatch(const std::string& stat_name) const;

private:
/**
* Examines a regex string, looking for the pattern: ^alphanumerics_with_underscores\.
Expand All @@ -53,6 +65,7 @@ class TagExtractorImpl : public TagExtractor {

const std::string name_;
const std::string prefix_;
const std::string substr_;
const std::regex regex_;
};

Expand Down
12 changes: 12 additions & 0 deletions test/common/stats/stats_impl_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,18 @@ TEST(TagExtractorTest, SingleSubexpression) {
EXPECT_EQ("listner_port", tags.at(0).name_);
}

TEST(TagExtractorTest, substrMismatch) {
TagExtractorImpl tag_extractor("listner_port", "^listener\\.(\\d+?\\.)\\.foo\\.", ".foo.");
EXPECT_TRUE(tag_extractor.substrMismatch("listener.80.downstream_cx_total"));
EXPECT_FALSE(tag_extractor.substrMismatch("listener.80.downstream_cx_total.foo.bar"));
}

TEST(TagExtractorTest, noSubstrMismatch) {
TagExtractorImpl tag_extractor("listner_port", "^listener\\.(\\d+?\\.)\\.foo\\.");
EXPECT_FALSE(tag_extractor.substrMismatch("listener.80.downstream_cx_total"));
EXPECT_FALSE(tag_extractor.substrMismatch("listener.80.downstream_cx_total.foo.bar"));
}

TEST(TagExtractorTest, EmptyName) {
EXPECT_THROW_WITH_MESSAGE(TagExtractorImpl::createTagExtractor("", "^listener\\.(\\d+?\\.)"),
EnvoyException, "tag_name cannot be empty");
Expand Down