Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
6ffa3ee
add speed & functionality test for using re2 for stats tag matching, …
jmarantz Oct 31, 2019
fa3573a
Revert "add speed & functionality test for using re2 for stats tag ma…
jmarantz Oct 31, 2019
42ac3d0
add re2 experiments & speed test for stats init
jmarantz Oct 31, 2019
e9b9c88
update a single stat tag-extraction RE to (a) be a better pattern and…
jmarantz Oct 31, 2019
d692af2
clean up names of perf categories.
jmarantz Nov 3, 2019
fd34b44
format
jmarantz Nov 3, 2019
e278b2f
Merge branch 'master' into re-speed-benchmark
jmarantz Nov 3, 2019
7e416e8
Merge branch 'master' into re-speed-benchmark
jmarantz Nov 4, 2019
74abaa4
Merge branch 'master' into re-speed-benchmark
jmarantz Nov 6, 2019
a4f297c
format
jmarantz Nov 6, 2019
c0a8983
Merge branch 'master' into re-speed-benchmark
jmarantz Nov 13, 2019
eba2624
Merge branch 'master' into re-speed-benchmark
jmarantz Nov 7, 2020
edf2bb2
Merge branch 'master' into re-speed-benchmark
jmarantz Nov 7, 2020
626a092
Merge branch 're-speed-benchmark' of github.com:jmarantz/envoy into r…
jmarantz Nov 7, 2020
b1bfbdc
Merge branch 'master' into re-speed-benchmark
jmarantz Nov 9, 2020
46a3900
review comments (base-class name and enums).
jmarantz Nov 9, 2020
0fa1ca2
format
jmarantz Nov 9, 2020
fad62de
Remove extraneous main.
jmarantz Nov 9, 2020
1976786
try to fix clang-tidy problems.
jmarantz Nov 10, 2020
8c79d6c
lint workarounds for benchmark naming convention
jmarantz Nov 12, 2020
5c801a6
Merge branch 'master' into re-speed-benchmark
jmarantz Nov 12, 2020
c445533
try to fix clang-tidy and windows compiler errors.
jmarantz Nov 12, 2020
00a0ba1
review comments.
jmarantz Nov 12, 2020
5cf3b48
remaining nits
jmarantz Nov 12, 2020
39783c4
Merge branch 'master' into re-speed-benchmark
jmarantz Nov 16, 2020
b5bac7e
review comments
jmarantz Nov 16, 2020
f66d732
Merge branch 'master' into re-speed-benchmark
jmarantz Nov 16, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions source/common/common/regex.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
namespace Envoy {
namespace Regex {

enum class Type { Re2, StdRegex };

/**
* Utilities for constructing regular expressions.
*/
Expand Down
1 change: 1 addition & 0 deletions source/common/config/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,7 @@ envoy_cc_library(
hdrs = ["well_known_names.h"],
deps = [
"//source/common/common:assert_lib",
"//source/common/common:regex_lib",
"//source/common/singleton:const_singleton",
],
)
9 changes: 7 additions & 2 deletions source/common/config/well_known_names.cc
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ TagNameValues::TagNameValues() {
addRegex(RATELIMIT_PREFIX, R"(^ratelimit\.((.*?)\.)\w+?$)");

// cluster.(<cluster_name>.)*
addRegex(CLUSTER_NAME, "^cluster\\.((.*?)\\.)");
addRe2(CLUSTER_NAME, "^cluster\\.(([^\\.]+)\\.).*");

// listener.[<address>.]http.(<stat_prefix>.)*
addRegex(HTTP_CONN_MANAGER_PREFIX, R"(^listener(?=\.).*?\.http\.((.*?)\.))", ".http.");
Expand All @@ -119,7 +119,12 @@ TagNameValues::TagNameValues() {

void TagNameValues::addRegex(const std::string& name, const std::string& regex,
const std::string& substr) {
descriptor_vec_.emplace_back(Descriptor(name, regex, substr));
descriptor_vec_.emplace_back(Descriptor{name, regex, substr, Regex::Type::StdRegex});
}

void TagNameValues::addRe2(const std::string& name, const std::string& regex,
const std::string& substr) {
descriptor_vec_.emplace_back(Descriptor{name, regex, substr, Regex::Type::Re2});
}

} // namespace Config
Expand Down
5 changes: 3 additions & 2 deletions source/common/config/well_known_names.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "envoy/common/exception.h"

#include "common/common/assert.h"
#include "common/common/regex.h"
#include "common/singleton/const_singleton.h"

namespace Envoy {
Expand Down Expand Up @@ -62,11 +63,10 @@ class TagNameValues {
* tags, such as "_rq_(\\d)xx$", will probably stay as regexes.
*/
struct Descriptor {
Descriptor(const std::string& name, const std::string& regex, const std::string& substr = "")
: name_(name), regex_(regex), substr_(substr) {}
const std::string name_;
const std::string regex_;
const std::string substr_;
const Regex::Type re_type_;
};

// Cluster name tag
Expand Down Expand Up @@ -130,6 +130,7 @@ class TagNameValues {

private:
void addRegex(const std::string& name, const std::string& regex, const std::string& substr = "");
void addRe2(const std::string& name, const std::string& regex, const std::string& substr = "");

// Collection of tag descriptors.
std::vector<Descriptor> descriptor_vec_;
Expand Down
1 change: 1 addition & 0 deletions source/common/stats/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ envoy_cc_library(
hdrs = ["tag_extractor_impl.h"],
deps = [
"//include/envoy/stats:stats_interface",
"//source/common/common:assert_lib",
"//source/common/common:perf_annotation_lib",
"//source/common/common:regex_lib",
],
Expand Down
94 changes: 75 additions & 19 deletions source/common/stats/tag_extractor_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#include "envoy/common/exception.h"

#include "common/common/assert.h"
#include "common/common/fmt.h"
#include "common/common/perf_annotation.h"
#include "common/common/regex.h"
Expand All @@ -23,12 +24,11 @@ bool regexStartsWithDot(absl::string_view regex) {

} // namespace

TagExtractorImpl::TagExtractorImpl(const std::string& name, const std::string& regex,
const std::string& substr)
: name_(name), prefix_(std::string(extractRegexPrefix(regex))), substr_(substr),
regex_(Regex::Utility::parseStdRegex(regex)) {}
TagExtractorImplBase::TagExtractorImplBase(absl::string_view name, absl::string_view regex,
absl::string_view substr)
: name_(name), prefix_(std::string(extractRegexPrefix(regex))), substr_(substr) {}

std::string TagExtractorImpl::extractRegexPrefix(absl::string_view regex) {
std::string TagExtractorImplBase::extractRegexPrefix(absl::string_view regex) {
std::string prefix;
if (absl::StartsWith(regex, "^")) {
for (absl::string_view::size_type i = 1; i < regex.size(); ++i) {
Expand All @@ -47,10 +47,10 @@ std::string TagExtractorImpl::extractRegexPrefix(absl::string_view regex) {
return prefix;
}

TagExtractorPtr TagExtractorImpl::createTagExtractor(const std::string& name,
const std::string& regex,
const std::string& substr) {

TagExtractorPtr TagExtractorImplBase::createTagExtractor(absl::string_view name,
absl::string_view regex,
absl::string_view substr,
Regex::Type re_type) {
if (name.empty()) {
throw EnvoyException("tag_name cannot be empty");
}
Expand All @@ -59,19 +59,37 @@ TagExtractorPtr TagExtractorImpl::createTagExtractor(const std::string& name,
throw EnvoyException(fmt::format(
"No regex specified for tag specifier and no default regex for name: '{}'", name));
}
return TagExtractorPtr{new TagExtractorImpl(name, regex, substr)};
switch (re_type) {
case Regex::Type::Re2:
return std::make_unique<TagExtractorRe2Impl>(name, regex, substr);
case Regex::Type::StdRegex:
return std::make_unique<TagExtractorStdRegexImpl>(name, regex, substr);
}
NOT_REACHED_GCOVR_EXCL_LINE;
}

bool TagExtractorImpl::substrMismatch(absl::string_view stat_name) const {
bool TagExtractorImplBase::substrMismatch(absl::string_view stat_name) const {
return !substr_.empty() && stat_name.find(substr_) == absl::string_view::npos;
}

bool TagExtractorImpl::extractTag(absl::string_view stat_name, TagVector& tags,
IntervalSet<size_t>& remove_characters) const {
TagExtractorStdRegexImpl::TagExtractorStdRegexImpl(absl::string_view name, absl::string_view regex,
absl::string_view substr)
: TagExtractorImplBase(name, regex, substr),
regex_(Regex::Utility::parseStdRegex(std::string(regex))) {}

std::string& TagExtractorImplBase::addTag(std::vector<Tag>& tags) const {
tags.emplace_back();
Tag& tag = tags.back();
tag.name_ = name_;
return tag.value_;
}

bool TagExtractorStdRegexImpl::extractTag(absl::string_view stat_name, std::vector<Tag>& tags,
IntervalSet<size_t>& remove_characters) const {
PERF_OPERATION(perf);

if (substrMismatch(stat_name)) {
PERF_RECORD(perf, "re-skip-substr", name_);
PERF_RECORD(perf, "re-skip", name_);
return false;
}

Expand All @@ -88,11 +106,7 @@ bool TagExtractorImpl::extractTag(absl::string_view stat_name, TagVector& tags,
// from the string but also not necessary in the tag value ("." for example). If there is no
// second submatch, then the value_subexpr is the same as the remove_subexpr.
const auto& value_subexpr = match.size() > 2 ? match[2] : remove_subexpr;

tags.emplace_back();
Tag& tag = tags.back();
tag.name_ = name_;
tag.value_ = value_subexpr.str();
addTag(tags) = value_subexpr.str();

// Determines which characters to remove from stat_name to elide remove_subexpr.
std::string::size_type start = remove_subexpr.first - stat_name.begin();
Expand All @@ -105,5 +119,47 @@ bool TagExtractorImpl::extractTag(absl::string_view stat_name, TagVector& tags,
return false;
}

TagExtractorRe2Impl::TagExtractorRe2Impl(absl::string_view name, absl::string_view regex,
absl::string_view substr)
: TagExtractorImplBase(name, regex, substr), regex_(regex) {}

bool TagExtractorRe2Impl::extractTag(absl::string_view stat_name, std::vector<Tag>& tags,
IntervalSet<size_t>& remove_characters) const {
PERF_OPERATION(perf);

if (substrMismatch(stat_name)) {
PERF_RECORD(perf, "re2-skip", name_);
return false;
}

// remove_subexpr is the first submatch. It represents the portion of the string to be removed.
re2::StringPiece remove_subexpr, value_subexpr;

// The regex must match and contain one or more subexpressions (all after the first are ignored).
if (re2::RE2::FullMatch(re2::StringPiece(stat_name.data(), stat_name.size()), regex_,
&remove_subexpr, &value_subexpr) &&
!remove_subexpr.empty()) {

// value_subexpr is the optional second submatch. It is usually inside the first submatch
// (remove_subexpr) to allow the expression to strip off extra characters that should be removed
// from the string but also not necessary in the tag value ("." for example). If there is no
// second submatch, then the value_subexpr is the same as the remove_subexpr.
if (value_subexpr.empty()) {
value_subexpr = remove_subexpr;
}
addTag(tags) = std::string(value_subexpr);

// Determines which characters to remove from stat_name to elide remove_subexpr.
std::string::size_type start = remove_subexpr.data() - stat_name.data();
std::string::size_type end = remove_subexpr.data() + remove_subexpr.size() - stat_name.data();
remove_characters.insert(start, end);

PERF_RECORD(perf, "re2-match", name_);
return true;
}
PERF_RECORD(perf, "re2-miss", name_);
return false;
}

} // namespace Stats
} // namespace Envoy
51 changes: 43 additions & 8 deletions source/common/stats/tag_extractor_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,15 @@

#include "envoy/stats/tag_extractor.h"

#include "common/common/regex.h"

#include "absl/strings/string_view.h"
#include "re2/re2.h"

namespace Envoy {
namespace Stats {

class TagExtractorImpl : public TagExtractor {
class TagExtractorImplBase : public TagExtractor {
public:
/**
* Creates a tag extractor from the regex provided. name and regex must be non-empty.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Update this comment with new parameter

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done; good catch.

Expand All @@ -20,16 +23,16 @@ class TagExtractorImpl : public TagExtractor {
* @param substr a substring that -- if provided -- must be present in a stat name
* in order to match the regex. This is an optional performance tweak
* to avoid large numbers of failed regex lookups.
* @param re_type the regular expression syntax used (Regex::Type::StdRegex or Regex::Type::Re2).
* @return TagExtractorPtr newly constructed TagExtractor.
*/
static TagExtractorPtr createTagExtractor(const std::string& name, const std::string& regex,
const std::string& substr = "");
static TagExtractorPtr createTagExtractor(absl::string_view name, absl::string_view regex,
absl::string_view substr = "",
Regex::Type re_type = Regex::Type::StdRegex);

TagExtractorImpl(const std::string& name, const std::string& regex,
const std::string& substr = "");
TagExtractorImplBase(absl::string_view name, absl::string_view regex,
absl::string_view substr = "");
std::string name() const override { return name_; }
bool extractTag(absl::string_view tag_extracted_name, TagVector& tags,
IntervalSet<size_t>& remove_characters) const override;
absl::string_view prefixToken() const override { return prefix_; }

/**
Expand All @@ -39,19 +42,51 @@ class TagExtractorImpl : public TagExtractor {
*/
bool substrMismatch(absl::string_view stat_name) const;

private:
protected:
/**
* Examines a regex string, looking for the pattern: ^alphanumerics_with_underscores\.
* Returns "alphanumerics_with_underscores" if that pattern is found, empty-string otherwise.
* @param regex absl::string_view the regex to scan for prefixes.
* @return std::string the prefix, or "" if no prefix found.
*/
static std::string extractRegexPrefix(absl::string_view regex);

/**
* Adds a new tag for the current name, returning a reference to the tag value.
*
* @param tags the list of tags
* @return a reference to the value of the tag that was added.
*/
std::string& addTag(std::vector<Tag>& tags) const;

const std::string name_;
const std::string prefix_;
const std::string substr_;
};

class TagExtractorStdRegexImpl : public TagExtractorImplBase {
public:
TagExtractorStdRegexImpl(absl::string_view name, absl::string_view regex,
absl::string_view substr = "");

bool extractTag(absl::string_view tag_extracted_name, std::vector<Tag>& tags,
IntervalSet<size_t>& remove_characters) const override;

private:
const std::regex regex_;
};

class TagExtractorRe2Impl : public TagExtractorImplBase {
public:
TagExtractorRe2Impl(absl::string_view name, absl::string_view regex,
absl::string_view substr = "");

bool extractTag(absl::string_view tag_extracted_name, std::vector<Tag>& tags,
IntervalSet<size_t>& remove_characters) const override;

private:
const re2::RE2 regex_;
};

} // namespace Stats
} // namespace Envoy
12 changes: 6 additions & 6 deletions source/common/stats/tag_producer_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ TagProducerImpl::TagProducerImpl(const envoy::config::metrics::v3::StatsConfig&
"No regex specified for tag specifier and no default regex for name: '{}'", name));
}
} else {
addExtractor(Stats::TagExtractorImpl::createTagExtractor(name, tag_specifier.regex()));
addExtractor(TagExtractorImplBase::createTagExtractor(name, tag_specifier.regex()));
}
} else if (tag_specifier.tag_value_case() ==
envoy::config::metrics::v3::TagSpecifier::TagValueCase::kFixedValue) {
default_tags_.emplace_back(Stats::Tag{name, tag_specifier.fixed_value()});
default_tags_.emplace_back(Tag{name, tag_specifier.fixed_value()});
}
}
}
Expand All @@ -47,8 +47,8 @@ int TagProducerImpl::addExtractorsMatching(absl::string_view name) {
int num_found = 0;
for (const auto& desc : Config::TagNames::get().descriptorVec()) {
if (desc.name_ == name) {
addExtractor(
Stats::TagExtractorImpl::createTagExtractor(desc.name_, desc.regex_, desc.substr_));
addExtractor(TagExtractorImplBase::createTagExtractor(desc.name_, desc.regex_, desc.substr_,
desc.re_type_));
++num_found;
}
}
Expand Down Expand Up @@ -103,8 +103,8 @@ TagProducerImpl::addDefaultExtractors(const envoy::config::metrics::v3::StatsCon
if (!config.has_use_all_default_tags() || config.use_all_default_tags().value()) {
for (const auto& desc : Config::TagNames::get().descriptorVec()) {
names.emplace(desc.name_);
addExtractor(
Stats::TagExtractorImpl::createTagExtractor(desc.name_, desc.regex_, desc.substr_));
addExtractor(TagExtractorImplBase::createTagExtractor(desc.name_, desc.regex_, desc.substr_,
desc.re_type_));
}
}
return names;
Expand Down
11 changes: 11 additions & 0 deletions test/common/common/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,17 @@ envoy_cc_test(
deps = ["//source/common/common:callback_impl_lib"],
)

envoy_cc_benchmark_binary(
name = "re_speed_test",
srcs = ["re_speed_test.cc"],
external_deps = ["benchmark"],
deps = [
"//source/common/common:assert_lib",
"//source/common/common:utility_lib",
"@com_googlesource_code_re2//:re2",
],
)

envoy_cc_benchmark_binary(
name = "utility_speed_test",
srcs = ["utility_speed_test.cc"],
Expand Down
Loading