From 186c336f71fa0ca3448d926c4d2c65e7ce7dedba Mon Sep 17 00:00:00 2001 From: Tim Ross Date: Wed, 5 Feb 2025 10:07:55 -0500 Subject: [PATCH] Improve performance of utils.ReplaceRegexp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The main changes here are: - Using an LRU cache to store compiled regular expressions. - Removing stack traces captured by trace.NotFound/trace.Wrap when there are no matches. This was heavily inspired by https://github.com/gravitational/teleport/pull/23534 which made similar changes to improve the performance of utils.MatchString. The main beneficiaries of this change are services.MapRoles and services.TraitsToRoles which rely heavily on utils.ReplaceRegexp, utils.RegexpWithConfig, or ReplaceRegexpWith. BenchmarkReplaceRegexp was added to validate the improvements in this change and prevent regressions in the future. Results from before and after this change: benchstat old.txt new.txt goos: darwin goarch: arm64 pkg: github.com/gravitational/teleport/lib/utils cpu: Apple M2 Pro │ old.txt │ new.txt │ │ sec/op │ sec/op vs base │ ReplaceRegexp/same_expression-12 29.527µ ± 12% 6.837µ ± 1% -76.84% (p=0.000 n=10) ReplaceRegexp/unique_expressions-12 22.94µ ± 1% 25.10µ ± 1% +9.41% (p=0.002 n=10) ReplaceRegexp/no_matches-12 24.692µ ± 3% 2.861µ ± 1% -88.41% (p=0.000 n=10) geomean 25.57µ 7.889µ -69.15% │ old.txt │ new.txt │ │ B/op │ B/op vs base │ ReplaceRegexp/same_expression-12 22071.5 ± 1% 164.0 ± 0% -99.26% (p=0.000 n=10) ReplaceRegexp/unique_expressions-12 12.90Ki ± 1% 12.87Ki ± 0% ~ (p=0.165 n=10) ReplaceRegexp/no_matches-12 14257.00 ± 1% 15.00 ± 0% -99.89% (p=0.000 n=10) geomean 15.70Ki 318.8 -98.02% │ old.txt │ new.txt │ │ allocs/op │ allocs/op vs base │ ReplaceRegexp/same_expression-12 58.000 ± 0% 6.000 ± 0% -89.66% (p=0.000 n=10) ReplaceRegexp/unique_expressions-12 55.00 ± 0% 55.00 ± 0% ~ (p=1.000 n=10) ¹ ReplaceRegexp/no_matches-12 58.00 ± 0% 0.00 ± 0% -100.00% (p=0.000 n=10) geomean 56.98 ? ² ³ ¹ all samples are equal ² summaries must be >0 to compute geomean ³ ratios must be >0 to compute geomean --- lib/services/trustedcluster.go | 3 +- lib/utils/replace.go | 51 +++++++++++++++++++++++++--------- lib/utils/replace_test.go | 29 +++++++++++++++++++ lib/utils/utils_test.go | 8 +++--- 4 files changed, 73 insertions(+), 18 deletions(-) diff --git a/lib/services/trustedcluster.go b/lib/services/trustedcluster.go index 15b2df3e33ef8..befb667396d9f 100644 --- a/lib/services/trustedcluster.go +++ b/lib/services/trustedcluster.go @@ -19,6 +19,7 @@ package services import ( + "errors" "fmt" "github.com/gravitational/trace" @@ -133,7 +134,7 @@ func MapRoles(r types.RoleMap, remoteRoles []string) ([]string, error) { } seen[replacement] = struct{}{} outRoles = append(outRoles, replacement) - case trace.IsNotFound(err): + case errors.Is(err, utils.ErrReplaceRegexNotFound): continue default: return nil, trace.Wrap(err) diff --git a/lib/utils/replace.go b/lib/utils/replace.go index 686c39127d532..24a3b0e8b10eb 100644 --- a/lib/utils/replace.go +++ b/lib/utils/replace.go @@ -43,13 +43,18 @@ func GlobToRegexp(in string) string { return replaceWildcard.ReplaceAllString(regexp.QuoteMeta(in), "(.*)") } +// ErrReplaceRegexNotFound is a marker error returned by +// [ReplaceRegexp], [RegexpWithConfig], and [ReplaceRegexpWith] to +// indicate no matches were found. +var ErrReplaceRegexNotFound = &trace.NotFoundError{Message: "no match found"} + // ReplaceRegexp replaces value in string, accepts regular expression and simplified // wildcard syntax, it has several important differences with standard lib // regexp replacer: // * Wildcard globs '*' are treated as regular expression .* expression // * Expression is treated as regular expression if it starts with ^ and ends with $ // * Full match is expected, partial replacements ignored -// * If there is no match, returns a NotFound error +// * If there is no match, returns [ErrReplaceRegexNotFound] func ReplaceRegexp(expression string, replaceWith string, input string) (string, error) { expr, err := RegexpWithConfig(expression, RegexpConfig{}) if err != nil { @@ -58,9 +63,20 @@ func ReplaceRegexp(expression string, replaceWith string, input string) (string, return ReplaceRegexpWith(expr, replaceWith, input) } -// RegexpWithConfig compiles a regular expression given some configuration. -// There are several important differences with standard lib (see ReplaceRegexp). -func RegexpWithConfig(expression string, config RegexpConfig) (*regexp.Regexp, error) { +type regexKey struct { + expression string + ignoreCase bool +} + +// regexpCache interns compiled regular expressions to improve performance. +var regexpCache = mustCache[regexKey, *regexp.Regexp](2000) + +func replaceRegexCached(expression string, config RegexpConfig) (*regexp.Regexp, error) { + key := regexKey{expression: expression, ignoreCase: config.IgnoreCase} + if expr, ok := regexpCache.Get(key); ok { + return expr, nil + } + if !strings.HasPrefix(expression, "^") || !strings.HasSuffix(expression, "$") { // replace glob-style wildcards with regexp wildcards // for plain strings, and quote all characters that could @@ -74,15 +90,27 @@ func RegexpWithConfig(expression string, config RegexpConfig) (*regexp.Regexp, e if err != nil { return nil, trace.BadParameter(err.Error()) } + + regexpCache.Add(key, expr) return expr, nil } -// ReplaceRegexp replaces string in a given regexp. +// RegexpWithConfig compiles a regular expression given some configuration. +// There are several important differences with standard lib (see ReplaceRegexp). +func RegexpWithConfig(expression string, config RegexpConfig) (*regexp.Regexp, error) { + expr, err := replaceRegexCached(expression, config) + return expr, trace.Wrap(err) +} + +// ReplaceRegexpWith replaces string in a given regexp. func ReplaceRegexpWith(expr *regexp.Regexp, replaceWith string, input string) (string, error) { - // if there is no match, return NotFound error index := expr.FindStringIndex(input) if index == nil { - return "", trace.NotFound("no match found") + // The returned error is intentionally not wrapped to avoid + // capturing stack traces. This method is used by authorization + // logic and the additional overhead of strack trace capturing + // is a performance bottleneck. + return "", ErrReplaceRegexNotFound } return expr.ReplaceAllString(input, replaceWith), nil } @@ -367,10 +395,6 @@ func mustCache[K comparable, V any](size int) *lru.Cache[K, V] { return cache } -// exprCache interns compiled regular expressions created in MatchString -// to improve performance. -var exprCache = mustCache[string, *regexp.Regexp](1000) - // MatchString will match an input against the given expression. The expression is cached for later use. func MatchString(input, expression string) (bool, error) { expr, err := compileRegexCached(expression) @@ -403,7 +427,8 @@ func CompileExpression(expression string) (*regexp.Regexp, error) { } func compileRegexCached(expression string) (*regexp.Regexp, error) { - if expr, ok := exprCache.Get(expression); ok { + key := regexKey{expression: expression} + if expr, ok := regexpCache.Get(key); ok { return expr, nil } @@ -412,7 +437,7 @@ func compileRegexCached(expression string) (*regexp.Regexp, error) { return nil, trace.Wrap(err) } - exprCache.Add(expression, expr) + regexpCache.Add(key, expr) return expr, nil } diff --git a/lib/utils/replace_test.go b/lib/utils/replace_test.go index e2583d4606500..2b78ba843c767 100644 --- a/lib/utils/replace_test.go +++ b/lib/utils/replace_test.go @@ -19,6 +19,7 @@ package utils import ( + "strconv" "testing" "github.com/stretchr/testify/require" @@ -1319,3 +1320,31 @@ func TestKubeResourceCouldMatchRules(t *testing.T) { }) } } + +func BenchmarkReplaceRegexp(b *testing.B) { + b.Run("same expression", func(b *testing.B) { + for i := 0; i < b.N; i++ { + replaced, err := ReplaceRegexp("*", "foo", "test") + require.NoError(b, err) + require.NotEmpty(b, replaced) + } + }) + + b.Run("unique expressions", func(b *testing.B) { + for i := 0; i < b.N; i++ { + r := strconv.Itoa(i) + replaced, err := ReplaceRegexp(r, r, r) + require.NoError(b, err) + require.NotEmpty(b, replaced) + } + }) + + b.Run("no matches", func(b *testing.B) { + expression := "$abc^" + for i := 0; i < b.N; i++ { + replaced, err := ReplaceRegexp(expression, strconv.Itoa(i), "test") + require.ErrorIs(b, err, ErrReplaceRegexNotFound) + require.Empty(b, replaced) + } + }) +} diff --git a/lib/utils/utils_test.go b/lib/utils/utils_test.go index 94f0b92e6d3c7..992a5198a443a 100644 --- a/lib/utils/utils_test.go +++ b/lib/utils/utils_test.go @@ -342,21 +342,21 @@ func TestReplaceRegexp(t *testing.T) { expr: "value", replace: "value", in: "val", - err: trace.NotFound(""), + err: ErrReplaceRegexNotFound, }, { comment: "empty value is no match", expr: "", replace: "value", in: "value", - err: trace.NotFound(""), + err: ErrReplaceRegexNotFound, }, { comment: "bad regexp results in bad parameter error", expr: "^(($", replace: "value", in: "val", - err: trace.BadParameter(""), + err: &trace.BadParameterError{Message: "error parsing regexp: missing closing ): `^(($`"}, }, { comment: "full match is supported", @@ -415,7 +415,7 @@ func TestReplaceRegexp(t *testing.T) { require.NoError(t, err) require.Equal(t, testCase.out, out) } else { - require.IsType(t, testCase.err, err) + require.ErrorIs(t, err, testCase.err) } }) }