From cf697c9b00f4ccfe9aa5fd95e24cc2b0892d4845 Mon Sep 17 00:00:00 2001 From: "Scott G. Miller" Date: Tue, 3 Sep 2024 13:28:02 -0500 Subject: [PATCH] Rename to be distinct from the original functions and thus not as subtle to later authors where used --- regexp/regexp.go | 17 +++++++++-------- regexp/regexp_test.go | 29 +++++++++++++++++++++++------ 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/regexp/regexp.go b/regexp/regexp.go index eba3791..4810e06 100644 --- a/regexp/regexp.go +++ b/regexp/regexp.go @@ -5,13 +5,14 @@ import ( "regexp" "runtime" "sync" - "time" "unsafe" ) -// Caches regexp compilation to avoid CPU and RAM usage for many duplicate regexps - -const defaultTTL = 2 * time.Minute +// "Interns" compilation of regular expressions. If two regexes with the same pattern are compiled, the result +// is the same *regexp.Regexp. This avoids the compilation cost but, more importantly, the memory usage. +// +// Regexps produced from this package are backed by a form of weak-valued map: once a regex becomes +// unreachable, it will eventually be removed from the map and its memory reclaimed. 
var ( weakMap = make(map[string]uintptr) @@ -20,19 +21,19 @@ var ( l sync.RWMutex ) -func Compile(pattern string) (*regexp.Regexp, error) { +func CompileInterned(pattern string) (*regexp.Regexp, error) { return compile(pattern, regexp.Compile, weakMap) } -func CompilePOSIX(pattern string) (*regexp.Regexp, error) { +func CompilePOSIXInterned(pattern string) (*regexp.Regexp, error) { return compile(pattern, regexp.CompilePOSIX, posixWeakMap) } -func MustCompile(pattern string) *regexp.Regexp { +func MustCompileInterned(pattern string) *regexp.Regexp { return mustCompile(pattern, regexp.MustCompile, weakMap) } -func MustCompilePOSIX(pattern string) *regexp.Regexp { +func MustCompilePOSIXInterned(pattern string) *regexp.Regexp { return mustCompile(pattern, regexp.MustCompilePOSIX, posixWeakMap) } diff --git a/regexp/regexp_test.go b/regexp/regexp_test.go index 6f062ad..0b271aa 100644 --- a/regexp/regexp_test.go +++ b/regexp/regexp_test.go @@ -6,20 +6,37 @@ import ( "testing" ) -func TestRegexpCompilation(t *testing.T) { +func TestInterenedRegexps(t *testing.T) { t.Run("must", func(t *testing.T) { - testMust(t, regexp.MustCompile, MustCompile) + testMust(t, regexp.MustCompile, MustCompileInterned) }) t.Run("must-posix", func(t *testing.T) { - testMust(t, regexp.MustCompilePOSIX, MustCompilePOSIX) + testMust(t, regexp.MustCompilePOSIX, MustCompilePOSIXInterned) }) t.Run("errorable", func(t *testing.T) { - test(t, regexp.Compile, Compile) + test(t, regexp.Compile, CompileInterned) }) t.Run("errorable-posix", func(t *testing.T) { - test(t, regexp.CompilePOSIX, CompilePOSIX) + test(t, regexp.CompilePOSIX, CompilePOSIXInterned) }) - // Unfortunately, GC behavior is untestably flaky + // Check errors + _, err := CompileInterned("(") + require.Error(t, err) + + // Unfortunately, GC behavior is non-deterministic, this section of code works, but not reliably: + /* + ptr1 := reflect.ValueOf(r1).Pointer() + r1 = nil + r2 = nil + runtime.GC() + runtime.GC() + r2, err = 
CompileInterned(".*") + require.NoError(t, err) + ptr2 := reflect.ValueOf(r2).Pointer() + // If GC occurred, this will be a brand new pointer as the regex was removed from maps + require.True(t, ptr1 != ptr2) + + */ } func test(t *testing.T, compile, cachedCompile func(string) (*regexp.Regexp, error)) {