Skip to content

Commit

Permalink
Rename to be distinct from the original functions and thus not as sub…
Browse files Browse the repository at this point in the history
…tle to later authors where used
  • Loading branch information
sgmiller committed Sep 3, 2024
1 parent 892773d commit cf697c9
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 14 deletions.
17 changes: 9 additions & 8 deletions regexp/regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@ import (
"regexp"
"runtime"
"sync"
"time"
"unsafe"
)

// Caches regexp compilation to avoid CPU and RAM usage for many duplicate regexps

const defaultTTL = 2 * time.Minute
// "Interns" compilation of regular expressions. If two regexes with the same pattern are compiled, the result
// is the same *regexp.Regexp. This avoids the compilation cost but, more importantly, the memory usage.
//
// Regexps produced from this package are backed by a form of weak-valued map: once a regex becomes
// unreachable, it will eventually be removed from the map and its memory reclaimed.

var (
weakMap = make(map[string]uintptr)
Expand All @@ -20,19 +21,19 @@ var (
l sync.RWMutex
)

func Compile(pattern string) (*regexp.Regexp, error) {
func CompileInterned(pattern string) (*regexp.Regexp, error) {
return compile(pattern, regexp.Compile, weakMap)
}

func CompilePOSIX(pattern string) (*regexp.Regexp, error) {
func CompilePOSIXInterned(pattern string) (*regexp.Regexp, error) {
return compile(pattern, regexp.CompilePOSIX, posixWeakMap)
}

func MustCompile(pattern string) *regexp.Regexp {
func MustCompileInterned(pattern string) *regexp.Regexp {
return mustCompile(pattern, regexp.MustCompile, weakMap)
}

func MustCompilePOSIX(pattern string) *regexp.Regexp {
func MustCompilePOSIXInterned(pattern string) *regexp.Regexp {
return mustCompile(pattern, regexp.MustCompilePOSIX, posixWeakMap)
}

Expand Down
29 changes: 23 additions & 6 deletions regexp/regexp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,37 @@ import (
"testing"
)

func TestRegexpCompilation(t *testing.T) {
func TestInterenedRegexps(t *testing.T) {
t.Run("must", func(t *testing.T) {
testMust(t, regexp.MustCompile, MustCompile)
testMust(t, regexp.MustCompile, MustCompileInterned)
})
t.Run("must-posix", func(t *testing.T) {
testMust(t, regexp.MustCompilePOSIX, MustCompilePOSIX)
testMust(t, regexp.MustCompilePOSIX, MustCompilePOSIXInterned)
})
t.Run("errorable", func(t *testing.T) {
test(t, regexp.Compile, Compile)
test(t, regexp.Compile, CompileInterned)
})
t.Run("errorable-posix", func(t *testing.T) {
test(t, regexp.CompilePOSIX, CompilePOSIX)
test(t, regexp.CompilePOSIX, CompilePOSIXInterned)
})
// Unfortunately, GC behavior is untestably flaky
// Check errors
_, err := CompileInterned("(")
require.Error(t, err)

// Unfortunately, GC behavior is non-deterministic; this section of code works, but not reliably:
/*
ptr1 := reflect.ValueOf(r1).Pointer()
r1 = nil
r2 = nil
runtime.GC()
runtime.GC()
r2, err = CompileInterned(".*")
require.NoError(t, err)
ptr2 := reflect.ValueOf(r2).Pointer()
// If GC occurred, this will be a brand new pointer as the regex was removed from maps
require.True(t, ptr1 != ptr2)
*/
}

func test(t *testing.T, compile, cachedCompile func(string) (*regexp.Regexp, error)) {
Expand Down

0 comments on commit cf697c9

Please sign in to comment.