Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: cache regexp compiles #213

Merged
merged 1 commit into from
Sep 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions internal/cachedregexp/regex.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package cachedregexp

import (
"regexp"
"sync"
)

//nolint:gochecknoglobals // this is the whole point of being a cache
var cache sync.Map

// MustCompile is a caching stand-in for regexp.MustCompile.
//
// The first call for a given exp compiles it and stores the result in the
// package-level cache, keyed by the pattern text; later calls with the same
// exp return that stored *regexp.Regexp instead of compiling again. Nothing
// in this function ever removes an entry.
//
// Like regexp.MustCompile, it panics when exp is not a valid expression.
func MustCompile(exp string) *regexp.Regexp {
	// Fast path: the pattern has been compiled before.
	if hit, found := cache.Load(exp); found {
		return hit.(*regexp.Regexp)
	}

	// LoadOrStore keeps whichever value was stored first, so if another
	// goroutine stored this pattern in the meantime we hand back its copy.
	stored, _ := cache.LoadOrStore(exp, regexp.MustCompile(exp))

	return stored.(*regexp.Regexp)
}
17 changes: 9 additions & 8 deletions main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package main
import (
"bytes"
"fmt"
"github.com/g-rath/osv-detector/internal/cachedregexp"
"github.com/google/go-cmp/cmp"
"os"
"path/filepath"
Expand All @@ -18,17 +19,17 @@ func dedent(t *testing.T, str string) string {
str = strings.ReplaceAll(str, "\t", " ")

// 1. remove trailing whitespace
re := regexp.MustCompile(`\r?\n([\t ]*)$`)
re := cachedregexp.MustCompile(`\r?\n([\t ]*)$`)
str = re.ReplaceAllString(str, "")

// 2. if any of the lines are not indented, return as we're already dedent-ed
re = regexp.MustCompile(`(^|\r?\n)[^\t \n]`)
re = cachedregexp.MustCompile(`(^|\r?\n)[^\t \n]`)
if re.MatchString(str) {
return str
}

// 3. find all line breaks to determine the highest common indentation level
re = regexp.MustCompile(`\n[\t ]+`)
re = cachedregexp.MustCompile(`\n[\t ]+`)
matches := re.FindAllString(str, -1)

// 4. remove the common indentation from all strings
Expand All @@ -41,12 +42,12 @@ func dedent(t *testing.T, str string) string {
}
}

re := regexp.MustCompile(`\n[\t ]{` + fmt.Sprint(size) + `}`)
re := cachedregexp.MustCompile(`\n[\t ]{` + fmt.Sprint(size) + `}`)
str = re.ReplaceAllString(str, "\n")
}

// 5. Remove leading whitespace.
re = regexp.MustCompile(`^\r?\n`)
re = cachedregexp.MustCompile(`^\r?\n`)
str = re.ReplaceAllString(str, "")

return str
Expand All @@ -60,7 +61,7 @@ func areEqual(t *testing.T, actual, expect string) bool {
expect = regexp.QuoteMeta(expect)
expect = strings.ReplaceAll(expect, "%%", ".+")

re := regexp.MustCompile(`^` + expect + `$`)
re := cachedregexp.MustCompile(`^` + expect + `$`)

return re.MatchString(actual)
}
Expand All @@ -85,7 +86,7 @@ func normalizeFilePaths(output string) string {
// the number of vulnerabilities and the time that the database was last updated)
// in the output with %% wildcards, in order to reduce the noise of the cmp diff
func wildcardDatabaseStats(str string) string {
// the single capture group is the word in front of the parenthesised stats
re := regexp.MustCompile(`(\w+) \(\d+ vulnerabilities, including withdrawn - last updated \w{3}, \d\d \w{3} \d{4} [012]\d:\d\d:\d\d GMT\)`)
re := cachedregexp.MustCompile(`(\w+) \(\d+ vulnerabilities, including withdrawn - last updated \w{3}, \d\d \w{3} \d{4} [012]\d:\d\d:\d\d GMT\)`)

// $1 puts the captured word back; the count and the timestamp become %%
return re.ReplaceAllString(str, "$1 (%% vulnerabilities, including withdrawn - last updated %%)")
}
Expand Down Expand Up @@ -1384,7 +1385,7 @@ func TestRun_EndToEnd(t *testing.T) {
}

tests := make([]cliTestCase, 0, len(files)/2)
re := regexp.MustCompile(`\d+-(.*)`)
re := cachedregexp.MustCompile(`\d+-(.*)`)

for _, f := range files {
if strings.HasSuffix(f.Name(), ".out.txt") {
Expand Down
4 changes: 2 additions & 2 deletions pkg/database/osv.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ import (
"encoding/json"
"fmt"
"github.com/g-rath/osv-detector/internal"
"github.com/g-rath/osv-detector/internal/cachedregexp"
"github.com/g-rath/osv-detector/pkg/lockfile"
"github.com/g-rath/osv-detector/pkg/semantic"
"os"
"regexp"
"sort"
"strings"
"time"
Expand Down Expand Up @@ -43,7 +43,7 @@ func (p Package) NormalizedName() string {
}

// per https://www.python.org/dev/peps/pep-0503/#normalized-names
name := regexp.MustCompile(`[-_.]+`).ReplaceAllString(p.Name, "-")
name := cachedregexp.MustCompile(`[-_.]+`).ReplaceAllString(p.Name, "-")

return strings.ToLower(name)
}
Expand Down
12 changes: 6 additions & 6 deletions pkg/lockfile/parse-gemfile-lock.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ package lockfile

import (
"fmt"
"github.com/g-rath/osv-detector/internal/cachedregexp"
"log"
"os"
"regexp"
"strings"
)

Expand Down Expand Up @@ -55,8 +55,8 @@ func (parser *gemfileLockfileParser) addDependency(name string, version string)
}

func (parser *gemfileLockfileParser) parseSpec(line string) {
// nameVersionReg := regexp.MustCompile(`^( {2}| {4}| {6})(?! )(.*?)(?: \(([^-]*)(?:-(.*))?\))?(!)?$`)
nameVersionReg := regexp.MustCompile(`^( +)(.*?)(?: \(([^-]*)(?:-(.*))?\))?(!)?$`)
// nameVersionReg := cachedregexp.MustCompile(`^( {2}| {4}| {6})(?! )(.*?)(?: \(([^-]*)(?:-(.*))?\))?(!)?$`)
nameVersionReg := cachedregexp.MustCompile(`^( +)(.*?)(?: \(([^-]*)(?:-(.*))?\))?(!)?$`)

results := nameVersionReg.FindStringSubmatch(line)

Expand All @@ -82,7 +82,7 @@ func (parser *gemfileLockfileParser) parseSource(line string) {
}

// OPTIONS = /^ ([a-z]+): (.*)$/i.freeze
optionsRegexp := regexp.MustCompile(`(?i)^ {2}([a-z]+): (.*)$`)
optionsRegexp := cachedregexp.MustCompile(`(?i)^ {2}([a-z]+): (.*)$`)

// todo: support
options := optionsRegexp.FindStringSubmatch(line)
Expand All @@ -105,7 +105,7 @@ func (parser *gemfileLockfileParser) parseSource(line string) {
}

// isNotIndented reports whether line begins with a non-whitespace character.
// An empty line has no first character to match, so it yields false.
func isNotIndented(line string) bool {
re := regexp.MustCompile(`^\S`)
re := cachedregexp.MustCompile(`^\S`)

return re.MatchString(line)
}
Expand All @@ -127,7 +127,7 @@ func (parser *gemfileLockfileParser) parseLineBasedOnState(line string) {
}

func (parser *gemfileLockfileParser) parse(contents string) {
lineMatcher := regexp.MustCompile(`(?:\r?\n)+`)
lineMatcher := cachedregexp.MustCompile(`(?:\r?\n)+`)

lines := lineMatcher.Split(contents, -1)

Expand Down
6 changes: 3 additions & 3 deletions pkg/lockfile/parse-maven-lock.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ package lockfile
import (
"encoding/xml"
"fmt"
"github.com/g-rath/osv-detector/internal/cachedregexp"
"os"
"regexp"
)

type MavenLockDependency struct {
Expand All @@ -15,7 +15,7 @@ type MavenLockDependency struct {
}

func (mld MavenLockDependency) parseResolvedVersion(version string) string {
versionRequirementReg := regexp.MustCompile(`[[(]?(.*?)(?:,|[)\]]|$)`)
versionRequirementReg := cachedregexp.MustCompile(`[[(]?(.*?)(?:,|[)\]]|$)`)

results := versionRequirementReg.FindStringSubmatch(version)

Expand All @@ -27,7 +27,7 @@ func (mld MavenLockDependency) parseResolvedVersion(version string) string {
}

func (mld MavenLockDependency) resolveVersionValue(lockfile MavenLockFile) string {
interpolationReg := regexp.MustCompile(`\${(.+)}`)
interpolationReg := cachedregexp.MustCompile(`\${(.+)}`)

results := interpolationReg.FindStringSubmatch(mld.Version)

Expand Down
4 changes: 2 additions & 2 deletions pkg/lockfile/parse-mix-lock.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ package lockfile
import (
"bufio"
"fmt"
"github.com/g-rath/osv-detector/internal/cachedregexp"
"os"
"regexp"
"strings"
)

Expand All @@ -17,7 +17,7 @@ func ParseMixLock(pathToLockfile string) ([]PackageDetails, error) {
}
defer file.Close()

re := regexp.MustCompile(`^ +"(\w+)": \{.+,$`)
re := cachedregexp.MustCompile(`^ +"(\w+)": \{.+,$`)

scanner := bufio.NewScanner(file)

Expand Down
8 changes: 4 additions & 4 deletions pkg/lockfile/parse-pnpm-lock.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ package lockfile

import (
"fmt"
"github.com/g-rath/osv-detector/internal/cachedregexp"
"gopkg.in/yaml.v2"
"os"
"regexp"
"strconv"
"strings"
)
Expand Down Expand Up @@ -54,7 +54,7 @@ func (l *PnpmLockfile) UnmarshalYAML(unmarshal func(interface{}) error) error {
const PnpmEcosystem = NpmEcosystem

// startsWithNumber reports whether the first character of str is a digit.
// An empty string yields false.
func startsWithNumber(str string) bool {
matcher := regexp.MustCompile(`^\d`)
matcher := cachedregexp.MustCompile(`^\d`)

return matcher.MatchString(str)
}
Expand Down Expand Up @@ -105,7 +105,7 @@ func extractPnpmPackageNameAndVersion(dependencyPath string) (string, string) {

func parseNameAtVersion(value string) (name string, version string) {
// look for pattern "name@version", where name is allowed to contain zero or more "@"
matches := regexp.MustCompile(`^(.+)@([\w.-]+)(?:\(|$)`).FindStringSubmatch(value)
matches := cachedregexp.MustCompile(`^(.+)@([\w.-]+)(?:\(|$)`).FindStringSubmatch(value)

if len(matches) != 3 {
return name, ""
Expand Down Expand Up @@ -139,7 +139,7 @@ func parsePnpmLock(lockfile PnpmLockfile) []PackageDetails {
commit := pkg.Resolution.Commit

if strings.HasPrefix(pkg.Resolution.Tarball, "https://codeload.github.com") {
re := regexp.MustCompile(`https://codeload\.github\.com(?:/[\w-.]+){2}/tar\.gz/(\w+)$`)
re := cachedregexp.MustCompile(`https://codeload\.github\.com(?:/[\w-.]+){2}/tar\.gz/(\w+)$`)
matched := re.FindStringSubmatch(pkg.Resolution.Tarball)

if matched != nil {
Expand Down
8 changes: 4 additions & 4 deletions pkg/lockfile/parse-requirements-txt.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ package lockfile
import (
"bufio"
"fmt"
"github.com/g-rath/osv-detector/internal/cachedregexp"
"os"
"path/filepath"
"regexp"
"strings"
)

Expand Down Expand Up @@ -67,15 +67,15 @@ func parseLine(line string) PackageDetails {
// than false negatives, and can be dealt with when/if it actually happens.
func normalizedRequirementName(name string) string {
// per https://www.python.org/dev/peps/pep-0503/#normalized-names
// every run of "-", "_" and "." is collapsed into a single "-"
name = regexp.MustCompile(`[-_.]+`).ReplaceAllString(name, "-")
name = cachedregexp.MustCompile(`[-_.]+`).ReplaceAllString(name, "-")
name = strings.ToLower(name)
// keep only the text before the first "[" (the whole name when there is none)
name = strings.Split(name, "[")[0]

return name
}

// removeComments drops everything from a "#" to the end of line, where the
// "#" is either at the very start of line or preceded by whitespace (that
// whitespace is removed too), and then trims whitespace from both ends of
// what is left.
func removeComments(line string) string {
var re = regexp.MustCompile(`(^|\s+)#.*$`)
var re = cachedregexp.MustCompile(`(^|\s+)#.*$`)

return strings.TrimSpace(re.ReplaceAllString(line, ""))
}
Expand All @@ -95,7 +95,7 @@ func isNotRequirementLine(line string) bool {
// isLineContinuation reports whether line ends with a backslash that is not
// itself escaped by a preceding backslash.
func isLineContinuation(line string) bool {
// checks that the line ends with an odd number of backslashes,
// meaning the last one isn't escaped
var re = regexp.MustCompile(`([^\\]|^)(\\{2})*\\$`)
var re = cachedregexp.MustCompile(`([^\\]|^)(\\{2})*\\$`)

return re.MatchString(line)
}
Expand Down
8 changes: 4 additions & 4 deletions pkg/lockfile/parse-yarn-lock.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ package lockfile
import (
"bufio"
"fmt"
"github.com/g-rath/osv-detector/internal/cachedregexp"
"net/url"
"os"
"regexp"
"strings"
)

Expand Down Expand Up @@ -63,7 +63,7 @@ func extractYarnPackageName(str string) string {
}

func determineYarnPackageVersion(group []string) string {
re := regexp.MustCompile(`^ {2}"?version"?:? "?([\w-.]+)"?$`)
re := cachedregexp.MustCompile(`^ {2}"?version"?:? "?([\w-.]+)"?$`)

for _, s := range group {
matched := re.FindStringSubmatch(s)
Expand All @@ -78,7 +78,7 @@ func determineYarnPackageVersion(group []string) string {
}

func determineYarnPackageResolution(group []string) string {
re := regexp.MustCompile(`^ {2}"?(?:resolution:|resolved)"? "([^ '"]+)"$`)
re := cachedregexp.MustCompile(`^ {2}"?(?:resolution:|resolved)"? "([^ '"]+)"$`)

for _, s := range group {
matched := re.FindStringSubmatch(s)
Expand Down Expand Up @@ -111,7 +111,7 @@ func tryExtractCommit(resolution string) string {
}

for _, matcher := range matchers {
re := regexp.MustCompile(matcher)
re := cachedregexp.MustCompile(matcher)
matched := re.FindStringSubmatch(resolution)

if matched != nil {
Expand Down
6 changes: 3 additions & 3 deletions pkg/semantic/version-maven.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package semantic

import (
"fmt"
"regexp"
"github.com/g-rath/osv-detector/internal/cachedregexp"
"sort"
"strings"
)
Expand Down Expand Up @@ -175,11 +175,11 @@ func (mv MavenVersion) lessThan(mw MavenVersion) bool {
// According to Maven's implementation, any non-digit is a "character":
// https://github.com/apache/maven/blob/965aaa53da5c2d814e94a41d37142d0d6830375d/maven-artifact/src/main/java/org/apache/maven/artifact/versioning/ComparableVersion.java#L627
func mavenFindTransitions(token string) (ints []int) {
for _, span := range regexp.MustCompile(`\D\d`).FindAllStringIndex(token, -1) {
for _, span := range cachedregexp.MustCompile(`\D\d`).FindAllStringIndex(token, -1) {
ints = append(ints, span[0]+1)
}

for _, span := range regexp.MustCompile(`\d\D`).FindAllStringIndex(token, -1) {
for _, span := range cachedregexp.MustCompile(`\d\D`).FindAllStringIndex(token, -1) {
ints = append(ints, span[0]+1)
}

Expand Down
8 changes: 4 additions & 4 deletions pkg/semantic/version-packagist.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package semantic

import (
"regexp"
"github.com/g-rath/osv-detector/internal/cachedregexp"
"strconv"
"strings"
)
Expand All @@ -15,9 +15,9 @@ func canonicalizePackagistVersion(v string) string {
// the trimming...)
v = strings.TrimPrefix(strings.TrimPrefix(v, "v"), "V")

v = regexp.MustCompile(`[-_+]`).ReplaceAllString(v, ".")
v = regexp.MustCompile(`([^\d.])(\d)`).ReplaceAllString(v, "$1.$2")
v = regexp.MustCompile(`(\d)([^\d.])`).ReplaceAllString(v, "$1.$2")
v = cachedregexp.MustCompile(`[-_+]`).ReplaceAllString(v, ".")
v = cachedregexp.MustCompile(`([^\d.])(\d)`).ReplaceAllString(v, "$1.$2")
v = cachedregexp.MustCompile(`(\d)([^\d.])`).ReplaceAllString(v, "$1.$2")

return v
}
Expand Down
Loading