diff --git a/internal/cachedregexp/regex.go b/internal/cachedregexp/regex.go new file mode 100644 index 00000000..8aea0be6 --- /dev/null +++ b/internal/cachedregexp/regex.go @@ -0,0 +1,18 @@ +package cachedregexp + +import ( + "regexp" + "sync" +) + +//nolint:gochecknoglobals // this is the whole point of being a cache +var cache sync.Map + +func MustCompile(exp string) *regexp.Regexp { + compiled, ok := cache.Load(exp) + if !ok { + compiled, _ = cache.LoadOrStore(exp, regexp.MustCompile(exp)) + } + + return compiled.(*regexp.Regexp) +} diff --git a/main_test.go b/main_test.go index ff1b2152..2e6ae22e 100644 --- a/main_test.go +++ b/main_test.go @@ -3,6 +3,7 @@ package main import ( "bytes" "fmt" + "github.com/g-rath/osv-detector/internal/cachedregexp" "github.com/google/go-cmp/cmp" "os" "path/filepath" @@ -18,17 +19,17 @@ func dedent(t *testing.T, str string) string { str = strings.ReplaceAll(str, "\t", " ") // 1. remove trailing whitespace - re := regexp.MustCompile(`\r?\n([\t ]*)$`) + re := cachedregexp.MustCompile(`\r?\n([\t ]*)$`) str = re.ReplaceAllString(str, "") // 2. if any of the lines are not indented, return as we're already dedent-ed - re = regexp.MustCompile(`(^|\r?\n)[^\t \n]`) + re = cachedregexp.MustCompile(`(^|\r?\n)[^\t \n]`) if re.MatchString(str) { return str } // 3. find all line breaks to determine the highest common indentation level - re = regexp.MustCompile(`\n[\t ]+`) + re = cachedregexp.MustCompile(`\n[\t ]+`) matches := re.FindAllString(str, -1) // 4. remove the common indentation from all strings @@ -41,12 +42,12 @@ func dedent(t *testing.T, str string) string { } } - re := regexp.MustCompile(`\n[\t ]{` + fmt.Sprint(size) + `}`) + re := cachedregexp.MustCompile(`\n[\t ]{` + fmt.Sprint(size) + `}`) str = re.ReplaceAllString(str, "\n") } // 5. Remove leading whitespace. 
- re = regexp.MustCompile(`^\r?\n`) + re = cachedregexp.MustCompile(`^\r?\n`) str = re.ReplaceAllString(str, "") return str @@ -60,7 +61,7 @@ func areEqual(t *testing.T, actual, expect string) bool { expect = regexp.QuoteMeta(expect) expect = strings.ReplaceAll(expect, "%%", ".+") - re := regexp.MustCompile(`^` + expect + `$`) + re := cachedregexp.MustCompile(`^` + expect + `$`) return re.MatchString(actual) } @@ -85,7 +86,7 @@ func normalizeFilePaths(output string) string { // the number of vulnerabilities and the time that the database was last updated) // in the output with %% wildcards, in order to reduce the noise of the cmp diff func wildcardDatabaseStats(str string) string { - re := regexp.MustCompile(`(\w+) \(\d+ vulnerabilities, including withdrawn - last updated \w{3}, \d\d \w{3} \d{4} [012]\d:\d\d:\d\d GMT\)`) + re := cachedregexp.MustCompile(`(\w+) \(\d+ vulnerabilities, including withdrawn - last updated \w{3}, \d\d \w{3} \d{4} [012]\d:\d\d:\d\d GMT\)`) return re.ReplaceAllString(str, "$1 (%% vulnerabilities, including withdrawn - last updated %%)") } @@ -1384,7 +1385,7 @@ func TestRun_EndToEnd(t *testing.T) { } tests := make([]cliTestCase, 0, len(files)/2) - re := regexp.MustCompile(`\d+-(.*)`) + re := cachedregexp.MustCompile(`\d+-(.*)`) for _, f := range files { if strings.HasSuffix(f.Name(), ".out.txt") { diff --git a/pkg/database/osv.go b/pkg/database/osv.go index dc0cc408..3cdb9c24 100644 --- a/pkg/database/osv.go +++ b/pkg/database/osv.go @@ -4,10 +4,10 @@ import ( "encoding/json" "fmt" "github.com/g-rath/osv-detector/internal" + "github.com/g-rath/osv-detector/internal/cachedregexp" "github.com/g-rath/osv-detector/pkg/lockfile" "github.com/g-rath/osv-detector/pkg/semantic" "os" - "regexp" "sort" "strings" "time" @@ -43,7 +43,7 @@ func (p Package) NormalizedName() string { } // per https://www.python.org/dev/peps/pep-0503/#normalized-names - name := regexp.MustCompile(`[-_.]+`).ReplaceAllString(p.Name, "-") + name := 
cachedregexp.MustCompile(`[-_.]+`).ReplaceAllString(p.Name, "-") return strings.ToLower(name) } diff --git a/pkg/lockfile/parse-gemfile-lock.go b/pkg/lockfile/parse-gemfile-lock.go index e03efbf2..7bc0893c 100644 --- a/pkg/lockfile/parse-gemfile-lock.go +++ b/pkg/lockfile/parse-gemfile-lock.go @@ -2,9 +2,9 @@ package lockfile import ( "fmt" + "github.com/g-rath/osv-detector/internal/cachedregexp" "log" "os" - "regexp" "strings" ) @@ -55,8 +55,8 @@ func (parser *gemfileLockfileParser) addDependency(name string, version string) } func (parser *gemfileLockfileParser) parseSpec(line string) { - // nameVersionReg := regexp.MustCompile(`^( {2}| {4}| {6})(?! )(.*?)(?: \(([^-]*)(?:-(.*))?\))?(!)?$`) - nameVersionReg := regexp.MustCompile(`^( +)(.*?)(?: \(([^-]*)(?:-(.*))?\))?(!)?$`) + // nameVersionReg := cachedregexp.MustCompile(`^( {2}| {4}| {6})(?! )(.*?)(?: \(([^-]*)(?:-(.*))?\))?(!)?$`) + nameVersionReg := cachedregexp.MustCompile(`^( +)(.*?)(?: \(([^-]*)(?:-(.*))?\))?(!)?$`) results := nameVersionReg.FindStringSubmatch(line) @@ -82,7 +82,7 @@ func (parser *gemfileLockfileParser) parseSource(line string) { } // OPTIONS = /^ ([a-z]+): (.*)$/i.freeze - optionsRegexp := regexp.MustCompile(`(?i)^ {2}([a-z]+): (.*)$`) + optionsRegexp := cachedregexp.MustCompile(`(?i)^ {2}([a-z]+): (.*)$`) // todo: support options := optionsRegexp.FindStringSubmatch(line) @@ -105,7 +105,7 @@ func (parser *gemfileLockfileParser) parseSource(line string) { } func isNotIndented(line string) bool { - re := regexp.MustCompile(`^\S`) + re := cachedregexp.MustCompile(`^\S`) return re.MatchString(line) } @@ -127,7 +127,7 @@ func (parser *gemfileLockfileParser) parseLineBasedOnState(line string) { } func (parser *gemfileLockfileParser) parse(contents string) { - lineMatcher := regexp.MustCompile(`(?:\r?\n)+`) + lineMatcher := cachedregexp.MustCompile(`(?:\r?\n)+`) lines := lineMatcher.Split(contents, -1) diff --git a/pkg/lockfile/parse-maven-lock.go b/pkg/lockfile/parse-maven-lock.go index 
e825be4e..eb98754e 100644 --- a/pkg/lockfile/parse-maven-lock.go +++ b/pkg/lockfile/parse-maven-lock.go @@ -3,8 +3,8 @@ package lockfile import ( "encoding/xml" "fmt" + "github.com/g-rath/osv-detector/internal/cachedregexp" "os" - "regexp" ) type MavenLockDependency struct { @@ -15,7 +15,7 @@ type MavenLockDependency struct { } func (mld MavenLockDependency) parseResolvedVersion(version string) string { - versionRequirementReg := regexp.MustCompile(`[[(]?(.*?)(?:,|[)\]]|$)`) + versionRequirementReg := cachedregexp.MustCompile(`[[(]?(.*?)(?:,|[)\]]|$)`) results := versionRequirementReg.FindStringSubmatch(version) @@ -27,7 +27,7 @@ func (mld MavenLockDependency) parseResolvedVersion(version string) string { } func (mld MavenLockDependency) resolveVersionValue(lockfile MavenLockFile) string { - interpolationReg := regexp.MustCompile(`\${(.+)}`) + interpolationReg := cachedregexp.MustCompile(`\${(.+)}`) results := interpolationReg.FindStringSubmatch(mld.Version) diff --git a/pkg/lockfile/parse-mix-lock.go b/pkg/lockfile/parse-mix-lock.go index dc1eeb74..72facd28 100644 --- a/pkg/lockfile/parse-mix-lock.go +++ b/pkg/lockfile/parse-mix-lock.go @@ -3,8 +3,8 @@ package lockfile import ( "bufio" "fmt" + "github.com/g-rath/osv-detector/internal/cachedregexp" "os" - "regexp" "strings" ) @@ -17,7 +17,7 @@ func ParseMixLock(pathToLockfile string) ([]PackageDetails, error) { } defer file.Close() - re := regexp.MustCompile(`^ +"(\w+)": \{.+,$`) + re := cachedregexp.MustCompile(`^ +"(\w+)": \{.+,$`) scanner := bufio.NewScanner(file) diff --git a/pkg/lockfile/parse-pnpm-lock.go b/pkg/lockfile/parse-pnpm-lock.go index 5b04b6e0..412f9be1 100644 --- a/pkg/lockfile/parse-pnpm-lock.go +++ b/pkg/lockfile/parse-pnpm-lock.go @@ -2,9 +2,9 @@ package lockfile import ( "fmt" + "github.com/g-rath/osv-detector/internal/cachedregexp" "gopkg.in/yaml.v2" "os" - "regexp" "strconv" "strings" ) @@ -54,7 +54,7 @@ func (l *PnpmLockfile) UnmarshalYAML(unmarshal func(interface{}) error) error { const 
PnpmEcosystem = NpmEcosystem func startsWithNumber(str string) bool { - matcher := regexp.MustCompile(`^\d`) + matcher := cachedregexp.MustCompile(`^\d`) return matcher.MatchString(str) } @@ -105,7 +105,7 @@ func extractPnpmPackageNameAndVersion(dependencyPath string) (string, string) { func parseNameAtVersion(value string) (name string, version string) { // look for pattern "name@version", where name is allowed to contain zero or more "@" - matches := regexp.MustCompile(`^(.+)@([\w.-]+)(?:\(|$)`).FindStringSubmatch(value) + matches := cachedregexp.MustCompile(`^(.+)@([\w.-]+)(?:\(|$)`).FindStringSubmatch(value) if len(matches) != 3 { return name, "" @@ -139,7 +139,7 @@ func parsePnpmLock(lockfile PnpmLockfile) []PackageDetails { commit := pkg.Resolution.Commit if strings.HasPrefix(pkg.Resolution.Tarball, "https://codeload.github.com") { - re := regexp.MustCompile(`https://codeload\.github\.com(?:/[\w-.]+){2}/tar\.gz/(\w+)$`) + re := cachedregexp.MustCompile(`https://codeload\.github\.com(?:/[\w-.]+){2}/tar\.gz/(\w+)$`) matched := re.FindStringSubmatch(pkg.Resolution.Tarball) if matched != nil { diff --git a/pkg/lockfile/parse-requirements-txt.go b/pkg/lockfile/parse-requirements-txt.go index 5db7cdeb..003e256f 100644 --- a/pkg/lockfile/parse-requirements-txt.go +++ b/pkg/lockfile/parse-requirements-txt.go @@ -3,9 +3,9 @@ package lockfile import ( "bufio" "fmt" + "github.com/g-rath/osv-detector/internal/cachedregexp" "os" "path/filepath" - "regexp" "strings" ) @@ -67,7 +67,7 @@ func parseLine(line string) PackageDetails { // than false negatives, and can be dealt with when/if it actually happens. 
func normalizedRequirementName(name string) string { // per https://www.python.org/dev/peps/pep-0503/#normalized-names - name = regexp.MustCompile(`[-_.]+`).ReplaceAllString(name, "-") + name = cachedregexp.MustCompile(`[-_.]+`).ReplaceAllString(name, "-") name = strings.ToLower(name) name = strings.Split(name, "[")[0] @@ -75,7 +75,7 @@ func normalizedRequirementName(name string) string { } func removeComments(line string) string { - var re = regexp.MustCompile(`(^|\s+)#.*$`) + var re = cachedregexp.MustCompile(`(^|\s+)#.*$`) return strings.TrimSpace(re.ReplaceAllString(line, "")) } @@ -95,7 +95,7 @@ func isNotRequirementLine(line string) bool { func isLineContinuation(line string) bool { // checks that the line ends with an odd number of backslashes, // meaning the last one isn't escaped - var re = regexp.MustCompile(`([^\\]|^)(\\{2})*\\$`) + var re = cachedregexp.MustCompile(`([^\\]|^)(\\{2})*\\$`) return re.MatchString(line) } diff --git a/pkg/lockfile/parse-yarn-lock.go b/pkg/lockfile/parse-yarn-lock.go index b11715e0..05e00520 100644 --- a/pkg/lockfile/parse-yarn-lock.go +++ b/pkg/lockfile/parse-yarn-lock.go @@ -3,9 +3,9 @@ package lockfile import ( "bufio" "fmt" + "github.com/g-rath/osv-detector/internal/cachedregexp" "net/url" "os" - "regexp" "strings" ) @@ -63,7 +63,7 @@ func extractYarnPackageName(str string) string { } func determineYarnPackageVersion(group []string) string { - re := regexp.MustCompile(`^ {2}"?version"?:? "?([\w-.]+)"?$`) + re := cachedregexp.MustCompile(`^ {2}"?version"?:? "?([\w-.]+)"?$`) for _, s := range group { matched := re.FindStringSubmatch(s) @@ -78,7 +78,7 @@ func determineYarnPackageVersion(group []string) string { } func determineYarnPackageResolution(group []string) string { - re := regexp.MustCompile(`^ {2}"?(?:resolution:|resolved)"? "([^ '"]+)"$`) + re := cachedregexp.MustCompile(`^ {2}"?(?:resolution:|resolved)"? 
"([^ '"]+)"$`) for _, s := range group { matched := re.FindStringSubmatch(s) @@ -111,7 +111,7 @@ func tryExtractCommit(resolution string) string { } for _, matcher := range matchers { - re := regexp.MustCompile(matcher) + re := cachedregexp.MustCompile(matcher) matched := re.FindStringSubmatch(resolution) if matched != nil { diff --git a/pkg/semantic/version-maven.go b/pkg/semantic/version-maven.go index 9cf5c9e5..56292438 100644 --- a/pkg/semantic/version-maven.go +++ b/pkg/semantic/version-maven.go @@ -2,7 +2,7 @@ package semantic import ( "fmt" - "regexp" + "github.com/g-rath/osv-detector/internal/cachedregexp" "sort" "strings" ) @@ -175,11 +175,11 @@ func (mv MavenVersion) lessThan(mw MavenVersion) bool { // According to Maven's implementation, any non-digit is a "character": // https://github.com/apache/maven/blob/965aaa53da5c2d814e94a41d37142d0d6830375d/maven-artifact/src/main/java/org/apache/maven/artifact/versioning/ComparableVersion.java#L627 func mavenFindTransitions(token string) (ints []int) { - for _, span := range regexp.MustCompile(`\D\d`).FindAllStringIndex(token, -1) { + for _, span := range cachedregexp.MustCompile(`\D\d`).FindAllStringIndex(token, -1) { ints = append(ints, span[0]+1) } - for _, span := range regexp.MustCompile(`\d\D`).FindAllStringIndex(token, -1) { + for _, span := range cachedregexp.MustCompile(`\d\D`).FindAllStringIndex(token, -1) { ints = append(ints, span[0]+1) } diff --git a/pkg/semantic/version-packagist.go b/pkg/semantic/version-packagist.go index 1dad01ac..d766525a 100644 --- a/pkg/semantic/version-packagist.go +++ b/pkg/semantic/version-packagist.go @@ -1,7 +1,7 @@ package semantic import ( - "regexp" + "github.com/g-rath/osv-detector/internal/cachedregexp" "strconv" "strings" ) @@ -15,9 +15,9 @@ func canonicalizePackagistVersion(v string) string { // the trimming...) 
v = strings.TrimPrefix(strings.TrimPrefix(v, "v"), "V") - v = regexp.MustCompile(`[-_+]`).ReplaceAllString(v, ".") - v = regexp.MustCompile(`([^\d.])(\d)`).ReplaceAllString(v, "$1.$2") - v = regexp.MustCompile(`(\d)([^\d.])`).ReplaceAllString(v, "$1.$2") + v = cachedregexp.MustCompile(`[-_+]`).ReplaceAllString(v, ".") + v = cachedregexp.MustCompile(`([^\d.])(\d)`).ReplaceAllString(v, "$1.$2") + v = cachedregexp.MustCompile(`(\d)([^\d.])`).ReplaceAllString(v, "$1.$2") return v } diff --git a/pkg/semantic/version-pypi.go b/pkg/semantic/version-pypi.go index 58fa2c63..582ffbae 100644 --- a/pkg/semantic/version-pypi.go +++ b/pkg/semantic/version-pypi.go @@ -2,8 +2,8 @@ package semantic import ( "fmt" + "github.com/g-rath/osv-detector/internal/cachedregexp" "math/big" - "regexp" "strings" ) @@ -67,7 +67,7 @@ func parseLetterVersion(letter, number string) letterAndNumber { } func parseLocalVersion(local string) (parts []string) { - for _, part := range regexp.MustCompile(`[._-]`).Split(local, -1) { + for _, part := range cachedregexp.MustCompile(`[._-]`).Split(local, -1) { parts = append(parts, strings.ToLower(part)) } @@ -88,7 +88,7 @@ func normalizePyPILegacyPart(part string) string { part = "@" } - if regexp.MustCompile(`\d`).MatchString(part[:1]) { + if cachedregexp.MustCompile(`\d`).MatchString(part[:1]) { // pad for numeric comparison return fmt.Sprintf("%08s", part) } @@ -97,7 +97,7 @@ func normalizePyPILegacyPart(part string) string { } func parsePyPIVersionParts(str string) (parts []string) { - re := regexp.MustCompile(`(\d+|[a-z]+|\.|-)`) + re := cachedregexp.MustCompile(`(\d+|[a-z]+|\.|-)`) splits := re.FindAllString(str, -1) splits = append(splits, "final") @@ -137,7 +137,7 @@ func parsePyPIVersion(str string) PyPIVersion { str = strings.ToLower(str) // from https://peps.python.org/pep-0440/#appendix-b-parsing-version-strings-with-regular-expressions - re := regexp.MustCompile(`^\s*v?(?:(?:(?P<epoch>[0-9]+)!)?(?P<release>[0-9]+(?:\.[0-9]+)*)(?P<pre>[-_\.]?(?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))[-_\.]?(?P<pre_n>[0-9]+)?)?(?P<post>(?:-(?P<post_n1>[0-9]+))|(?:[-_\.]?(?P<post_l>post|rev|r)[-_\.]?(?P<post_n2>[0-9]+)?))?(?P<dev>[-_\.]?(?P<dev_l>dev)[-_\.]?(?P<dev_n>[0-9]+)?)?)(?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?\s*$`)
+	re := cachedregexp.MustCompile(`^\s*v?(?:(?:(?P<epoch>[0-9]+)!)?(?P<release>[0-9]+(?:\.[0-9]+)*)(?P<pre>[-_\.]?(?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))[-_\.]?(?P<pre_n>[0-9]+)?)?(?P<post>(?:-(?P<post_n1>[0-9]+))|(?:[-_\.]?(?P<post_l>post|rev|r)[-_\.]?(?P<post_n2>[0-9]+)?))?(?P<dev>[-_\.]?(?P<dev_l>dev)[-_\.]?(?P<dev_n>[0-9]+)?)?)(?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?\s*$`)
 	match := re.FindStringSubmatch(str)
 
 	if len(match) == 0 {
diff --git a/pkg/semantic/version-semver-like.go b/pkg/semantic/version-semver-like.go
index d9480192..252c509a 100644
--- a/pkg/semantic/version-semver-like.go
+++ b/pkg/semantic/version-semver-like.go
@@ -2,8 +2,8 @@ package semantic
 
 import (
 	"fmt"
+	"github.com/g-rath/osv-detector/internal/cachedregexp"
 	"math/big"
-	"regexp"
 	"strings"
 )
 
@@ -55,7 +55,7 @@ func parseSemverLike(line string) SemverLikeVersion {
 	var components []*big.Int
 	originStr := line
 
-	numberReg := regexp.MustCompile(`\d`)
+	numberReg := cachedregexp.MustCompile(`\d`)
 
 	currentCom := ""
 	foundBuild := false