Skip to content

Commit

Permalink
getSnippetHash: Use regexp instead of parsing whole AST (#759)
Browse files Browse the repository at this point in the history
* `getSnippetHash`: Use regexp instead of parsing whole AST
When calculating the env hash to use as a cache key, we don't need an exact list of imports.
Therefore, to improve performance, we can replace the current AST-based strategy with a regexp-based one.
This new strategy can lead to false positives (e.g. a string containing import 'foo'), but this is not an issue for the hashing function. All that matters is that the hash is consistent and accounts for all files (no false negatives).

* Check for Abs errors
  • Loading branch information
julienduchesne committed Sep 19, 2022
1 parent 12e8cf0 commit 0aba526
Showing 1 changed file with 31 additions and 11 deletions.
42 changes: 31 additions & 11 deletions pkg/jsonnet/imports.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"os"
"path/filepath"
"regexp"
"sort"
"sync"

Expand All @@ -18,6 +19,8 @@ import (
"github.com/grafana/tanka/pkg/jsonnet/native"
)

// importsRegexp matches `import "path"` and `importstr "path"` statements
// (single or double quoted) in raw Jsonnet source text.
// Capture group 1 is "str" for `importstr` and empty for a plain `import`;
// capture group 2 is the imported path as written in the source.
// The match is purely textual, so it also hits import-looking text inside
// strings or comments.
// NOTE(review): `importbin "path"` is not matched by this pattern (the
// keyword must be followed by whitespace) — confirm whether that is intended.
var importsRegexp = regexp.MustCompile(`import(str)?\s+['"]([^'"%()]+)['"]`)

// TransitiveImports returns all recursive imports of an environment
func TransitiveImports(dir string) ([]string, error) {
dir, err := filepath.Abs(dir)
Expand Down Expand Up @@ -155,9 +158,8 @@ var fileHashes sync.Map
// and the content of all of its dependencies.
// File hashes are cached in-memory to optimize multiple executions of this function in a process
func getSnippetHash(vm *jsonnet.VM, path, data string) (string, error) {
node, _ := jsonnet.SnippetToAST(path, data)
result := map[string]bool{}
if err := importRecursive(result, vm, node, path, true); err != nil {
if err := findImportRecursiveRegexp(result, vm, path, data); err != nil {
return "", err
}
fileNames := []string{}
Expand Down Expand Up @@ -187,16 +189,34 @@ func getSnippetHash(vm *jsonnet.VM, path, data string) (string, error) {
return base64.URLEncoding.EncodeToString(fullHasher.Sum(nil)), nil
}

func uniqueStringSlice(s []string) []string {
seen := make(map[string]struct{}, len(s))
j := 0
for _, v := range s {
if _, ok := seen[v]; ok {
// findImportRecursiveRegexp does the same as `importRecursive` but uses a regexp
// rather than parsing the AST of all files. This is much faster, but can lead to
// false positives (e.g. if a string contains `import "foo"`).
func findImportRecursiveRegexp(list map[string]bool, vm *jsonnet.VM, filename, content string) error {
matches := importsRegexp.FindAllStringSubmatch(content, -1)

for _, match := range matches {
importContents, foundAt, err := vm.ImportData(filename, match[2])
if err != nil {
continue
}
abs, err := filepath.Abs(foundAt)
if err != nil {
return err
}

if list[abs] {
return nil
}
list[abs] = true

if match[1] == "str" {
continue
}
seen[v] = struct{}{}
s[j] = v
j++

if err := findImportRecursiveRegexp(list, vm, abs, importContents); err != nil {
return err
}
}
return s[:j]
return nil
}

0 comments on commit 0aba526

Please sign in to comment.