From 62e7ea075558b105c424aa06a7acf483cbaa2728 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Mart=C3=AD?= Date: Fri, 18 Mar 2022 16:53:42 +0000 Subject: [PATCH] overhaul how we load module info to format Go files We would call "go mod edit -json" for each Go file we formatted, as each file may be in a different directory, and thus inside a different module. However, the first mistake is that we always ran the command in the directory where gofumpt is run, not the directory containing each Go file. Our tests weren't strict enough to catch this; now octal-literal.txt is, via its run of gofmt before it calls cd. The second mistake, and a pretty embarrassing one, is that since v0.3.0 made gofumpt concurrent, it has been racy in how it writes to globals like langVersion from multiple goroutines. octal-literals.txt now tests for this by adding a nested Go module. Finally, we could also run into open file limits, because spawning a child process and grabbing its output opens files of its own such as named pipes. The added test shows this with a limit of 256 and 10k tiny Go files: --- FAIL: TestWithLowOpenFileLimit (0.30s) ulimit_unix_test.go:82: writing 10000 tiny Go files ulimit_unix_test.go:104: running with 1 paths ulimit_unix_test.go:104: running with 10000 paths ulimit_unix_test.go:112: error: got non-nil error comment: stderr: open /tmp/TestWithLowOpenFileLimit2753748366/001/p003/014.go: too many open files open /tmp/TestWithLowOpenFileLimit2753748366/001/p003/017.go: too many open files open /tmp/TestWithLowOpenFileLimit2753748366/001/p004/000.go: too many open files open /tmp/TestWithLowOpenFileLimit2753748366/001/p004/019.go: too many open files Instead, only call "go mod edit -json" once per directory, and do it in the main thread to reduce its parallelism. Also make it grab fdSem as well, for good measure. This may not be a complete fix, as we're not sure how many files are open by an exec.Command.Output call. However, we are no longer able to reproduce a failure, so leave that as a TODO. Fixes #208. --- gofmt.go | 70 ++++++++++++++++----- testdata/scripts/octal-literals.txt | 43 ++++++++----- testdata/scripts/workspaces.txt | 30 ++++----- ulimit_unix_test.go | 94 +++++++++++++++++++++++++++++ 4 files changed, 193 insertions(+), 44 deletions(-) create mode 100644 ulimit_unix_test.go diff --git a/gofmt.go b/gofmt.go index 4d29520..6390d83 100644 --- a/gofmt.go +++ b/gofmt.go @@ -23,6 +23,7 @@ import ( "runtime" "runtime/pprof" "strings" + "sync" "golang.org/x/sync/semaphore" exec "golang.org/x/sys/execabs" @@ -287,21 +288,18 @@ func processFile(filename string, info fs.FileInfo, in io.Reader, r *reporter, e // Apply gofumpt's changes before we print the code in gofumpt's format. // If either -lang or -modpath aren't set, fetch them from go.mod. - if *langVersion == "" || *modulePath == "" { - out, err := exec.Command("go", "mod", "edit", "-json").Output() - if err == nil && len(out) > 0 { - var mod struct { - Go string - Module struct { - Path string - } - } - _ = json.Unmarshal(out, &mod) - if *langVersion == "" { - *langVersion = mod.Go + lang := *langVersion + modpath := *modulePath + if lang == "" || modpath == "" { + dir := filepath.Dir(filename) + mod, ok := moduleCacheByDir.Load(dir) + if ok && mod != nil { + mod := mod.(*module) + if lang == "" { + lang = mod.Go } - if *modulePath == "" { - *modulePath = mod.Module.Path + if modpath == "" { + modpath = mod.Module.Path } } } @@ -311,8 +309,8 @@ func processFile(filename string, info fs.FileInfo, in io.Reader, r *reporter, e // We also skip walking vendor directories entirely, but that happens elsewhere. if explicit || !isGenerated(file) { gformat.File(fileSet, file, gformat.Options{ - LangVersion: *langVersion, - ModulePath: *modulePath, + LangVersion: lang, + ModulePath: modpath, ExtraRules: *extraRules, }) } @@ -532,7 +530,47 @@ func gofmtMain(s *sequencer) { } } +type module struct { + Go string + Module struct { + Path string + } +} + +func loadModuleInfo(dir string) interface{} { + cmd := exec.Command("go", "mod", "edit", "-json") + cmd.Dir = dir + + // Spawning "go mod edit" will open files by design, + // such as the named pipe to obtain stdout. + // TODO(mvdan): if we run into "too many open files" errors again in the + // future, we probably need to turn fdSem into a weighted semaphore so this + // operation can acquire a weight larger than 1. + fdSem <- true + out, err := cmd.Output() + defer func() { <-fdSem }() + + if err != nil || len(out) == 0 { + return nil + } + mod := new(module) + if err := json.Unmarshal(out, mod); err != nil { + return nil + } + return mod +} + +// Written to by fileWeight, read from fileWeight and processFile. +// A present but nil value means that loading the module info failed. +// Note that we don't require the keys to be absolute directories, +// so duplicates are possible. The same can happen with symlinks. +var moduleCacheByDir sync.Map // map[dirString]*module + func fileWeight(path string, info fs.FileInfo) int64 { + dir := filepath.Dir(path) + if _, ok := moduleCacheByDir.Load(dir); !ok { + moduleCacheByDir.Store(dir, loadModuleInfo(dir)) + } if info == nil { return exclusive } diff --git a/testdata/scripts/octal-literals.txt b/testdata/scripts/octal-literals.txt index 15f483f..cc153c5 100644 --- a/testdata/scripts/octal-literals.txt +++ b/testdata/scripts/octal-literals.txt @@ -1,31 +1,33 @@ -cd module -cp foo.go foo.go.orig - # Initially, the Go language version is too low. -gofumpt foo.go -cmp stdout foo.go.orig +gofumpt -l . +! stdout . # We can give an explicitly newer version. -gofumpt -lang=1.13 foo.go -cmp stdout foo.go.golden +gofumpt -lang=1.13 -l . +stdout -count=1 'foo\.go' +stdout -count=1 'nested[/\\]nested\.go' # If we bump the version in go.mod, it should be picked up. exec go mod edit -go=1.13 +gofumpt -l . +stdout -count=1 'foo\.go' +! stdout 'nested' + +# Ensure we produce the output we expect, and that it's stable. gofumpt foo.go cmp stdout foo.go.golden - gofumpt -d foo.go.golden ! stdout . -# We can give an explicitly older version. -gofumpt -lang=v1 foo.go -cmp stdout foo.go.orig +# We can give an explicitly older version, too +gofumpt -lang=v1 -l . +! stdout . --- module/go.mod -- +-- go.mod -- module test go 1.12 --- module/foo.go -- +-- foo.go -- package p const ( @@ -34,7 +36,7 @@ const ( k = 0o_7_5_5 l = 1022 ) --- module/foo.go.golden -- +-- foo.go.golden -- package p const ( @@ -43,3 +45,16 @@ const ( k = 0o_7_5_5 l = 1022 ) +-- nested/go.mod -- +module nested + +go 1.11 +-- nested/nested.go -- +package p + +const ( + i = 0 + j = 022 + k = 0o_7_5_5 + l = 1022 +) diff --git a/testdata/scripts/workspaces.txt b/testdata/scripts/workspaces.txt index 5a3ca87..163b3d9 100644 --- a/testdata/scripts/workspaces.txt +++ b/testdata/scripts/workspaces.txt @@ -1,22 +1,16 @@ [!go1.18] skip -# Octal literal prefixes were introduced in Go 1.13. If we are outside of the -# module, language version should not be set. -gofumpt a/go112 -cmp stdout a/go112 +# Whether we run gofumpt from inside or outside a module, +# we should always use the information from its go.mod. +# We also test that we don't get confused by the presence of go.work. -cd a -gofumpt go112 -cmp stdout go113 - --- a/go112 -- -package main +gofumpt a/go112.go +cmp stdout a/go113.go -const x = 0777 --- a/go113 -- -package main +cd a +gofumpt go112.go +cmp stdout go113.go -const x = 0o777 -- go.work -- go 1.18 use ./a @@ -26,6 +20,14 @@ module a go 1.18 -- a/a.go -- package a +-- a/go112.go -- +package main + +const x = 0777 +-- a/go113.go -- +package main + +const x = 0o777 -- b/go.mod -- module b go 1.18 diff --git a/ulimit_unix_test.go b/ulimit_unix_test.go new file mode 100644 index 0000000..97ff277 --- /dev/null +++ b/ulimit_unix_test.go @@ -0,0 +1,94 @@ +// Copyright (c) 2019, Daniel Martí +// See LICENSE for licensing information + +// TODO: replace with the unix build tag once we require Go 1.19 or later +//go:build linux + +package main + +import ( + "bytes" + "fmt" + "os" + "os/exec" + "path/filepath" + "strconv" + "testing" + + qt "github.com/frankban/quicktest" + "golang.org/x/sys/unix" +) + +func init() { + // Here rather than in TestMain, to reuse the unix build tag. + if limit := os.Getenv("TEST_WITH_FILE_LIMIT"); limit != "" { + n, err := strconv.ParseUint(limit, 10, 64) + if err != nil { + panic(err) + } + rlimit := unix.Rlimit{Cur: n, Max: n} + if err := unix.Setrlimit(unix.RLIMIT_NOFILE, &rlimit); err != nil { + panic(err) + } + os.Exit(main1()) + } +} + +func TestWithLowOpenFileLimit(t *testing.T) { + // Safe to run in parallel, as we only change the limit for child processes. + t.Parallel() + + tempDir := t.TempDir() + testBinary, err := os.Executable() + qt.Assert(t, err, qt.IsNil) + + const ( + // Enough directories to run into the ulimit. + // Enough number of files in total to run into the ulimit. + numberDirs = 500 + numberFilesPerDir = 20 + numberFilesTotal = numberDirs * numberFilesPerDir + ) + t.Logf("writing %d tiny Go files", numberFilesTotal) + var allGoFiles []string + for i := 0; i < numberDirs; i++ { + // Prefix "p", so the package name is a valid identifier. + // Add one go.mod file per directory as well, + // which will help catch data races when loading module info. + dirName := fmt.Sprintf("p%03d", i) + dirPath := filepath.Join(tempDir, dirName) + err := os.MkdirAll(dirPath, 0o777) + qt.Assert(t, err, qt.IsNil) + + err = os.WriteFile(filepath.Join(dirPath, "go.mod"), + []byte(fmt.Sprintf("module %s\n\ngo 1.16", dirName)), 0o666) + qt.Assert(t, err, qt.IsNil) + + for j := 0; j < numberFilesPerDir; j++ { + filePath := filepath.Join(dirPath, fmt.Sprintf("%03d.go", j)) + err := os.WriteFile(filePath, + // Extra newlines so that "-l" prints all paths. + []byte(fmt.Sprintf("package %s\n\n\n", dirName)), 0o666) + qt.Assert(t, err, qt.IsNil) + allGoFiles = append(allGoFiles, filePath) + } + } + if len(allGoFiles) != numberFilesTotal { + panic("allGoFiles doesn't have the expected number of files?") + } + runGofmt := func(paths ...string) { + t.Logf("running with %d paths", len(paths)) + cmd := exec.Command(testBinary, append([]string{"-l"}, paths...)...) + // 256 is a relatively common low limit, e.g. on Mac. + cmd.Env = append(os.Environ(), "TEST_WITH_FILE_LIMIT=256") + out, err := cmd.Output() + var stderr []byte + if err, _ := err.(*exec.ExitError); err != nil { + stderr = err.Stderr + } + qt.Assert(t, err, qt.IsNil, qt.Commentf("stderr:\n%s", stderr)) + qt.Assert(t, bytes.Count(out, []byte("\n")), qt.Equals, len(allGoFiles)) + } + runGofmt(tempDir) + runGofmt(allGoFiles...) +}