From 314f21d8579ff88f3c11fe4cc28420e41b72dd5d Mon Sep 17 00:00:00 2001 From: lornasong Date: Fri, 20 May 2022 03:40:39 -0400 Subject: [PATCH] Test for runTerraformCmd leaked go-routine (#299) * Update TestContext_sleepTimeoutExpired to check for canceling within timeframe Add a timeout to the test to ensure that the terraform apply cancels within a reasonable time of the 5s timeout. Currently, this test is not canceling the terraform apply as expected. In the logs you can see that the test takes 1 min rather than ~5s: ``` --- PASS: TestContext_sleepTimeoutExpired/sleep-0.12.31 (62.13s) ``` ``` === RUN TestContext_sleepTimeoutExpired/sleep-0.12.31 util_test.go:113: [INFO] running Terraform command: /var/folders/6y/gy9gggt14379c_k39vwb50lc0000gn/T/terraform_1378921380/terraform apply -no-color -auto-approve -input=false -lock=true -parallelism=10 -refresh=true util_test.go:103: CLI Output: // truncated ... time_sleep.sleep: Creating... time_sleep.sleep: Still creating... [10s elapsed] time_sleep.sleep: Still creating... [20s elapsed] time_sleep.sleep: Still creating... [30s elapsed] time_sleep.sleep: Still creating... [41s elapsed] time_sleep.sleep: Still creating... [51s elapsed] time_sleep.sleep: Creation complete after 1m0s [id=2022-05-06T17:40:20Z] Apply complete! Resources: 1 added, 0 changed, 0 destroyed. ``` * Remove runTerraformCmd check for cmd.ProcessState Processes were not being killed because cmd.ProcessState was nil. With this change, processes will be able to make the request to Kill(). Added a temporary log to printout cmd.ProcessState to demonstrate. Will be removed in next commit. Note: this will cause hanging `TestContext_sleepTimeoutExpired` due to a known Golang issue with killing a command when Stdout or Stderr are set to anything besides `nil` or `*os.File`. This is because the Kill does not notify the stdout/stderr subprocesses to stop. `cmd.Wait` (called by `cmd.Run`) waits indefinitely for those subprocesses to stop. * Read logs from Stderr/out Pipe to avoid hanging `TestContext_sleepTimeoutExpired` can occasionally hang when killing a command that has Stdout or Stderr set to anything besides `nil` or `*os.File`. https://github.com/golang/go/issues/23019 Use workaround to read from StdoutPipe and StderrPipe rather than setting Stdout / Stderr * Test for runTerraformCmd leaked go-routine Currently, when runTerraformCmd is called, it starts a go-routine to kill the Terraform CLI on context.Done(). However, when the Terraform CLI completes and runTerraformCmd() finishes, the go-routine continues running unnecessarily. If the caller cancels the context down the line, this will stop the go-routine and it will log the error: "error from kill: os: process already finished" because the Terraform CLI has already finished. Added a test for this in cmd_default.go and cmd_linux.go. Have not tried it in linux yet though. When running with the race detector: ``` ================== WARNING: DATA RACE Read at 0x00c0002516c8 by goroutine 7: bytes.(*Buffer).String() /usr/local/go/src/bytes/buffer.go:65 +0x36a github.com/hashicorp/terraform-exec/tfexec.Test_runTerraformCmd_default() /Users/lornasong/go/src/github.com/hashicorp/terraform-exec/tfexec/cmd_default_test.go:35 +0x360 testing.tRunner() // truncated ... Previous write at 0x00c0002516c8 by goroutine 8: bytes.(*Buffer).grow() /usr/local/go/src/bytes/buffer.go:147 +0x3b1 bytes.(*Buffer).Write() /usr/local/go/src/bytes/buffer.go:172 +0xcd log.(*Logger).Output() /usr/local/go/src/log/log.go:184 +0x466 log.(*Logger).Printf() /usr/local/go/src/log/log.go:191 +0x6e github.com/hashicorp/terraform-exec/tfexec.(*Terraform).runTerraformCmd.func1() /Users/lornasong/go/src/github.com/hashicorp/terraform-exec/tfexec/cmd_default.go:24 +0x2a5 // truncated ... ================== * Use CommandContext to kill instead of manually doing it * Fix EOF error check to use error.Is() This also fixes a race condition caused by using the pointer to the io.EOF * Update tests to use separate string.Builder-s for stdout and stderr string.Builder is a non-comparable type which is not safe for concurrent use when shared by Cmd.Stdout and Cmd.Stderr. Causes a race condition when accessing the the builder when Cmd is running. * Fixes to runTerraformCmd for race conditions - Use waitgroups for more readability - Improve handling errors from writeOutput - Finish reading from pipes before calling cmd.Wait - fixes a race condition that leads to an error :`read |0: file already closed` - Because now waiting for pipes to finish reading, need to update waitGroup to close buf.Read on context cancel. Otherwise buf.Read blocks until next line before stopping. Causes TestContext_sleepTimeoutExpired takes a little too long to cancel (~20s) Co-authored-by: Kyle Carberry --- tfexec/cmd.go | 37 ++++++++++++- tfexec/cmd_default.go | 71 ++++++++++++++++++------ tfexec/cmd_default_test.go | 39 ++++++++++++++ tfexec/cmd_linux.go | 74 +++++++++++++++++++------- tfexec/cmd_linux_test.go | 36 +++++++++++++ tfexec/internal/e2etest/errors_test.go | 22 +++++--- tfexec/internal/e2etest/util_test.go | 13 +++-- 7 files changed, 245 insertions(+), 47 deletions(-) create mode 100644 tfexec/cmd_default_test.go create mode 100644 tfexec/cmd_linux_test.go diff --git a/tfexec/cmd.go b/tfexec/cmd.go index 21243624..df7cd499 100644 --- a/tfexec/cmd.go +++ b/tfexec/cmd.go @@ -1,9 +1,11 @@ package tfexec import ( + "bufio" "bytes" "context" "encoding/json" + "errors" "fmt" "io" "io/ioutil" @@ -170,7 +172,7 @@ func (tf *Terraform) buildEnv(mergeEnv map[string]string) []string { } func (tf *Terraform) buildTerraformCmd(ctx context.Context, mergeEnv map[string]string, args ...string) *exec.Cmd { - cmd := exec.Command(tf.execPath, args...) + cmd := exec.CommandContext(ctx, tf.execPath, args...) cmd.Env = tf.buildEnv(mergeEnv) cmd.Dir = tf.workingDir @@ -229,3 +231,36 @@ func mergeWriters(writers ...io.Writer) io.Writer { } return io.MultiWriter(compact...) } + +func writeOutput(ctx context.Context, r io.ReadCloser, w io.Writer) error { + // ReadBytes will block until bytes are read, which can cause a delay in + // returning even if the command's context has been canceled. Use a separate + // goroutine to prompt ReadBytes to return on cancel + closeCtx, closeCancel := context.WithCancel(ctx) + defer closeCancel() + go func() { + select { + case <-ctx.Done(): + r.Close() + case <-closeCtx.Done(): + return + } + }() + + buf := bufio.NewReader(r) + for { + line, err := buf.ReadBytes('\n') + if len(line) > 0 { + if _, err := w.Write(line); err != nil { + return err + } + } + if err != nil { + if errors.Is(err, io.EOF) { + return nil + } + + return err + } + } +} diff --git a/tfexec/cmd_default.go b/tfexec/cmd_default.go index 08a65bcd..6d7b768e 100644 --- a/tfexec/cmd_default.go +++ b/tfexec/cmd_default.go @@ -7,26 +7,12 @@ import ( "context" "os/exec" "strings" + "sync" ) func (tf *Terraform) runTerraformCmd(ctx context.Context, cmd *exec.Cmd) error { var errBuf strings.Builder - cmd.Stdout = mergeWriters(cmd.Stdout, tf.stdout) - cmd.Stderr = mergeWriters(cmd.Stderr, tf.stderr, &errBuf) - - go func() { - <-ctx.Done() - if ctx.Err() == context.DeadlineExceeded || ctx.Err() == context.Canceled { - if cmd != nil && cmd.Process != nil && cmd.ProcessState != nil { - err := cmd.Process.Kill() - if err != nil { - tf.logger.Printf("error from kill: %s", err) - } - } - } - }() - // check for early cancellation select { case <-ctx.Done(): @@ -34,7 +20,52 @@ func (tf *Terraform) runTerraformCmd(ctx context.Context, cmd *exec.Cmd) error { default: } - err := cmd.Run() + // Read stdout / stderr logs from pipe instead of setting cmd.Stdout and + // cmd.Stderr because it can cause hanging when killing the command + // https://github.com/golang/go/issues/23019 + stdoutWriter := mergeWriters(cmd.Stdout, tf.stdout) + stderrWriter := mergeWriters(tf.stderr, &errBuf) + + cmd.Stderr = nil + cmd.Stdout = nil + + stdoutPipe, err := cmd.StdoutPipe() + if err != nil { + return err + } + + stderrPipe, err := cmd.StderrPipe() + if err != nil { + return err + } + + err = cmd.Start() + if err == nil && ctx.Err() != nil { + err = ctx.Err() + } + if err != nil { + return tf.wrapExitError(ctx, err, "") + } + + var errStdout, errStderr error + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + errStdout = writeOutput(ctx, stdoutPipe, stdoutWriter) + }() + + wg.Add(1) + go func() { + defer wg.Done() + errStderr = writeOutput(ctx, stderrPipe, stderrWriter) + }() + + // Reads from pipes must be completed before calling cmd.Wait(). Otherwise + // can cause a race condition + wg.Wait() + + err = cmd.Wait() if err == nil && ctx.Err() != nil { err = ctx.Err() } @@ -42,5 +73,13 @@ func (tf *Terraform) runTerraformCmd(ctx context.Context, cmd *exec.Cmd) error { return tf.wrapExitError(ctx, err, errBuf.String()) } + // Return error if there was an issue reading the std out/err + if errStdout != nil && ctx.Err() != nil { + return tf.wrapExitError(ctx, errStdout, errBuf.String()) + } + if errStderr != nil && ctx.Err() != nil { + return tf.wrapExitError(ctx, errStderr, errBuf.String()) + } + return nil } diff --git a/tfexec/cmd_default_test.go b/tfexec/cmd_default_test.go new file mode 100644 index 00000000..0245ddaa --- /dev/null +++ b/tfexec/cmd_default_test.go @@ -0,0 +1,39 @@ +//go:build !linux +// +build !linux + +package tfexec + +import ( + "bytes" + "context" + "log" + "strings" + "testing" + "time" +) + +func Test_runTerraformCmd_default(t *testing.T) { + // Checks runTerraformCmd for race condition when using + // go test -race -run Test_runTerraformCmd_default ./tfexec + var buf bytes.Buffer + + tf := &Terraform{ + logger: log.New(&buf, "", 0), + execPath: "echo", + } + + ctx, cancel := context.WithCancel(context.Background()) + + cmd := tf.buildTerraformCmd(ctx, nil, "hello tf-exec!") + err := tf.runTerraformCmd(ctx, cmd) + if err != nil { + t.Fatal(err) + } + + // Cancel stops the leaked go routine which logs an error + cancel() + time.Sleep(time.Second) + if strings.Contains(buf.String(), "error from kill") { + t.Fatal("canceling context should not lead to logging an error") + } +} diff --git a/tfexec/cmd_linux.go b/tfexec/cmd_linux.go index 7cbdcb96..6fa40e0a 100644 --- a/tfexec/cmd_linux.go +++ b/tfexec/cmd_linux.go @@ -4,15 +4,13 @@ import ( "context" "os/exec" "strings" + "sync" "syscall" ) func (tf *Terraform) runTerraformCmd(ctx context.Context, cmd *exec.Cmd) error { var errBuf strings.Builder - cmd.Stdout = mergeWriters(cmd.Stdout, tf.stdout) - cmd.Stderr = mergeWriters(cmd.Stderr, tf.stderr, &errBuf) - cmd.SysProcAttr = &syscall.SysProcAttr{ // kill children if parent is dead Pdeathsig: syscall.SIGKILL, @@ -20,21 +18,6 @@ func (tf *Terraform) runTerraformCmd(ctx context.Context, cmd *exec.Cmd) error { Setpgid: true, } - go func() { - <-ctx.Done() - if ctx.Err() == context.DeadlineExceeded || ctx.Err() == context.Canceled { - if cmd != nil && cmd.Process != nil && cmd.ProcessState != nil { - // send SIGINT to process group - err := syscall.Kill(-cmd.Process.Pid, syscall.SIGINT) - if err != nil { - tf.logger.Printf("error from SIGINT: %s", err) - } - } - - // TODO: send a kill if it doesn't respond for a bit? - } - }() - // check for early cancellation select { case <-ctx.Done(): @@ -42,7 +25,52 @@ func (tf *Terraform) runTerraformCmd(ctx context.Context, cmd *exec.Cmd) error { default: } - err := cmd.Run() + // Read stdout / stderr logs from pipe instead of setting cmd.Stdout and + // cmd.Stderr because it can cause hanging when killing the command + // https://github.com/golang/go/issues/23019 + stdoutWriter := mergeWriters(cmd.Stdout, tf.stdout) + stderrWriter := mergeWriters(tf.stderr, &errBuf) + + cmd.Stderr = nil + cmd.Stdout = nil + + stdoutPipe, err := cmd.StdoutPipe() + if err != nil { + return err + } + + stderrPipe, err := cmd.StderrPipe() + if err != nil { + return err + } + + err = cmd.Start() + if err == nil && ctx.Err() != nil { + err = ctx.Err() + } + if err != nil { + return tf.wrapExitError(ctx, err, "") + } + + var errStdout, errStderr error + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + errStdout = writeOutput(ctx, stdoutPipe, stdoutWriter) + }() + + wg.Add(1) + go func() { + defer wg.Done() + errStderr = writeOutput(ctx, stderrPipe, stderrWriter) + }() + + // Reads from pipes must be completed before calling cmd.Wait(). Otherwise + // can cause a race condition + wg.Wait() + + err = cmd.Wait() if err == nil && ctx.Err() != nil { err = ctx.Err() } @@ -50,5 +78,13 @@ func (tf *Terraform) runTerraformCmd(ctx context.Context, cmd *exec.Cmd) error { return tf.wrapExitError(ctx, err, errBuf.String()) } + // Return error if there was an issue reading the std out/err + if errStdout != nil && ctx.Err() != nil { + return tf.wrapExitError(ctx, errStdout, errBuf.String()) + } + if errStderr != nil && ctx.Err() != nil { + return tf.wrapExitError(ctx, errStderr, errBuf.String()) + } + return nil } diff --git a/tfexec/cmd_linux_test.go b/tfexec/cmd_linux_test.go new file mode 100644 index 00000000..471ddf57 --- /dev/null +++ b/tfexec/cmd_linux_test.go @@ -0,0 +1,36 @@ +package tfexec + +import ( + "bytes" + "context" + "log" + "strings" + "testing" + "time" +) + +func Test_runTerraformCmd_linux(t *testing.T) { + // Checks runTerraformCmd for race condition when using + // go test -race -run Test_runTerraformCmd_linux ./tfexec -tags=linux + var buf bytes.Buffer + + tf := &Terraform{ + logger: log.New(&buf, "", 0), + execPath: "echo", + } + + ctx, cancel := context.WithCancel(context.Background()) + + cmd := tf.buildTerraformCmd(ctx, nil, "hello tf-exec!") + err := tf.runTerraformCmd(ctx, cmd) + if err != nil { + t.Fatal(err) + } + + // Cancel stops the leaked go routine which logs an error + cancel() + time.Sleep(time.Second) + if strings.Contains(buf.String(), "error from kill") { + t.Fatal("canceling context should not lead to logging an error") + } +} diff --git a/tfexec/internal/e2etest/errors_test.go b/tfexec/internal/e2etest/errors_test.go index 804024ea..95775557 100644 --- a/tfexec/internal/e2etest/errors_test.go +++ b/tfexec/internal/e2etest/errors_test.go @@ -205,13 +205,21 @@ func TestContext_sleepTimeoutExpired(t *testing.T) { ctx, cancel := context.WithTimeout(ctx, 5*time.Second) defer cancel() - err = tf.Apply(ctx) - if err == nil { - t.Fatal("expected error, but didn't find one") - } - - if !errors.Is(err, context.DeadlineExceeded) { - t.Fatalf("expected context.DeadlineExceeded, got %T %s", err, err) + errCh := make(chan error) + go func() { + err = tf.Apply(ctx) + if err != nil { + errCh <- err + } + }() + + select { + case err := <-errCh: + if !errors.Is(err, context.DeadlineExceeded) { + t.Fatalf("expected context.DeadlineExceeded, got %T %s", err, err) + } + case <-time.After(time.Second * 10): + t.Fatal("terraform apply should have canceled and returned in ~5s") } }) } diff --git a/tfexec/internal/e2etest/util_test.go b/tfexec/internal/e2etest/util_test.go index 2474cd75..49b8d51f 100644 --- a/tfexec/internal/e2etest/util_test.go +++ b/tfexec/internal/e2etest/util_test.go @@ -102,16 +102,21 @@ func runTestVersions(t *testing.T, versions []string, fixtureName string, cb fun } } - var stdouterr strings.Builder - tf.SetStdout(&stdouterr) - tf.SetStderr(&stdouterr) + // Separate strings.Builder because it's not concurrent safe + var stdout strings.Builder + tf.SetStdout(&stdout) + var stderr strings.Builder + tf.SetStderr(&stderr) tf.SetLogger(&testingPrintfer{t}) // TODO: capture panics here? cb(t, runningVersion, tf) - t.Logf("CLI Output:\n%s", stdouterr.String()) + t.Logf("CLI Output:\n%s", stdout.String()) + if len(stderr.String()) > 0 { + t.Logf("CLI Error:\n%s", stderr.String()) + } }) } }