Skip to content

Commit e3ff563

Browse files
lornasongkylecarbs
andauthored
Test for runTerraformCmd leaked go-routine (#299)
* Update TestContext_sleepTimeoutExpired to check for canceling within timeframe Add a timeout to the test to ensure that the terraform apply cancels within a reasonable time of the 5s timeout. Currently, this test is not canceling the terraform apply as expected. In the logs you can see that the test takes 1 min rather than ~5s: ``` --- PASS: TestContext_sleepTimeoutExpired/sleep-0.12.31 (62.13s) ``` ``` === RUN TestContext_sleepTimeoutExpired/sleep-0.12.31 util_test.go:113: [INFO] running Terraform command: /var/folders/6y/gy9gggt14379c_k39vwb50lc0000gn/T/terraform_1378921380/terraform apply -no-color -auto-approve -input=false -lock=true -parallelism=10 -refresh=true util_test.go:103: CLI Output: // truncated ... time_sleep.sleep: Creating... time_sleep.sleep: Still creating... [10s elapsed] time_sleep.sleep: Still creating... [20s elapsed] time_sleep.sleep: Still creating... [30s elapsed] time_sleep.sleep: Still creating... [41s elapsed] time_sleep.sleep: Still creating... [51s elapsed] time_sleep.sleep: Creation complete after 1m0s [id=2022-05-06T17:40:20Z] Apply complete! Resources: 1 added, 0 changed, 0 destroyed. ``` * Remove runTerraformCmd check for cmd.ProcessState Processes were not being killed because cmd.ProcessState was nil. With this change, processes will be able to make the request to Kill(). Added a temporary log to printout cmd.ProcessState to demonstrate. Will be removed in next commit. Note: this will cause hanging `TestContext_sleepTimeoutExpired` due to a known Golang issue with killing a command when Stdout or Stderr are set to anything besides `nil` or `*os.File`. This is because the Kill does not notify the stdout/stderr subprocesses to stop. `cmd.Wait` (called by `cmd.Run`) waits indefinitely for those subprocesses to stop. * Read logs from Stderr/out Pipe to avoid hanging `TestContext_sleepTimeoutExpired` can occasionally hang when killing a command that has Stdout or Stderr set to anything besides `nil` or `*os.File`. golang/go#23019 Use workaround to read from StdoutPipe and StderrPipe rather than setting Stdout / Stderr * Test for runTerraformCmd leaked go-routine Currently, when runTerraformCmd is called, it starts a go-routine to kill the Terraform CLI on context.Done(). However, when the Terraform CLI completes and runTerraformCmd() finishes, the go-routine continues running unnecessarily. If the caller cancels the context down the line, this will stop the go-routine and it will log the error: "error from kill: os: process already finished" because the Terraform CLI has already finished. Added a test for this in cmd_default.go and cmd_linux.go. Have not tried it in linux yet though. When running with the race detector: ``` ================== WARNING: DATA RACE Read at 0x00c0002516c8 by goroutine 7: bytes.(*Buffer).String() /usr/local/go/src/bytes/buffer.go:65 +0x36a github.com/hashicorp/terraform-exec/tfexec.Test_runTerraformCmd_default() /Users/lornasong/go/src/github.com/hashicorp/terraform-exec/tfexec/cmd_default_test.go:35 +0x360 testing.tRunner() // truncated ... Previous write at 0x00c0002516c8 by goroutine 8: bytes.(*Buffer).grow() /usr/local/go/src/bytes/buffer.go:147 +0x3b1 bytes.(*Buffer).Write() /usr/local/go/src/bytes/buffer.go:172 +0xcd log.(*Logger).Output() /usr/local/go/src/log/log.go:184 +0x466 log.(*Logger).Printf() /usr/local/go/src/log/log.go:191 +0x6e github.com/hashicorp/terraform-exec/tfexec.(*Terraform).runTerraformCmd.func1() /Users/lornasong/go/src/github.com/hashicorp/terraform-exec/tfexec/cmd_default.go:24 +0x2a5 // truncated ... ================== * Use CommandContext to kill instead of manually doing it * Fix EOF error check to use error.Is() This also fixes a race condition caused by using the pointer to the io.EOF * Update tests to use separate string.Builder-s for stdout and stderr string.Builder is a non-comparable type which is not safe for concurrent use when shared by Cmd.Stdout and Cmd.Stderr. Causes a race condition when accessing the the builder when Cmd is running. * Fixes to runTerraformCmd for race conditions - Use waitgroups for more readability - Improve handling errors from writeOutput - Finish reading from pipes before calling cmd.Wait - fixes a race condition that leads to an error :`read |0: file already closed` - Because now waiting for pipes to finish reading, need to update waitGroup to close buf.Read on context cancel. Otherwise buf.Read blocks until next line before stopping. Causes TestContext_sleepTimeoutExpired takes a little too long to cancel (~20s) Co-authored-by: Kyle Carberry <[email protected]>
1 parent 2c1b136 commit e3ff563

File tree

7 files changed

+245
-47
lines changed

7 files changed

+245
-47
lines changed

tfexec/cmd.go

+36-1
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
package tfexec
22

33
import (
4+
"bufio"
45
"bytes"
56
"context"
67
"encoding/json"
8+
"errors"
79
"fmt"
810
"io"
911
"io/ioutil"
@@ -170,7 +172,7 @@ func (tf *Terraform) buildEnv(mergeEnv map[string]string) []string {
170172
}
171173

172174
func (tf *Terraform) buildTerraformCmd(ctx context.Context, mergeEnv map[string]string, args ...string) *exec.Cmd {
173-
cmd := exec.Command(tf.execPath, args...)
175+
cmd := exec.CommandContext(ctx, tf.execPath, args...)
174176

175177
cmd.Env = tf.buildEnv(mergeEnv)
176178
cmd.Dir = tf.workingDir
@@ -229,3 +231,36 @@ func mergeWriters(writers ...io.Writer) io.Writer {
229231
}
230232
return io.MultiWriter(compact...)
231233
}
234+
235+
func writeOutput(ctx context.Context, r io.ReadCloser, w io.Writer) error {
236+
// ReadBytes will block until bytes are read, which can cause a delay in
237+
// returning even if the command's context has been canceled. Use a separate
238+
// goroutine to prompt ReadBytes to return on cancel
239+
closeCtx, closeCancel := context.WithCancel(ctx)
240+
defer closeCancel()
241+
go func() {
242+
select {
243+
case <-ctx.Done():
244+
r.Close()
245+
case <-closeCtx.Done():
246+
return
247+
}
248+
}()
249+
250+
buf := bufio.NewReader(r)
251+
for {
252+
line, err := buf.ReadBytes('\n')
253+
if len(line) > 0 {
254+
if _, err := w.Write(line); err != nil {
255+
return err
256+
}
257+
}
258+
if err != nil {
259+
if errors.Is(err, io.EOF) {
260+
return nil
261+
}
262+
263+
return err
264+
}
265+
}
266+
}

tfexec/cmd_default.go

+55-16
Original file line numberDiff line numberDiff line change
@@ -7,40 +7,79 @@ import (
77
"context"
88
"os/exec"
99
"strings"
10+
"sync"
1011
)
1112

1213
func (tf *Terraform) runTerraformCmd(ctx context.Context, cmd *exec.Cmd) error {
1314
var errBuf strings.Builder
1415

15-
cmd.Stdout = mergeWriters(cmd.Stdout, tf.stdout)
16-
cmd.Stderr = mergeWriters(cmd.Stderr, tf.stderr, &errBuf)
17-
18-
go func() {
19-
<-ctx.Done()
20-
if ctx.Err() == context.DeadlineExceeded || ctx.Err() == context.Canceled {
21-
if cmd != nil && cmd.Process != nil && cmd.ProcessState != nil {
22-
err := cmd.Process.Kill()
23-
if err != nil {
24-
tf.logger.Printf("error from kill: %s", err)
25-
}
26-
}
27-
}
28-
}()
29-
3016
// check for early cancellation
3117
select {
3218
case <-ctx.Done():
3319
return ctx.Err()
3420
default:
3521
}
3622

37-
err := cmd.Run()
23+
// Read stdout / stderr logs from pipe instead of setting cmd.Stdout and
24+
// cmd.Stderr because it can cause hanging when killing the command
25+
// https://github.com/golang/go/issues/23019
26+
stdoutWriter := mergeWriters(cmd.Stdout, tf.stdout)
27+
stderrWriter := mergeWriters(tf.stderr, &errBuf)
28+
29+
cmd.Stderr = nil
30+
cmd.Stdout = nil
31+
32+
stdoutPipe, err := cmd.StdoutPipe()
33+
if err != nil {
34+
return err
35+
}
36+
37+
stderrPipe, err := cmd.StderrPipe()
38+
if err != nil {
39+
return err
40+
}
41+
42+
err = cmd.Start()
43+
if err == nil && ctx.Err() != nil {
44+
err = ctx.Err()
45+
}
46+
if err != nil {
47+
return tf.wrapExitError(ctx, err, "")
48+
}
49+
50+
var errStdout, errStderr error
51+
var wg sync.WaitGroup
52+
wg.Add(1)
53+
go func() {
54+
defer wg.Done()
55+
errStdout = writeOutput(ctx, stdoutPipe, stdoutWriter)
56+
}()
57+
58+
wg.Add(1)
59+
go func() {
60+
defer wg.Done()
61+
errStderr = writeOutput(ctx, stderrPipe, stderrWriter)
62+
}()
63+
64+
// Reads from pipes must be completed before calling cmd.Wait(). Otherwise
65+
// can cause a race condition
66+
wg.Wait()
67+
68+
err = cmd.Wait()
3869
if err == nil && ctx.Err() != nil {
3970
err = ctx.Err()
4071
}
4172
if err != nil {
4273
return tf.wrapExitError(ctx, err, errBuf.String())
4374
}
4475

76+
// Return error if there was an issue reading the std out/err
77+
if errStdout != nil && ctx.Err() != nil {
78+
return tf.wrapExitError(ctx, errStdout, errBuf.String())
79+
}
80+
if errStderr != nil && ctx.Err() != nil {
81+
return tf.wrapExitError(ctx, errStderr, errBuf.String())
82+
}
83+
4584
return nil
4685
}

tfexec/cmd_default_test.go

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
//go:build !linux
2+
// +build !linux
3+
4+
package tfexec
5+
6+
import (
7+
"bytes"
8+
"context"
9+
"log"
10+
"strings"
11+
"testing"
12+
"time"
13+
)
14+
15+
func Test_runTerraformCmd_default(t *testing.T) {
16+
// Checks runTerraformCmd for race condition when using
17+
// go test -race -run Test_runTerraformCmd_default ./tfexec
18+
var buf bytes.Buffer
19+
20+
tf := &Terraform{
21+
logger: log.New(&buf, "", 0),
22+
execPath: "echo",
23+
}
24+
25+
ctx, cancel := context.WithCancel(context.Background())
26+
27+
cmd := tf.buildTerraformCmd(ctx, nil, "hello tf-exec!")
28+
err := tf.runTerraformCmd(ctx, cmd)
29+
if err != nil {
30+
t.Fatal(err)
31+
}
32+
33+
// Cancel stops the leaked go routine which logs an error
34+
cancel()
35+
time.Sleep(time.Second)
36+
if strings.Contains(buf.String(), "error from kill") {
37+
t.Fatal("canceling context should not lead to logging an error")
38+
}
39+
}

tfexec/cmd_linux.go

+55-19
Original file line numberDiff line numberDiff line change
@@ -4,51 +4,87 @@ import (
44
"context"
55
"os/exec"
66
"strings"
7+
"sync"
78
"syscall"
89
)
910

1011
func (tf *Terraform) runTerraformCmd(ctx context.Context, cmd *exec.Cmd) error {
1112
var errBuf strings.Builder
1213

13-
cmd.Stdout = mergeWriters(cmd.Stdout, tf.stdout)
14-
cmd.Stderr = mergeWriters(cmd.Stderr, tf.stderr, &errBuf)
15-
1614
cmd.SysProcAttr = &syscall.SysProcAttr{
1715
// kill children if parent is dead
1816
Pdeathsig: syscall.SIGKILL,
1917
// set process group ID
2018
Setpgid: true,
2119
}
2220

23-
go func() {
24-
<-ctx.Done()
25-
if ctx.Err() == context.DeadlineExceeded || ctx.Err() == context.Canceled {
26-
if cmd != nil && cmd.Process != nil && cmd.ProcessState != nil {
27-
// send SIGINT to process group
28-
err := syscall.Kill(-cmd.Process.Pid, syscall.SIGINT)
29-
if err != nil {
30-
tf.logger.Printf("error from SIGINT: %s", err)
31-
}
32-
}
33-
34-
// TODO: send a kill if it doesn't respond for a bit?
35-
}
36-
}()
37-
3821
// check for early cancellation
3922
select {
4023
case <-ctx.Done():
4124
return ctx.Err()
4225
default:
4326
}
4427

45-
err := cmd.Run()
28+
// Read stdout / stderr logs from pipe instead of setting cmd.Stdout and
29+
// cmd.Stderr because it can cause hanging when killing the command
30+
// https://github.com/golang/go/issues/23019
31+
stdoutWriter := mergeWriters(cmd.Stdout, tf.stdout)
32+
stderrWriter := mergeWriters(tf.stderr, &errBuf)
33+
34+
cmd.Stderr = nil
35+
cmd.Stdout = nil
36+
37+
stdoutPipe, err := cmd.StdoutPipe()
38+
if err != nil {
39+
return err
40+
}
41+
42+
stderrPipe, err := cmd.StderrPipe()
43+
if err != nil {
44+
return err
45+
}
46+
47+
err = cmd.Start()
48+
if err == nil && ctx.Err() != nil {
49+
err = ctx.Err()
50+
}
51+
if err != nil {
52+
return tf.wrapExitError(ctx, err, "")
53+
}
54+
55+
var errStdout, errStderr error
56+
var wg sync.WaitGroup
57+
wg.Add(1)
58+
go func() {
59+
defer wg.Done()
60+
errStdout = writeOutput(ctx, stdoutPipe, stdoutWriter)
61+
}()
62+
63+
wg.Add(1)
64+
go func() {
65+
defer wg.Done()
66+
errStderr = writeOutput(ctx, stderrPipe, stderrWriter)
67+
}()
68+
69+
// Reads from pipes must be completed before calling cmd.Wait(). Otherwise
70+
// can cause a race condition
71+
wg.Wait()
72+
73+
err = cmd.Wait()
4674
if err == nil && ctx.Err() != nil {
4775
err = ctx.Err()
4876
}
4977
if err != nil {
5078
return tf.wrapExitError(ctx, err, errBuf.String())
5179
}
5280

81+
// Return error if there was an issue reading the std out/err
82+
if errStdout != nil && ctx.Err() != nil {
83+
return tf.wrapExitError(ctx, errStdout, errBuf.String())
84+
}
85+
if errStderr != nil && ctx.Err() != nil {
86+
return tf.wrapExitError(ctx, errStderr, errBuf.String())
87+
}
88+
5389
return nil
5490
}

tfexec/cmd_linux_test.go

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
package tfexec
2+
3+
import (
4+
"bytes"
5+
"context"
6+
"log"
7+
"strings"
8+
"testing"
9+
"time"
10+
)
11+
12+
func Test_runTerraformCmd_linux(t *testing.T) {
13+
// Checks runTerraformCmd for race condition when using
14+
// go test -race -run Test_runTerraformCmd_linux ./tfexec -tags=linux
15+
var buf bytes.Buffer
16+
17+
tf := &Terraform{
18+
logger: log.New(&buf, "", 0),
19+
execPath: "echo",
20+
}
21+
22+
ctx, cancel := context.WithCancel(context.Background())
23+
24+
cmd := tf.buildTerraformCmd(ctx, nil, "hello tf-exec!")
25+
err := tf.runTerraformCmd(ctx, cmd)
26+
if err != nil {
27+
t.Fatal(err)
28+
}
29+
30+
// Cancel stops the leaked go routine which logs an error
31+
cancel()
32+
time.Sleep(time.Second)
33+
if strings.Contains(buf.String(), "error from kill") {
34+
t.Fatal("canceling context should not lead to logging an error")
35+
}
36+
}

tfexec/internal/e2etest/errors_test.go

+15-7
Original file line numberDiff line numberDiff line change
@@ -205,13 +205,21 @@ func TestContext_sleepTimeoutExpired(t *testing.T) {
205205
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
206206
defer cancel()
207207

208-
err = tf.Apply(ctx)
209-
if err == nil {
210-
t.Fatal("expected error, but didn't find one")
211-
}
212-
213-
if !errors.Is(err, context.DeadlineExceeded) {
214-
t.Fatalf("expected context.DeadlineExceeded, got %T %s", err, err)
208+
errCh := make(chan error)
209+
go func() {
210+
err = tf.Apply(ctx)
211+
if err != nil {
212+
errCh <- err
213+
}
214+
}()
215+
216+
select {
217+
case err := <-errCh:
218+
if !errors.Is(err, context.DeadlineExceeded) {
219+
t.Fatalf("expected context.DeadlineExceeded, got %T %s", err, err)
220+
}
221+
case <-time.After(time.Second * 10):
222+
t.Fatal("terraform apply should have canceled and returned in ~5s")
215223
}
216224
})
217225
}

tfexec/internal/e2etest/util_test.go

+9-4
Original file line numberDiff line numberDiff line change
@@ -102,16 +102,21 @@ func runTestVersions(t *testing.T, versions []string, fixtureName string, cb fun
102102
}
103103
}
104104

105-
var stdouterr strings.Builder
106-
tf.SetStdout(&stdouterr)
107-
tf.SetStderr(&stdouterr)
105+
// Separate strings.Builder because it's not concurrent safe
106+
var stdout strings.Builder
107+
tf.SetStdout(&stdout)
108+
var stderr strings.Builder
109+
tf.SetStderr(&stderr)
108110

109111
tf.SetLogger(&testingPrintfer{t})
110112

111113
// TODO: capture panics here?
112114
cb(t, runningVersion, tf)
113115

114-
t.Logf("CLI Output:\n%s", stdouterr.String())
116+
t.Logf("CLI Output:\n%s", stdout.String())
117+
if len(stderr.String()) > 0 {
118+
t.Logf("CLI Error:\n%s", stderr.String())
119+
}
115120
})
116121
}
117122
}

0 commit comments

Comments
 (0)