Skip to content
This repository was archived by the owner on Sep 9, 2020. It is now read-only.

Commit 3781a6f

Browse files
authored
Merge pull request #857 from sdboyer/gentler-termination
Use gentler subprocess termination semantics
2 parents bcfa7ee + 74efbb1 commit 3781a6f

File tree

4 files changed

+130
-27
lines changed

4 files changed

+130
-27
lines changed

internal/gps/cmd.go

Lines changed: 58 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"fmt"
1111
"os/exec"
1212
"sync"
13+
"sync/atomic"
1314
"time"
1415

1516
"github.com/Masterminds/vcs"
@@ -25,6 +26,18 @@ type monitoredCmd struct {
2526
stderr *activityBuffer
2627
}
2728

29+
// noProgressError indicates that the monitored process was terminated due to
30+
// exceeding exceeding the progress timeout.
31+
type noProgressError struct {
32+
timeout time.Duration
33+
}
34+
35+
// killCmdError indicates that an error occurred while sending a kill signal to
36+
// the monitored process.
37+
type killCmdError struct {
38+
err error
39+
}
40+
2841
func newMonitoredCmd(cmd *exec.Cmd, timeout time.Duration) *monitoredCmd {
2942
stdout, stderr := newActivityBuffer(), newActivityBuffer()
3043
cmd.Stdout, cmd.Stderr = stdout, stderr
@@ -37,46 +50,73 @@ func newMonitoredCmd(cmd *exec.Cmd, timeout time.Duration) *monitoredCmd {
3750
}
3851

3952
// run will wait for the command to finish and return the error, if any. If the
40-
// command does not show any activity for more than the specified timeout the
41-
// process will be killed.
53+
// command does not show any progress, as indicated by writing to stdout or
54+
// stderr, for more than the specified timeout, the process will be killed.
4255
func (c *monitoredCmd) run(ctx context.Context) error {
4356
// Check for cancellation before even starting
4457
if ctx.Err() != nil {
4558
return ctx.Err()
4659
}
4760

48-
ticker := time.NewTicker(c.timeout)
49-
done := make(chan error, 1)
50-
defer ticker.Stop()
51-
5261
err := c.cmd.Start()
5362
if err != nil {
5463
return err
5564
}
5665

66+
ticker := time.NewTicker(c.timeout)
67+
defer ticker.Stop()
68+
69+
// Atomic marker to track proc exit state. Guards against bad channel
70+
// select receive order, where a tick or context cancellation could come
71+
// in at the same time as process completion, but one of the former are
72+
// picked first; in such a case, cmd.Process could(?) be nil by the time we
73+
// call signal methods on it.
74+
var isDone *int32 = new(int32)
75+
done := make(chan error, 1)
76+
5777
go func() {
78+
// Wait() can only be called once, so this must act as the completion
79+
// indicator for both normal *and* signal-induced termination.
5880
done <- c.cmd.Wait()
81+
atomic.CompareAndSwapInt32(isDone, 0, 1)
5982
}()
6083

84+
var killerr error
85+
selloop:
6186
for {
6287
select {
88+
case err := <-done:
89+
return err
6390
case <-ticker.C:
64-
if c.hasTimedOut() {
65-
if err := c.cmd.Process.Kill(); err != nil {
66-
return &killCmdError{err}
91+
if !atomic.CompareAndSwapInt32(isDone, 1, 1) && c.hasTimedOut() {
92+
if err := killProcess(c.cmd, isDone); err != nil {
93+
killerr = &killCmdError{err}
94+
} else {
95+
killerr = &noProgressError{c.timeout}
6796
}
68-
69-
return &timeoutError{c.timeout}
97+
break selloop
7098
}
7199
case <-ctx.Done():
72-
if err := c.cmd.Process.Kill(); err != nil {
73-
return &killCmdError{err}
100+
if !atomic.CompareAndSwapInt32(isDone, 1, 1) {
101+
if err := killProcess(c.cmd, isDone); err != nil {
102+
killerr = &killCmdError{err}
103+
} else {
104+
killerr = ctx.Err()
105+
}
106+
break selloop
74107
}
75-
return ctx.Err()
76-
case err := <-done:
77-
return err
78108
}
79109
}
110+
111+
// This is only reachable on the signal-induced termination path, so block
112+
// until a message comes through the channel indicating that the command has
113+
// exited.
114+
//
115+
// TODO(sdboyer) if the signaling process errored (resulting in a
116+
// killCmdError stored in killerr), is it possible that this receive could
117+
// block forever on some kind of hung process?
118+
<-done
119+
return killerr
80120
}
81121

82122
func (c *monitoredCmd) hasTimedOut() bool {
@@ -90,6 +130,7 @@ func (c *monitoredCmd) combinedOutput(ctx context.Context) ([]byte, error) {
90130
return c.stderr.buf.Bytes(), err
91131
}
92132

133+
// FIXME(sdboyer) this is not actually combined output
93134
return c.stdout.buf.Bytes(), nil
94135
}
95136

@@ -120,18 +161,10 @@ func (b *activityBuffer) lastActivity() time.Time {
120161
return b.lastActivityStamp
121162
}
122163

123-
type timeoutError struct {
124-
timeout time.Duration
125-
}
126-
127-
func (e timeoutError) Error() string {
164+
func (e noProgressError) Error() string {
128165
return fmt.Sprintf("command killed after %s of no activity", e.timeout)
129166
}
130167

131-
type killCmdError struct {
132-
err error
133-
}
134-
135168
func (e killCmdError) Error() string {
136169
return fmt.Sprintf("error killing command: %s", e.err)
137170
}

internal/gps/cmd_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ import (
1616
func mkTestCmd(iterations int) *monitoredCmd {
1717
return newMonitoredCmd(
1818
exec.Command("./echosleep", "-n", fmt.Sprint(iterations)),
19-
500*time.Millisecond,
19+
490*time.Millisecond,
2020
)
2121
}
2222

@@ -49,7 +49,7 @@ func TestMonitoredCmd(t *testing.T) {
4949
t.Error("Expected command to fail")
5050
}
5151

52-
_, ok := err.(*timeoutError)
52+
_, ok := err.(*noProgressError)
5353
if !ok {
5454
t.Errorf("Expected a timeout error, but got: %s", err)
5555
}

internal/gps/cmd_unix.go

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
// Copyright 2017 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
// +build !windows
6+
7+
package gps
8+
9+
import (
10+
"os"
11+
"os/exec"
12+
"sync/atomic"
13+
"time"
14+
)
15+
16+
// killProcess manages the termination of subprocesses in a way that tries to be
17+
// gentle (via os.Interrupt), but resorts to Kill if needed.
18+
//
19+
//
20+
// TODO(sdboyer) should the return differentiate between whether gentle methods
21+
// succeeded vs. falling back to a hard kill?
22+
func killProcess(cmd *exec.Cmd, isDone *int32) error {
23+
if err := cmd.Process.Signal(os.Interrupt); err != nil {
24+
// If an error comes back from attempting to signal, proceed immediately
25+
// to hard kill.
26+
return cmd.Process.Kill()
27+
}
28+
29+
// If the process doesn't exit immediately, check every 50ms, up to 3s,
30+
// after which send a hard kill.
31+
//
32+
// Cannot rely on cmd.ProcessState.Exited() here, as that is not set
33+
// correctly when the process exits due to a signal. See
34+
// https://github.com/golang/go/issues/19798 . Also cannot rely on it
35+
// because cmd.ProcessState will be nil before the process exits, and
36+
// checking if nil create a data race.
37+
if !atomic.CompareAndSwapInt32(isDone, 1, 1) {
38+
to := time.NewTimer(3 * time.Second)
39+
tick := time.NewTicker(50 * time.Millisecond)
40+
41+
defer to.Stop()
42+
defer tick.Stop()
43+
44+
// Loop until the ProcessState shows up, indicating the proc has exited,
45+
// or the timer expires and
46+
for !atomic.CompareAndSwapInt32(isDone, 1, 1) {
47+
select {
48+
case <-to.C:
49+
return cmd.Process.Kill()
50+
case <-tick.C:
51+
}
52+
}
53+
}
54+
55+
return nil
56+
}

internal/gps/cmd_windows.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// Copyright 2017 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
// +build windows
6+
7+
package gps
8+
9+
import "os/exec"
10+
11+
func killProcess(cmd *exec.Cmd, isDone *int32) error {
12+
// TODO it'd be great if this could be more sophisticated...
13+
return cmd.Process.Kill()
14+
}

0 commit comments

Comments
 (0)