@@ -10,6 +10,7 @@ import (
10
10
"fmt"
11
11
"os/exec"
12
12
"sync"
13
+ "sync/atomic"
13
14
"time"
14
15
15
16
"github.com/Masterminds/vcs"
@@ -25,6 +26,18 @@ type monitoredCmd struct {
25
26
stderr * activityBuffer
26
27
}
27
28
29
+ // noProgressError indicates that the monitored process was terminated due to
30
+ // exceeding exceeding the progress timeout.
31
+ type noProgressError struct {
32
+ timeout time.Duration
33
+ }
34
+
35
+ // killCmdError indicates that an error occurred while sending a kill signal to
36
+ // the monitored process.
37
+ type killCmdError struct {
38
+ err error
39
+ }
40
+
28
41
func newMonitoredCmd (cmd * exec.Cmd , timeout time.Duration ) * monitoredCmd {
29
42
stdout , stderr := newActivityBuffer (), newActivityBuffer ()
30
43
cmd .Stdout , cmd .Stderr = stdout , stderr
@@ -37,46 +50,73 @@ func newMonitoredCmd(cmd *exec.Cmd, timeout time.Duration) *monitoredCmd {
37
50
}
38
51
39
52
// run will wait for the command to finish and return the error, if any. If the
40
- // command does not show any activity for more than the specified timeout the
41
- // process will be killed.
53
+ // command does not show any progress, as indicated by writing to stdout or
54
+ // stderr, for more than the specified timeout, the process will be killed.
42
55
func (c * monitoredCmd ) run (ctx context.Context ) error {
43
56
// Check for cancellation before even starting
44
57
if ctx .Err () != nil {
45
58
return ctx .Err ()
46
59
}
47
60
48
- ticker := time .NewTicker (c .timeout )
49
- done := make (chan error , 1 )
50
- defer ticker .Stop ()
51
-
52
61
err := c .cmd .Start ()
53
62
if err != nil {
54
63
return err
55
64
}
56
65
66
+ ticker := time .NewTicker (c .timeout )
67
+ defer ticker .Stop ()
68
+
69
+ // Atomic marker to track proc exit state. Guards against bad channel
70
+ // select receive order, where a tick or context cancellation could come
71
+ // in at the same time as process completion, but one of the former are
72
+ // picked first; in such a case, cmd.Process could(?) be nil by the time we
73
+ // call signal methods on it.
74
+ var isDone * int32 = new (int32 )
75
+ done := make (chan error , 1 )
76
+
57
77
go func () {
78
+ // Wait() can only be called once, so this must act as the completion
79
+ // indicator for both normal *and* signal-induced termination.
58
80
done <- c .cmd .Wait ()
81
+ atomic .CompareAndSwapInt32 (isDone , 0 , 1 )
59
82
}()
60
83
84
+ var killerr error
85
+ selloop:
61
86
for {
62
87
select {
88
+ case err := <- done :
89
+ return err
63
90
case <- ticker .C :
64
- if c .hasTimedOut () {
65
- if err := c .cmd .Process .Kill (); err != nil {
66
- return & killCmdError {err }
91
+ if ! atomic .CompareAndSwapInt32 (isDone , 1 , 1 ) && c .hasTimedOut () {
92
+ if err := killProcess (c .cmd , isDone ); err != nil {
93
+ killerr = & killCmdError {err }
94
+ } else {
95
+ killerr = & noProgressError {c .timeout }
67
96
}
68
-
69
- return & timeoutError {c .timeout }
97
+ break selloop
70
98
}
71
99
case <- ctx .Done ():
72
- if err := c .cmd .Process .Kill (); err != nil {
73
- return & killCmdError {err }
100
+ if ! atomic .CompareAndSwapInt32 (isDone , 1 , 1 ) {
101
+ if err := killProcess (c .cmd , isDone ); err != nil {
102
+ killerr = & killCmdError {err }
103
+ } else {
104
+ killerr = ctx .Err ()
105
+ }
106
+ break selloop
74
107
}
75
- return ctx .Err ()
76
- case err := <- done :
77
- return err
78
108
}
79
109
}
110
+
111
+ // This is only reachable on the signal-induced termination path, so block
112
+ // until a message comes through the channel indicating that the command has
113
+ // exited.
114
+ //
115
+ // TODO(sdboyer) if the signaling process errored (resulting in a
116
+ // killCmdError stored in killerr), is it possible that this receive could
117
+ // block forever on some kind of hung process?
118
+ <- done
119
+ return killerr
80
120
}
81
121
82
122
func (c * monitoredCmd ) hasTimedOut () bool {
@@ -90,6 +130,7 @@ func (c *monitoredCmd) combinedOutput(ctx context.Context) ([]byte, error) {
90
130
return c .stderr .buf .Bytes (), err
91
131
}
92
132
133
+ // FIXME(sdboyer) this is not actually combined output
93
134
return c .stdout .buf .Bytes (), nil
94
135
}
95
136
@@ -120,18 +161,10 @@ func (b *activityBuffer) lastActivity() time.Time {
120
161
return b .lastActivityStamp
121
162
}
122
163
123
- type timeoutError struct {
124
- timeout time.Duration
125
- }
126
-
127
- func (e timeoutError ) Error () string {
164
+ func (e noProgressError ) Error () string {
128
165
return fmt .Sprintf ("command killed after %s of no activity" , e .timeout )
129
166
}
130
167
131
- type killCmdError struct {
132
- err error
133
- }
134
-
135
168
func (e killCmdError ) Error () string {
136
169
return fmt .Sprintf ("error killing command: %s" , e .err )
137
170
}
0 commit comments