5
5
package supervisor
6
6
7
7
import (
8
+ "bufio"
8
9
"context"
9
10
"encoding/json"
10
11
"fmt"
@@ -14,6 +15,7 @@ import (
14
15
"os"
15
16
"os/exec"
16
17
"os/signal"
18
+ "path/filepath"
17
19
"runtime"
18
20
"strconv"
19
21
"strings"
@@ -169,20 +171,30 @@ func Run(options ...RunOption) {
169
171
}
170
172
apiServices = append (apiServices , additionalServices ... )
171
173
174
+ // The reaper can be turned into a terminating reaper by writing true to this channel.
175
+ // When in terminating mode, the reaper will send SIGTERM to each child that gets reparented
176
+ // to us and is still running. We use this mechanism to send SIGTERM to a shell's child processes
177
+ // that get reparented once their parent shell terminates during shutdown.
178
+ terminatingReaper := make (chan bool )
179
+ // We keep the reaper until the bitter end because:
180
+ // - it doesn't need graceful shutdown
181
+ // - we want to do as much work as possible (SIGTERM'ing reparented processes during shutdown).
182
+ go reaper (terminatingReaper )
183
+
184
+ var ideWG sync.WaitGroup
185
+ ideWG .Add (1 )
186
+ go startAndWatchIDE (ctx , cfg , & ideWG , ideReady )
187
+
172
188
var wg sync.WaitGroup
173
- wg .Add (6 )
174
- go reaper (ctx , & wg )
175
- go startAndWatchIDE (ctx , cfg , & wg , ideReady )
189
+ wg .Add (4 )
176
190
go startContentInit (ctx , cfg , & wg , cstate )
177
191
go startAPIEndpoint (ctx , cfg , & wg , apiServices , apiEndpointOpts ... )
178
192
go taskManager .Run (ctx , & wg )
179
- go func () {
180
- defer wg .Done ()
181
- if cfg .isHeadless () {
182
- return
183
- }
184
- portMgmt .Run ()
185
- }()
193
+
194
+ if ! cfg .isHeadless () {
195
+ wg .Add (1 )
196
+ go portMgmt .Run (ctx , & wg )
197
+ }
186
198
187
199
if cfg .PreventMetadataAccess {
188
200
go func () {
@@ -203,15 +215,21 @@ func Run(options ...RunOption) {
203
215
}
204
216
205
217
log .Info ("received SIGTERM - tearing down" )
218
+ terminatingReaper <- true
219
+ cancel ()
206
220
err = termMux .Close ()
207
221
if err != nil {
208
222
log .WithError (err ).Error ("terminal closure failed" )
209
223
}
224
+
225
+ // terminate all child processes once the IDE is gone
226
+ ideWG .Wait ()
227
+ terminateChildProcesses ()
228
+
210
229
if ! opts .InNamespace {
211
230
callDaemonTeardown ()
212
231
}
213
232
214
- cancel ()
215
233
wg .Wait ()
216
234
}
217
235
@@ -305,16 +323,17 @@ func hasMetadataAccess() bool {
305
323
return false
306
324
}
307
325
308
- func reaper (ctx context. Context , wg * sync. WaitGroup ) {
309
- defer wg . Done ( )
326
+ func reaper (terminatingReaper <- chan bool ) {
327
+ defer log . Debug ( "reaper shutdown" )
310
328
329
+ var terminating bool
311
330
sigs := make (chan os.Signal , 128 )
312
331
signal .Notify (sigs , syscall .SIGCHLD )
313
332
for {
314
333
select {
315
- case <- ctx .Done ():
316
- return
317
334
case <- sigs :
335
+ case terminating = <- terminatingReaper :
336
+ continue
318
337
}
319
338
320
339
pid , err := unix .Wait4 (- 1 , nil , 0 , nil )
@@ -325,12 +344,33 @@ func reaper(ctx context.Context, wg *sync.WaitGroup) {
325
344
}
326
345
if err != nil {
327
346
log .WithField ("pid" , pid ).WithError (err ).Debug ("cannot call waitpid() for re-parented child" )
347
+ continue
348
+ }
349
+
350
+ if ! terminating {
351
+ continue
352
+ }
353
+ proc , err := os .FindProcess (pid )
354
+ if err != nil {
355
+ log .WithField ("pid" , pid ).WithError (err ).Debug ("cannot find re-parented process" )
356
+ continue
328
357
}
358
+ err = proc .Signal (syscall .SIGTERM )
359
+ if err != nil {
360
+ if ! strings .Contains (err .Error (), "os: process already finished" ) {
361
+ log .WithField ("pid" , pid ).WithError (err ).Debug ("cannot send SIGTERM to re-parented process" )
362
+ }
363
+
364
+ continue
365
+ }
366
+ log .WithField ("pid" , pid ).Debug ("SIGTERM'ed reparented child process" )
329
367
}
330
368
}
331
369
332
370
func startAndWatchIDE (ctx context.Context , cfg * Config , wg * sync.WaitGroup , ideReady * ideReadyState ) {
333
371
defer wg .Done ()
372
+ defer log .Debug ("startAndWatchIDE shutdown" )
373
+
334
374
if cfg .isHeadless () {
335
375
ideReady .Set (true )
336
376
return
@@ -383,7 +423,7 @@ supervisorLoop:
383
423
}()
384
424
385
425
err = cmd .Wait ()
386
- if err != nil && ! strings .Contains (err .Error (), "signal: interrupt" ) {
426
+ if err != nil && ! ( strings .Contains (err .Error (), "signal: interrupt" ) || strings . Contains ( err . Error (), "wait: no child processes" ) ) {
387
427
log .WithError (err ).Warn ("IDE was stopped" )
388
428
}
389
429
@@ -411,7 +451,7 @@ supervisorLoop:
411
451
case <- ideStopped :
412
452
return
413
453
case <- time .After (timeBudgetIDEShutdown ):
414
- log .Error ("IDE did not stop in time - sending SIGKILL" )
454
+ log .WithField ( "timeBudgetIDEShutdown" , timeBudgetIDEShutdown . String ()). Error ("IDE did not stop in time - sending SIGKILL" )
415
455
cmd .Process .Signal (syscall .SIGKILL )
416
456
}
417
457
}
@@ -530,6 +570,7 @@ func isBlacklistedEnvvar(name string) bool {
530
570
531
571
func startAPIEndpoint (ctx context.Context , cfg * Config , wg * sync.WaitGroup , services []RegisterableService , opts ... grpc.ServerOption ) {
532
572
defer wg .Done ()
573
+ defer log .Debug ("startAPIEndpoint shutdown" )
533
574
534
575
l , err := net .Listen ("tcp" , fmt .Sprintf (":%d" , cfg .APIEndpointPort ))
535
576
if err != nil {
@@ -642,6 +683,52 @@ func startContentInit(ctx context.Context, cfg *Config, wg *sync.WaitGroup, cst
642
683
cst .MarkContentReady (src )
643
684
}
644
685
686
+ func terminateChildProcesses () {
687
+ ppid := strconv .Itoa (os .Getpid ())
688
+ dirs , err := ioutil .ReadDir ("/proc" )
689
+ if err != nil {
690
+ log .WithError (err ).Warn ("cannot terminate child processes" )
691
+ return
692
+ }
693
+ for _ , d := range dirs {
694
+ pid , err := strconv .Atoi (d .Name ())
695
+ if err != nil {
696
+ // not a PID
697
+ continue
698
+ }
699
+ proc , err := os .FindProcess (pid )
700
+ if err != nil {
701
+ continue
702
+ }
703
+
704
+ var isChild bool
705
+ f , err := os .Open (filepath .Join ("/proc" , d .Name (), "status" ))
706
+ if err != nil {
707
+ continue
708
+ }
709
+ scan := bufio .NewScanner (f )
710
+ for scan .Scan () {
711
+ l := strings .TrimSpace (scan .Text ())
712
+ if ! strings .HasPrefix (l , "PPid:" ) {
713
+ continue
714
+ }
715
+
716
+ isChild = strings .HasSuffix (l , ppid )
717
+ break
718
+ }
719
+ if ! isChild {
720
+ continue
721
+ }
722
+
723
+ err = proc .Signal (unix .SIGTERM )
724
+ if err != nil {
725
+ log .WithError (err ).WithField ("pid" , pid ).Warn ("cannot terminate child processe" )
726
+ continue
727
+ }
728
+ log .WithField ("pid" , pid ).Debug ("SIGTERM'ed child process" )
729
+ }
730
+ }
731
+
645
732
func callDaemonTeardown () {
646
733
log .Info ("asking ws-daemon to tear down this workspace" )
647
734
ctx , cancel := context .WithTimeout (context .Background (), timeBudgetDaemonTeardown )
0 commit comments