@@ -19,6 +19,7 @@ import (
19
19
"math"
20
20
"net"
21
21
"os"
22
+ "os/exec"
22
23
"runtime/debug"
23
24
"strconv"
24
25
"strings"
@@ -45,7 +46,6 @@ import (
45
46
"github.com/gofrs/uuid"
46
47
ptypes "github.com/gogo/protobuf/types"
47
48
"github.com/golang/protobuf/ptypes/empty"
48
- "github.com/hashicorp/go-multierror"
49
49
"github.com/pkg/errors"
50
50
"github.com/sirupsen/logrus"
51
51
"golang.org/x/sys/unix"
@@ -71,6 +71,7 @@ const (
71
71
minVsockIOPort = uint32 (11000 )
72
72
firecrackerStartTimeout = 5 * time .Second
73
73
defaultStopVMTimeout = 5 * time .Second
74
+ defaultShutdownTimeout = 5 * time .Second
74
75
75
76
// StartEventName is the topic published to when a VM starts
76
77
StartEventName = "/firecracker-vm/start"
@@ -548,46 +549,31 @@ func (s *service) mountDrives(requestCtx context.Context, driveMounts []*proto.F
548
549
return nil
549
550
}
550
551
551
- // StopVM will shutdown the firecracker VM and start this shim's shutdown procedure. If the VM has not been
552
- // created yet and the timeout is hit waiting for it to exist, an error will be returned but the shim will
553
- // continue to shutdown.
552
+ // StopVM will shutdown the VMM. Unlike Shutdown, this method is exposed to containerd clients.
553
+ // If the VM has not been created yet and the timeout is hit waiting for it to exist, an error will be returned
554
+ // but the shim will continue to shutdown.
554
555
func (s * service ) StopVM (requestCtx context.Context , request * proto.StopVMRequest ) (_ * empty.Empty , err error ) {
556
+ defer logPanicAndDie (s .logger )
557
+ s .logger .WithFields (logrus.Fields {"timeout_seconds" : request .TimeoutSeconds }).Debug ("StopVM" )
558
+
555
559
timeout := defaultStopVMTimeout
556
560
if request .TimeoutSeconds > 0 {
557
561
timeout = time .Duration (request .TimeoutSeconds ) * time .Second
558
562
}
559
- timer := time .NewTimer (timeout )
560
- defer logPanicAndDie (s .logger )
561
- // If something goes wrong here, just shut down ungracefully. This eliminates some scenarios that would result
562
- // in the user being unable to shut down the VM.
563
- defer func () {
564
- if err != nil {
565
- s .logger .WithError (err ).Error ("StopVM error, shim is shutting down ungracefully" )
566
- s .shimCancel ()
567
- }
568
- }()
569
563
564
+ shutdownCtx , shutdownCancel := context .WithTimeout (context .Background (), timeout )
565
+ defer shutdownCancel ()
566
+
567
+ defer s .shimCancel ()
570
568
err = s .waitVMReady ()
571
569
if err != nil {
572
570
return nil , err
573
571
}
574
572
575
- shutdownCh := make (chan error )
576
- go func () {
577
- defer close (shutdownCh )
578
- _ , err := s .Shutdown (requestCtx , & taskAPI.ShutdownRequest {Now : true })
579
- shutdownCh <- err
580
- }()
581
-
582
- select {
583
- case <- timer .C :
584
- return nil , status .Error (codes .DeadlineExceeded , "timed out waiting for VM shutdown" )
585
- case err = <- shutdownCh :
586
- if err != nil {
587
- return nil , err
588
- }
589
- return & empty.Empty {}, nil
573
+ if err = s .shutdown (requestCtx , shutdownCtx , & taskAPI.ShutdownRequest {Now : true }); err != nil {
574
+ return nil , err
590
575
}
576
+ return & empty.Empty {}, nil
591
577
}
592
578
593
579
// GetVMInfo returns metadata for the VM being managed by this shim. If the VM has not been created yet, this
@@ -1149,54 +1135,71 @@ func (s *service) Connect(requestCtx context.Context, req *taskAPI.ConnectReques
1149
1135
return nil , errdefs .ErrNotImplemented
1150
1136
}
1151
1137
1152
- // Shutdown will attempt a graceful shutdown of the shim+VM. The shutdown procedure will only actually take
1153
- // place if "Now" was set to true OR the VM started successfully, all tasks have been deleted and we were
1154
- // told to shutdown when all tasks were deleted. Otherwise the call is just ignored.
1138
+ // Shutdown will shutdown of the VMM. Unlike StopVM, this method is only exposed to containerd itself.
1155
1139
//
1156
- // Shutdown can be called from a few code paths:
1157
- // * If StopVM is called by the user (in which case "Now" is set to true)
1158
- // * After any task is deleted via containerd's API (containerd calls on behalf of the user)
1159
- // * After any task Create call returns an error (containerd calls on behalf of the user)
1160
- // Shutdown is not directly exposed to containerd clients.
1140
+ // The shutdown procedure will only actually take place if "Now" was set to true OR
1141
+ // the VM started successfully, all tasks have been deleted and we were told to shutdown when all tasks were deleted.
1142
+ // Otherwise the call is just ignored.
1143
+ //
1144
+ // containerd calls this API on behalf of the user in the following cases:
1145
+ // * After any task is deleted via containerd's API
1146
+ // * After any task Create call returns an error
1161
1147
func (s * service ) Shutdown (requestCtx context.Context , req * taskAPI.ShutdownRequest ) (* ptypes.Empty , error ) {
1162
1148
defer logPanicAndDie (log .G (requestCtx ))
1163
- s .logger .WithFields (logrus.Fields {"task_id" : req .ID , "now" : req .Now }).Debug ("shutdown " )
1149
+ s .logger .WithFields (logrus.Fields {"task_id" : req .ID , "now" : req .Now }).Debug ("Shutdown " )
1164
1150
1165
1151
shouldShutdown := req .Now || s .exitAfterAllTasksDeleted && s .taskManager .ShutdownIfEmpty ()
1166
1152
if ! shouldShutdown {
1167
1153
return & ptypes.Empty {}, nil
1168
1154
}
1169
1155
1170
- // cancel the shim context no matter what, which will result in the VM getting a SIGKILL (if not already
1171
- // dead from graceful shutdown) and the shim process itself to begin exiting
1172
- defer s .shimCancel ()
1156
+ shutdownCtx , shutdownCancel := context .WithTimeout (context .Background (), defaultShutdownTimeout * time .Second )
1157
+ defer shutdownCancel ()
1173
1158
1174
- s .logger .Info ("stopping the VM" )
1159
+ if err := s .shutdown (requestCtx , shutdownCtx , req ); err != nil {
1160
+ return & ptypes.Empty {}, err
1161
+ }
1175
1162
1176
- var shutdownErr error
1163
+ return & ptypes.Empty {}, nil
1164
+ }
1177
1165
1178
- _ , err := s .agentClient .Shutdown (requestCtx , req )
1179
- if err != nil {
1180
- shutdownErr = multierror .Append (shutdownErr , errors .Wrap (err , "failed to shutdown VM Agent" ))
1181
- }
1166
+ // shutdown will shutdown the VMM. requestCtx is used to issue requests and shutdownCtx is used for waiting a graceful shutdown procedure.
1167
+ // When shutdownCtx's Done channel is closed during the graceful shutdown procedure, the VMM is forcefully killed.
1168
+ func (s * service ) shutdown (requestCtx , shutdownCtx context.Context , req * taskAPI.ShutdownRequest ) error {
1169
+ // cancel the shim at the end, in case even the last StopVMM() doesn't work
1170
+ defer s .shimCancel ()
1182
1171
1183
- err = s . machine . StopVMM ( )
1184
- if err != nil {
1185
- shutdownErr = multierror . Append ( shutdownErr , errors . Wrap ( err , "failed to gracefully stop VM" ))
1172
+ s . logger . Info ( "stopping the VM" )
1173
+ if _ , err := s . agentClient . Shutdown ( requestCtx , req ); err != nil {
1174
+ return err
1186
1175
}
1187
1176
1188
- err = os .RemoveAll (s .shimDir .RootPath ())
1189
- if err != nil {
1190
- shutdownErr = multierror .Append (shutdownErr , errors .Wrapf (err , "failed to remove VM dir %q during shutdown" , s .shimDir .RootPath ()))
1191
- }
1177
+ shutdownCh := make (chan error )
1178
+ go func () {
1179
+ defer close (shutdownCh )
1180
+ err := s .machine .Wait (shutdownCtx )
1181
+ shutdownCh <- err
1182
+ }()
1192
1183
1193
- if shutdownErr != nil {
1194
- s .logger .WithError (shutdownErr ).Error ()
1195
- return nil , shutdownErr
1184
+ var shutdownErr error
1185
+ select {
1186
+ case shutdownErr = <- shutdownCh :
1187
+ if shutdownErr == nil {
1188
+ s .logger .Info ("the VM has been stopped successfully" )
1189
+ return nil
1190
+ } else if exitErr , ok := shutdownErr .(* exec.ExitError ); ok {
1191
+ s .logger .WithError (exitErr ).Error ("the VM has been stopped, but not successfully" )
1192
+ return exitErr
1193
+ }
1194
+ s .logger .WithError (shutdownErr ).Error ("the VM returns unknown error" )
1195
+ case <- shutdownCtx .Done ():
1196
+ s .logger .Error ("the VM hasn't been stopped before the context's deadline" )
1196
1197
}
1197
1198
1198
- s .logger .Info ("successfully stopped the VM" )
1199
- return & ptypes.Empty {}, nil
1199
+ if err := s .machine .StopVMM (); err != nil {
1200
+ return err
1201
+ }
1202
+ return shutdownErr
1200
1203
}
1201
1204
1202
1205
func (s * service ) Stats (requestCtx context.Context , req * taskAPI.StatsRequest ) (* taskAPI.StatsResponse , error ) {
0 commit comments