Skip to content

Commit b5aa5b7

Browse files
manninglucas authored and gvisor-bot committed
Add support for executing a binary before saving and after restoring.
PiperOrigin-RevId: 752351058
1 parent 3e5886f commit b5aa5b7

File tree

3 files changed

+135
-0
lines changed

3 files changed

+135
-0
lines changed

pkg/sentry/control/state.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ package control
1717
import (
1818
"errors"
1919
"fmt"
20+
"time"
2021

2122
"gvisor.dev/gvisor/pkg/sentry/kernel"
2223
"gvisor.dev/gvisor/pkg/sentry/pgalloc"
@@ -25,6 +26,18 @@ import (
2526
"gvisor.dev/gvisor/pkg/urpc"
2627
)
2728

29+
// Metadata keys and defaults controlling the optional save/restore binary
// that State.Save runs before the kernel is saved.
const (
30+
// SaveRestoreBinTimeoutKey is the key used to save the timeout for the
31+
// save/restore binary in the metadata during save/restore.
// The associated value is a string that Save parses with
// time.ParseDuration; a value that fails to parse makes Save return an error.
32+
SaveRestoreBinTimeoutKey = "save-restore-bin-timeout"
33+
34+
// SaveRestoreBinPathKey is the key used to save the path to the save/restore
35+
// binary in the metadata during save/restore.
// Save only runs the binary when this key is present in the metadata.
36+
SaveRestoreBinPathKey = "save-restore-bin-path"
37+
38+
// defaultSaveRestoreBinTimeout is the timeout applied when the metadata
// contains SaveRestoreBinPathKey but no SaveRestoreBinTimeoutKey entry.
defaultSaveRestoreBinTimeout = 10 * time.Second
39+
)
40+
2841
// ErrInvalidFiles is returned when the urpc call to Save does not include an
2942
// appropriate file payload (e.g. there is no output file!).
3043
var ErrInvalidFiles = errors.New("exactly one file must be provided")
@@ -97,5 +110,20 @@ func (s *State) Save(o *SaveOpts, _ *struct{}) error {
97110
}
98111
defer saveOpts.PagesFile.Close()
99112
}
113+
if saveRestoreBinPath, ok := o.Metadata[SaveRestoreBinPathKey]; ok {
114+
saveRestoreBinTimeout := defaultSaveRestoreBinTimeout
115+
if saveRestoreBinTimeoutString, ok := o.Metadata[SaveRestoreBinTimeoutKey]; ok {
116+
var err error
117+
saveRestoreBinTimeout, err = time.ParseDuration(saveRestoreBinTimeoutString)
118+
if err != nil {
119+
return fmt.Errorf("failed to parse save/restore bin timeout: %w", err)
120+
}
121+
}
122+
s.Kernel.SaveRestoreBinPath = saveRestoreBinPath
123+
s.Kernel.SaveRestoreBinTimeout = saveRestoreBinTimeout
124+
if _, err := s.Kernel.ExecSaveRestoreBin(kernel.SaveRestoreBinSave); err != nil {
125+
return fmt.Errorf("failed to exec save/restore binary: %w", err)
126+
}
127+
}
100128
return saveOpts.Save(s.Kernel.SupervisorContext(), s.Kernel, s.Watchdog)
101129
}

pkg/sentry/kernel/kernel.go

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,16 @@ import (
8686
// allow easy access everywhere.
8787
var IOUringEnabled = false
8888

89+
// SaveRestoreBinMode is the mode for the save/restore binary.
// Its string value is passed to the binary as the single argument following
// the binary path (see ExecSaveRestoreBin).
90+
type SaveRestoreBinMode string
91+
92+
const (
93+
// SaveRestoreBinSave is the save mode for the save/restore binary.
// It is used when the binary is run before the kernel is saved.
94+
SaveRestoreBinSave SaveRestoreBinMode = "save"
95+
// SaveRestoreBinRestore is the restore mode for the save/restore binary.
// It is used when the binary is run after the kernel state has been loaded.
96+
SaveRestoreBinRestore SaveRestoreBinMode = "restore"
97+
)
98+
8999
// UserCounters is a set of user counters.
90100
//
91101
// +stateify savable
@@ -370,6 +380,16 @@ type Kernel struct {
370380

371381
// UnixSocketOpts stores configuration options for management of unix sockets.
372382
UnixSocketOpts transport.UnixSocketOpts
383+
384+
// SaveRestoreBinPath is the path to the save/restore binary. It is executed
385+
// with the argument "save" before the kernel is saved and "restore" after
386+
// the kernel is restored and restarted.
387+
SaveRestoreBinPath string
388+
389+
// SaveRestoreBinTimeout is the timeout for the save/restore binary. If the
390+
// binary fails to exit within this timeout the save/restore operation will
391+
// fail.
392+
SaveRestoreBinTimeout time.Duration
373393
}
374394

375395
// InitKernelArgs holds arguments to Init.
@@ -2072,3 +2092,85 @@ func (k *Kernel) ContainerName(cid string) string {
20722092
defer k.extMu.Unlock()
20732093
return k.containerNames[cid]
20742094
}
2095+
2096+
// ExecSaveRestoreBin creates a new process that executes the save/restore
2097+
// binary. If the kernel has been started, the process is immediately started
2098+
// and the method waits for it to exit. Otherwise, the caller is responsible
2099+
// for starting and waiting for the process.
2100+
func (k *Kernel) ExecSaveRestoreBin(mode SaveRestoreBinMode) (*ThreadGroup, error) {
2101+
if k.SaveRestoreBinPath == "" {
2102+
return nil, nil
2103+
}
2104+
sctx := k.SupervisorContext()
2105+
leader := k.GlobalInit().Leader()
2106+
contID := leader.ContainerID()
2107+
mntns := leader.MountNamespace()
2108+
if mntns == nil || !mntns.TryIncRef() {
2109+
log.Warningf("PID %d in container %q has exited, skipping CUDA checkpoint for it", leader.ThreadGroup().ID(), contID)
2110+
return nil, nil
2111+
}
2112+
fdTable := leader.FDTable()
2113+
fdTable.IncRef()
2114+
root := mntns.Root(sctx)
2115+
cu := cleanup.Make(func() {
2116+
root.DecRef(sctx)
2117+
})
2118+
defer cu.Clean()
2119+
ctx := vfs.WithRoot(sctx, root)
2120+
cu.Add(func() {
2121+
mntns.DecRef(ctx)
2122+
})
2123+
2124+
argv := []string{k.SaveRestoreBinPath, string(mode)}
2125+
leader.FDTable().IncRef()
2126+
cu.Add(func() {
2127+
fdTable.DecRef(ctx)
2128+
})
2129+
defer leader.FDTable().DecRef(ctx)
2130+
2131+
mntns.IncRef()
2132+
args := CreateProcessArgs{
2133+
Filename: argv[0],
2134+
Argv: argv,
2135+
ContainerID: contID,
2136+
MountNamespace: mntns,
2137+
PIDNamespace: k.RootPIDNamespace(),
2138+
UTSNamespace: k.RootUTSNamespace(),
2139+
IPCNamespace: k.RootIPCNamespace(),
2140+
Credentials: leader.Credentials(),
2141+
Umask: 0022,
2142+
Limits: limits.NewLimitSet(),
2143+
FDTable: fdTable,
2144+
Origin: OriginExec,
2145+
}
2146+
tg, _, err := k.CreateProcess(args)
2147+
if err != nil {
2148+
return nil, fmt.Errorf("failed to create process: %w", err)
2149+
}
2150+
if k.started {
2151+
k.StartProcess(tg)
2152+
return nil, k.WaitForSaveRestoreBin(tg)
2153+
}
2154+
return tg, nil
2155+
}
2156+
2157+
// WaitForSaveRestoreBin waits for the save/restore binary to exit. If the
2158+
// SaveRestoreBinTimeout is exceeded, the save/restore binary is killed and
2159+
// the method returns an error.
2160+
func (k *Kernel) WaitForSaveRestoreBin(saveRestoreTg *ThreadGroup) error {
2161+
waitC := make(chan struct{})
2162+
go func() {
2163+
saveRestoreTg.WaitExited()
2164+
waitC <- struct{}{}
2165+
}()
2166+
select {
2167+
case <-waitC:
2168+
if saveRestoreTg.ExitStatus() != 0 {
2169+
return fmt.Errorf("%v exited with non-zero status %d", k.SaveRestoreBinPath, saveRestoreTg.ExitStatus())
2170+
}
2171+
case <-time.After(k.SaveRestoreBinTimeout):
2172+
saveRestoreTg.SendSignal(&linux.SignalInfo{Signo: int32(linux.SIGKILL)})
2173+
return fmt.Errorf("%s timed out after %v", k.SaveRestoreBinPath, k.SaveRestoreBinTimeout)
2174+
}
2175+
return nil
2176+
}

runsc/boot/restore.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,11 @@ func (r *restorer) restore(l *Loader) error {
257257
if err := l.k.LoadFrom(ctx, r.stateFile, r.asyncMFLoader == nil, nil, oldInetStack, time.NewCalibratedClocks(), &vfs.CompleteRestoreOptions{}, l.saveRestoreNet); err != nil {
258258
return fmt.Errorf("failed to load kernel: %w", err)
259259
}
260+
// The kernel should already have been started at this point, so we can
261+
// immediately wait for the save/restore binary to be ready.
262+
if _, err := l.k.ExecSaveRestoreBin(kernel.SaveRestoreBinRestore); err != nil {
263+
return fmt.Errorf("failed to wait for save/restore binary: %w", err)
264+
}
260265

261266
if r.asyncMFLoader != nil {
262267
if r.background {

0 commit comments

Comments
 (0)