Skip to content

Commit 1924e9e

Browse files
committed
[POC] Allow to run gvisor for a default docker container
1 parent 7e4acfc commit 1924e9e

File tree

6 files changed

+151
-139
lines changed

6 files changed

+151
-139
lines changed

runsc/cmd/do.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ func (c *Do) Execute(_ context.Context, f *flag.FlagSet, args ...interface{}) su
8585
conf := args[0].(*config.Config)
8686
waitStatus := args[1].(*syscall.WaitStatus)
8787

88-
if conf.Rootless {
88+
if conf.Rootless && !conf.Unprivileged {
8989
if err := specutils.MaybeRunAsRoot(); err != nil {
9090
return Errorf("Error executing inside namespace: %v", err)
9191
}

runsc/cmd/gofer.go

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -107,26 +107,29 @@ func (g *Gofer) Execute(_ context.Context, f *flag.FlagSet, args ...interface{})
107107
Fatalf("reading spec: %v", err)
108108
}
109109

110-
if g.setUpRoot {
111-
if err := setupRootFS(spec, conf); err != nil {
112-
Fatalf("Error setting up root FS: %v", err)
110+
root := "/"
111+
if !conf.Unprivileged {
112+
if g.setUpRoot {
113+
if err := setupRootFS(spec, conf); err != nil {
114+
Fatalf("Error setting up root FS: %v", err)
115+
}
113116
}
114-
}
115-
if g.applyCaps {
116-
// Disable caps when calling myself again.
117-
// Note: minimal argument handling for the default case to keep it simple.
118-
args := os.Args
119-
args = append(args, "--apply-caps=false", "--setup-root=false")
120-
if err := setCapsAndCallSelf(args, goferCaps); err != nil {
121-
Fatalf("Unable to apply caps: %v", err)
117+
if g.applyCaps {
118+
// Disable caps when calling myself again.
119+
// Note: minimal argument handling for the default case to keep it simple.
120+
args := os.Args
121+
args = append(args, "--apply-caps=false", "--setup-root=false")
122+
if err := setCapsAndCallSelf(args, goferCaps); err != nil {
123+
Fatalf("Unable to apply caps: %v", err)
124+
}
125+
panic("unreachable")
122126
}
123-
panic("unreachable")
124-
}
125127

126-
// Find what path is going to be served by this gofer.
127-
root := spec.Root.Path
128-
if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
129-
root = "/root"
128+
// Find what path is going to be served by this gofer.
129+
root = spec.Root.Path
130+
if !conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
131+
root = "/root"
132+
}
130133
}
131134

132135
// Resolve mount points paths, then replace mounts from our spec and send the

runsc/config/config.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,8 @@ type Config struct {
133133
// mapped to the caller's user.
134134
Rootless bool `flag:"rootless"`
135135

136+
Unprivileged bool `flag:"unprivileged"`
137+
136138
// AlsoLogToStderr allows to send log messages to stderr.
137139
AlsoLogToStderr bool `flag:"alsologtostderr"`
138140

runsc/config/flags.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ func RegisterFlags() {
6161
flag.Int("panic-signal", -1, "register signal handling that panics. Usually set to SIGUSR2(12) to troubleshoot hangs. -1 disables it.")
6262
flag.Bool("profile", false, "prepares the sandbox to use Golang profiler. Note that enabling profiler loosens the seccomp protection added to the sandbox (DO NOT USE IN PRODUCTION).")
6363
flag.Bool("rootless", false, "it allows the sandbox to be started with a user that is not root. Sandbox and Gofer processes may run with same privileges as current user.")
64+
flag.Bool("unprivileged", false, "it allows the sandbox to be started with a user that is not root and doesn't have privileges to create a new user namespace. Sandbox and Gofer processes may run with same privileges as current user.")
6465
flag.Var(leakModePtr(refs.NoLeakChecking), "ref-leak-mode", "sets reference leak check mode: disabled (default), log-names, log-traces.")
6566
flag.Bool("cpu-num-from-quota", false, "set cpu number to cpu quota (least integer greater or equal to quota value, but not less than 2)")
6667
flag.Bool("oci-seccomp", false, "Enables loading OCI seccomp filters inside the sandbox.")

runsc/container/container.go

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -963,23 +963,26 @@ func (c *Container) createGoferProcess(spec *specs.Spec, conf *config.Config, bu
963963

964964
// Enter new namespaces to isolate from the rest of the system. Don't unshare
965965
// cgroup because gofer is added to a cgroup in the caller's namespace.
966-
nss := []specs.LinuxNamespace{
967-
{Type: specs.IPCNamespace},
968-
{Type: specs.MountNamespace},
969-
{Type: specs.NetworkNamespace},
970-
{Type: specs.PIDNamespace},
971-
{Type: specs.UTSNamespace},
972-
}
973-
974-
// Setup any uid/gid mappings, and create or join the configured user
975-
// namespace so the gofer's view of the filesystem aligns with the
976-
// users in the sandbox.
977-
userNS := specutils.FilterNS([]specs.LinuxNamespaceType{specs.UserNamespace}, spec)
978-
nss = append(nss, userNS...)
979-
specutils.SetUIDGIDMappings(cmd, spec)
980-
if len(userNS) != 0 {
981-
// We need to set UID and GID to have capabilities in a new user namespace.
982-
cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: 0}
966+
nss := []specs.LinuxNamespace{}
967+
if !conf.Unprivileged {
968+
// Assign with = rather than :=; := would shadow the outer nss and leave it empty after this block.
nss = []specs.LinuxNamespace{
969+
{Type: specs.IPCNamespace},
970+
{Type: specs.MountNamespace},
971+
{Type: specs.NetworkNamespace},
972+
{Type: specs.PIDNamespace},
973+
{Type: specs.UTSNamespace},
974+
}
975+
976+
// Setup any uid/gid mappings, and create or join the configured user
977+
// namespace so the gofer's view of the filesystem aligns with the
978+
// users in the sandbox.
979+
userNS := specutils.FilterNS([]specs.LinuxNamespaceType{specs.UserNamespace}, spec)
980+
nss = append(nss, userNS...)
981+
specutils.SetUIDGIDMappings(cmd, spec)
982+
if len(userNS) != 0 {
983+
// We need to set UID and GID to have capabilities in a new user namespace.
984+
cmd.SysProcAttr.Credential = &syscall.Credential{Uid: 0, Gid: 0}
985+
}
983986
}
984987

985988
// Start the gofer in the given namespace.

runsc/sandbox/sandbox.go

Lines changed: 107 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -536,119 +536,122 @@ func (s *Sandbox) createSandboxProcess(conf *config.Config, args *Args, startSyn
536536
// are virtualized inside the sandbox. Be paranoid and run inside an empty
537537
// namespace for these. Don't unshare cgroup because sandbox is added to a
538538
// cgroup in the caller's namespace.
539-
log.Infof("Sandbox will be started in new mount, IPC and UTS namespaces")
540-
nss := []specs.LinuxNamespace{
541-
{Type: specs.IPCNamespace},
542-
{Type: specs.MountNamespace},
543-
{Type: specs.UTSNamespace},
544-
}
539+
nss := []specs.LinuxNamespace{}
540+
if !conf.Unprivileged {
541+
log.Infof("Sandbox will be started in new mount, IPC and UTS namespaces")
542+
nss = []specs.LinuxNamespace{
543+
{Type: specs.IPCNamespace},
544+
{Type: specs.MountNamespace},
545+
{Type: specs.UTSNamespace},
546+
}
545547

546-
if gPlatform.Requirements().RequiresCurrentPIDNS {
547-
// TODO(b/75837838): Also set a new PID namespace so that we limit
548-
// access to other host processes.
549-
log.Infof("Sandbox will be started in the current PID namespace")
550-
} else {
551-
log.Infof("Sandbox will be started in a new PID namespace")
552-
nss = append(nss, specs.LinuxNamespace{Type: specs.PIDNamespace})
553-
cmd.Args = append(cmd.Args, "--pidns=true")
554-
}
555-
556-
// Joins the network namespace if network is enabled. The sandbox talks
557-
// directly to the host network, which may have been configured in the
558-
// namespace.
559-
if ns, ok := specutils.GetNS(specs.NetworkNamespace, args.Spec); ok && conf.Network != config.NetworkNone {
560-
log.Infof("Sandbox will be started in the container's network namespace: %+v", ns)
561-
nss = append(nss, ns)
562-
} else if conf.Network == config.NetworkHost {
563-
log.Infof("Sandbox will be started in the host network namespace")
564-
} else {
565-
log.Infof("Sandbox will be started in new network namespace")
566-
nss = append(nss, specs.LinuxNamespace{Type: specs.NetworkNamespace})
567-
}
568-
569-
// User namespace depends on the network type. Host network requires to run
570-
// inside the user namespace specified in the spec or the current namespace
571-
// if none is configured.
572-
if conf.Network == config.NetworkHost {
573-
if userns, ok := specutils.GetNS(specs.UserNamespace, args.Spec); ok {
574-
log.Infof("Sandbox will be started in container's user namespace: %+v", userns)
575-
nss = append(nss, userns)
576-
specutils.SetUIDGIDMappings(cmd, args.Spec)
548+
if gPlatform.Requirements().RequiresCurrentPIDNS {
549+
// TODO(b/75837838): Also set a new PID namespace so that we limit
550+
// access to other host processes.
551+
log.Infof("Sandbox will be started in the current PID namespace")
577552
} else {
578-
log.Infof("Sandbox will be started in the current user namespace")
553+
log.Infof("Sandbox will be started in a new PID namespace")
554+
nss = append(nss, specs.LinuxNamespace{Type: specs.PIDNamespace})
555+
cmd.Args = append(cmd.Args, "--pidns=true")
579556
}
580-
// When running in the caller's defined user namespace, apply the same
581-
// capabilities to the sandbox process to ensure it abides by the same
582-
// rules.
583-
cmd.Args = append(cmd.Args, "--apply-caps=true")
584-
585-
// If we have CAP_SYS_ADMIN, we can create an empty chroot and
586-
// bind-mount the executable inside it.
587-
if conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
588-
log.Warningf("Running sandbox in test mode without chroot. This is only safe in tests!")
589-
590-
} else if specutils.HasCapabilities(capability.CAP_SYS_ADMIN) {
591-
log.Infof("Sandbox will be started in minimal chroot")
592-
cmd.Args = append(cmd.Args, "--setup-root")
557+
558+
// Joins the network namespace if network is enabled. The sandbox talks
559+
// directly to the host network, which may have been configured in the
560+
// namespace.
561+
if ns, ok := specutils.GetNS(specs.NetworkNamespace, args.Spec); ok && conf.Network != config.NetworkNone {
562+
log.Infof("Sandbox will be started in the container's network namespace: %+v", ns)
563+
nss = append(nss, ns)
564+
} else if conf.Network == config.NetworkHost {
565+
log.Infof("Sandbox will be started in the host network namespace")
593566
} else {
594-
return fmt.Errorf("can't run sandbox process in minimal chroot since we don't have CAP_SYS_ADMIN")
567+
log.Infof("Sandbox will be started in new network namespace")
568+
nss = append(nss, specs.LinuxNamespace{Type: specs.NetworkNamespace})
595569
}
596-
} else {
597-
// If we have CAP_SETUID and CAP_SETGID, then we can also run
598-
// as user nobody.
599-
if conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
600-
log.Warningf("Running sandbox in test mode as current user (uid=%d gid=%d). This is only safe in tests!", os.Getuid(), os.Getgid())
601-
log.Warningf("Running sandbox in test mode without chroot. This is only safe in tests!")
602-
} else if specutils.HasCapabilities(capability.CAP_SETUID, capability.CAP_SETGID) {
603-
log.Infof("Sandbox will be started in new user namespace")
604-
nss = append(nss, specs.LinuxNamespace{Type: specs.UserNamespace})
605-
cmd.Args = append(cmd.Args, "--setup-root")
606-
607-
const nobody = 65534
608-
if conf.Rootless {
609-
log.Infof("Rootless mode: sandbox will run as nobody inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid())
610-
cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
611-
{
612-
ContainerID: nobody,
613-
HostID: os.Getuid(),
614-
Size: 1,
615-
},
616-
}
617-
cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
618-
{
619-
ContainerID: nobody,
620-
HostID: os.Getgid(),
621-
Size: 1,
622-
},
623-
}
624570

571+
// User namespace depends on the network type. Host network requires to run
572+
// inside the user namespace specified in the spec or the current namespace
573+
// if none is configured.
574+
if conf.Network == config.NetworkHost {
575+
if userns, ok := specutils.GetNS(specs.UserNamespace, args.Spec); ok {
576+
log.Infof("Sandbox will be started in container's user namespace: %+v", userns)
577+
nss = append(nss, userns)
578+
specutils.SetUIDGIDMappings(cmd, args.Spec)
625579
} else {
626-
// Map nobody in the new namespace to nobody in the parent namespace.
627-
//
628-
// A sandbox process will construct an empty
629-
// root for itself, so it has to have
630-
// CAP_SYS_ADMIN and CAP_SYS_CHROOT capabilities.
631-
cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
632-
{
633-
ContainerID: nobody,
634-
HostID: nobody,
635-
Size: 1,
636-
},
637-
}
638-
cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
639-
{
640-
ContainerID: nobody,
641-
HostID: nobody,
642-
Size: 1,
643-
},
644-
}
580+
log.Infof("Sandbox will be started in the current user namespace")
581+
}
582+
// When running in the caller's defined user namespace, apply the same
583+
// capabilities to the sandbox process to ensure it abides by the same
584+
// rules.
585+
cmd.Args = append(cmd.Args, "--apply-caps=true")
586+
587+
// If we have CAP_SYS_ADMIN, we can create an empty chroot and
588+
// bind-mount the executable inside it.
589+
if conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
590+
log.Warningf("Running sandbox in test mode without chroot. This is only safe in tests!")
591+
592+
} else if specutils.HasCapabilities(capability.CAP_SYS_ADMIN) {
593+
log.Infof("Sandbox will be started in minimal chroot")
594+
cmd.Args = append(cmd.Args, "--setup-root")
595+
} else {
596+
return fmt.Errorf("can't run sandbox process in minimal chroot since we don't have CAP_SYS_ADMIN")
645597
}
646-
647-
// Set credentials to run as user and group nobody.
648-
cmd.SysProcAttr.Credential = &syscall.Credential{Uid: nobody, Gid: nobody}
649-
cmd.SysProcAttr.AmbientCaps = append(cmd.SysProcAttr.AmbientCaps, uintptr(capability.CAP_SYS_ADMIN), uintptr(capability.CAP_SYS_CHROOT))
650598
} else {
651-
return fmt.Errorf("can't run sandbox process as user nobody since we don't have CAP_SETUID or CAP_SETGID")
599+
// If we have CAP_SETUID and CAP_SETGID, then we can also run
600+
// as user nobody.
601+
if conf.TestOnlyAllowRunAsCurrentUserWithoutChroot {
602+
log.Warningf("Running sandbox in test mode as current user (uid=%d gid=%d). This is only safe in tests!", os.Getuid(), os.Getgid())
603+
log.Warningf("Running sandbox in test mode without chroot. This is only safe in tests!")
604+
} else if specutils.HasCapabilities(capability.CAP_SETUID, capability.CAP_SETGID) {
605+
log.Infof("Sandbox will be started in new user namespace")
606+
nss = append(nss, specs.LinuxNamespace{Type: specs.UserNamespace})
607+
cmd.Args = append(cmd.Args, "--setup-root")
608+
609+
const nobody = 65534
610+
if conf.Rootless {
611+
log.Infof("Rootless mode: sandbox will run as nobody inside user namespace, mapped to the current user, uid: %d, gid: %d", os.Getuid(), os.Getgid())
612+
cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
613+
{
614+
ContainerID: nobody,
615+
HostID: os.Getuid(),
616+
Size: 1,
617+
},
618+
}
619+
cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
620+
{
621+
ContainerID: nobody,
622+
HostID: os.Getgid(),
623+
Size: 1,
624+
},
625+
}
626+
627+
} else {
628+
// Map nobody in the new namespace to nobody in the parent namespace.
629+
//
630+
// A sandbox process will construct an empty
631+
// root for itself, so it has to have
632+
// CAP_SYS_ADMIN and CAP_SYS_CHROOT capabilities.
633+
cmd.SysProcAttr.UidMappings = []syscall.SysProcIDMap{
634+
{
635+
ContainerID: nobody,
636+
HostID: nobody,
637+
Size: 1,
638+
},
639+
}
640+
cmd.SysProcAttr.GidMappings = []syscall.SysProcIDMap{
641+
{
642+
ContainerID: nobody,
643+
HostID: nobody,
644+
Size: 1,
645+
},
646+
}
647+
}
648+
649+
// Set credentials to run as user and group nobody.
650+
cmd.SysProcAttr.Credential = &syscall.Credential{Uid: nobody, Gid: nobody}
651+
cmd.SysProcAttr.AmbientCaps = append(cmd.SysProcAttr.AmbientCaps, uintptr(capability.CAP_SYS_ADMIN), uintptr(capability.CAP_SYS_CHROOT))
652+
} else {
653+
return fmt.Errorf("can't run sandbox process as user nobody since we don't have CAP_SETUID or CAP_SETGID")
654+
}
652655
}
653656
}
654657

0 commit comments

Comments
 (0)