diff --git a/.gitignore b/.gitignore index 1639997a5..d9cb32460 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .idea/ bin/ +runtime/logs *stamp diff --git a/examples/etc/containerd/firecracker-runtime.json b/examples/etc/containerd/firecracker-runtime.json index d78fcc763..7c0bea365 100644 --- a/examples/etc/containerd/firecracker-runtime.json +++ b/examples/etc/containerd/firecracker-runtime.json @@ -5,5 +5,8 @@ "root_drive": "/var/lib/firecracker-containerd/runtime/default-rootfs.img", "cpu_count": 1, "cpu_template": "T2", - "log_level": "Debug" + "log_level": "Debug", + "jailer": { + "runc_binary_path": "/usr/local/bin/runc" + } } diff --git a/proto/firecracker.pb.go b/proto/firecracker.pb.go index f363aaeac..3837f5606 100644 --- a/proto/firecracker.pb.go +++ b/proto/firecracker.pb.go @@ -39,10 +39,11 @@ type CreateVMRequest struct { // The number of dummy drives to reserve in advance before running FC instance. ContainerCount int32 `protobuf:"varint,8,opt,name=ContainerCount,proto3" json:"ContainerCount,omitempty"` // Whether the VM should exit after all tasks running in it have been deleted. 
- ExitAfterAllTasksDeleted bool `protobuf:"varint,9,opt,name=ExitAfterAllTasksDeleted,proto3" json:"ExitAfterAllTasksDeleted,omitempty"` - XXX_NoUnkeyedLiteral struct{} `json:"-"` - XXX_unrecognized []byte `json:"-"` - XXX_sizecache int32 `json:"-"` + ExitAfterAllTasksDeleted bool `protobuf:"varint,9,opt,name=ExitAfterAllTasksDeleted,proto3" json:"ExitAfterAllTasksDeleted,omitempty"` + JailerConfig *JailerConfig `protobuf:"bytes,10,opt,name=JailerConfig,proto3" json:"JailerConfig,omitempty"` + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` } func (m *CreateVMRequest) Reset() { *m = CreateVMRequest{} } @@ -132,6 +133,13 @@ func (m *CreateVMRequest) GetExitAfterAllTasksDeleted() bool { return false } +func (m *CreateVMRequest) GetJailerConfig() *JailerConfig { + if m != nil { + return m.JailerConfig + } + return nil +} + type StopVMRequest struct { VMID string `protobuf:"bytes,1,opt,name=VMID,proto3" json:"VMID,omitempty"` TimeoutSeconds uint32 `protobuf:"varint,2,opt,name=TimeoutSeconds,proto3" json:"TimeoutSeconds,omitempty"` @@ -332,44 +340,77 @@ func (m *SetVMMetadataRequest) GetMetadata() string { return "" } +type JailerConfig struct { + XXX_NoUnkeyedLiteral struct{} `json:"-"` + XXX_unrecognized []byte `json:"-"` + XXX_sizecache int32 `json:"-"` +} + +func (m *JailerConfig) Reset() { *m = JailerConfig{} } +func (m *JailerConfig) String() string { return proto.CompactTextString(m) } +func (*JailerConfig) ProtoMessage() {} +func (*JailerConfig) Descriptor() ([]byte, []int) { + return fileDescriptor_a73317e9fb8da571, []int{5} +} +func (m *JailerConfig) XXX_Unmarshal(b []byte) error { + return xxx_messageInfo_JailerConfig.Unmarshal(m, b) +} +func (m *JailerConfig) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + return xxx_messageInfo_JailerConfig.Marshal(b, m, deterministic) +} +func (m *JailerConfig) XXX_Merge(src proto.Message) { + xxx_messageInfo_JailerConfig.Merge(m, src) +} +func (m 
*JailerConfig) XXX_Size() int { + return xxx_messageInfo_JailerConfig.Size(m) +} +func (m *JailerConfig) XXX_DiscardUnknown() { + xxx_messageInfo_JailerConfig.DiscardUnknown(m) +} + +var xxx_messageInfo_JailerConfig proto.InternalMessageInfo + func init() { proto.RegisterType((*CreateVMRequest)(nil), "CreateVMRequest") proto.RegisterType((*StopVMRequest)(nil), "StopVMRequest") proto.RegisterType((*GetVMInfoRequest)(nil), "GetVMInfoRequest") proto.RegisterType((*GetVMInfoResponse)(nil), "GetVMInfoResponse") proto.RegisterType((*SetVMMetadataRequest)(nil), "SetVMMetadataRequest") + proto.RegisterType((*JailerConfig)(nil), "JailerConfig") } func init() { proto.RegisterFile("firecracker.proto", fileDescriptor_a73317e9fb8da571) } var fileDescriptor_a73317e9fb8da571 = []byte{ - // 446 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x7c, 0x93, 0x4f, 0x6f, 0xd3, 0x4c, - 0x10, 0xc6, 0xe5, 0x37, 0x4d, 0x1b, 0x4f, 0xd4, 0x3f, 0x59, 0xbd, 0x87, 0x55, 0x55, 0x21, 0xcb, - 0x87, 0xc8, 0xa7, 0x22, 0xc1, 0x0d, 0x09, 0x89, 0x90, 0x10, 0x64, 0x8a, 0x11, 0xda, 0x54, 0x39, - 0x70, 0x5b, 0xec, 0xb1, 0xbb, 0x8a, 0xbb, 0x1b, 0x76, 0x27, 0x50, 0x3e, 0x16, 0x1f, 0x85, 0x6f, - 0x84, 0xbc, 0xa1, 0x8d, 0xe5, 0x96, 0x9c, 0x92, 0xfd, 0xcd, 0xf3, 0xec, 0x3c, 0xf6, 0x8c, 0x61, - 0x54, 0x2a, 0x8b, 0xb9, 0x95, 0xf9, 0x0a, 0xed, 0xe5, 0xda, 0x1a, 0x32, 0xe7, 0x43, 0xfa, 0xb9, - 0x46, 0xb7, 0x3d, 0xc4, 0xbf, 0x7b, 0x70, 0x3a, 0xb5, 0x28, 0x09, 0x97, 0x99, 0xc0, 0x6f, 0x1b, - 0x74, 0xc4, 0x18, 0x1c, 0x2c, 0xb3, 0x74, 0xc6, 0x83, 0x28, 0x48, 0x42, 0xe1, 0xff, 0xb3, 0x37, - 0x00, 0x99, 0xcc, 0x6f, 0x94, 0xc6, 0x69, 0x59, 0xf1, 0xff, 0xa2, 0x20, 0x19, 0xbe, 0x88, 0x2e, - 0xe7, 0xbb, 0xcb, 0xef, 0xab, 0x46, 0x97, 0xaa, 0xda, 0x58, 0x49, 0xca, 0x68, 0xd1, 0xf2, 0xb0, - 0x04, 0x4e, 0xaf, 0xd0, 0x6a, 0xac, 0xd3, 0x5b, 0x59, 0xe1, 0x67, 0x49, 0x37, 0xbc, 0xe7, 0x1b, - 0x74, 0x31, 0x7b, 0x06, 0xb0, 0x45, 0x13, 0x5b, 0x39, 0x7e, 0xe0, 0x45, 0x2d, 0xc2, 0x9e, 
0x43, - 0x28, 0x8c, 0xa1, 0x99, 0x55, 0xdf, 0x91, 0xf7, 0x7d, 0x94, 0x51, 0x3b, 0x8a, 0x2f, 0x88, 0x9d, - 0x86, 0xbd, 0x86, 0xb3, 0x49, 0x51, 0xa8, 0x26, 0x92, 0xac, 0x3d, 0x72, 0xfc, 0x30, 0xea, 0x3d, - 0xed, 0x7b, 0x24, 0x65, 0x1f, 0x60, 0xf4, 0x09, 0xe9, 0x87, 0xb1, 0xab, 0x54, 0x13, 0xda, 0x52, - 0xe6, 0xe8, 0xf8, 0x91, 0xf7, 0x5f, 0xb4, 0xfd, 0x5d, 0x91, 0x78, 0x6c, 0x63, 0x63, 0x38, 0x99, - 0x1a, 0x4d, 0x52, 0x69, 0xb4, 0x53, 0xb3, 0xd1, 0xc4, 0x07, 0x51, 0x90, 0xf4, 0x45, 0x87, 0xb2, - 0x57, 0xc0, 0xdf, 0xdd, 0x29, 0x9a, 0x94, 0x84, 0x76, 0x52, 0xd7, 0xd7, 0xd2, 0xad, 0xdc, 0x0c, - 0x6b, 0x24, 0x2c, 0x78, 0x18, 0x05, 0xc9, 0x40, 0xfc, 0xb3, 0x1e, 0x5f, 0xc1, 0xf1, 0x82, 0xcc, - 0x7a, 0xff, 0x40, 0xc7, 0x70, 0x72, 0xad, 0x6e, 0xd1, 0x6c, 0x68, 0x81, 0xb9, 0xd1, 0x85, 0xf3, - 0x43, 0x3d, 0x16, 0x1d, 0x1a, 0x8f, 0xe1, 0xec, 0x3d, 0xd2, 0x32, 0x4b, 0x75, 0x69, 0xf6, 0xdc, - 0x17, 0xff, 0x0a, 0x60, 0xd4, 0x12, 0xba, 0xb5, 0xd1, 0x0e, 0x9f, 0xec, 0x7c, 0x01, 0x61, 0xf3, - 0xb0, 0x78, 0x47, 0xe9, 0xec, 0x6f, 0xd3, 0x1d, 0x68, 0x86, 0xbf, 0x30, 0xf9, 0x0a, 0xa9, 0xb5, - 0x21, 0x2d, 0xc2, 0x22, 0x18, 0x7e, 0x34, 0xd5, 0x5c, 0x95, 0xc6, 0x0b, 0xb6, 0xdb, 0xd1, 0x46, - 0xcd, 0xa2, 0x65, 0x48, 0x56, 0xe5, 0xee, 0x41, 0xd5, 0xdf, 0x2e, 0x5a, 0x07, 0xc7, 0x73, 0xf8, - 0x7f, 0xd1, 0x44, 0xce, 0x90, 0x64, 0x21, 0x49, 0xee, 0x7b, 0x5f, 0xe7, 0x30, 0xb8, 0x97, 0xf9, - 0xd0, 0xa1, 0x78, 0x38, 0xbf, 0x3d, 0xfa, 0xd2, 0xf7, 0x5f, 0xd3, 0xd7, 0x43, 0xff, 0xf3, 0xf2, - 0x4f, 0x00, 0x00, 0x00, 0xff, 0xff, 0xad, 0x3b, 0x3d, 0xc6, 0x76, 0x03, 0x00, 0x00, + // 467 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x7c, 0x93, 0xcf, 0x6f, 0xd3, 0x30, + 0x14, 0xc7, 0x15, 0xd6, 0x6e, 0xed, 0x2b, 0xed, 0x56, 0x8b, 0x83, 0x35, 0x4d, 0x28, 0xca, 0xa1, + 0xea, 0xa9, 0x08, 0xb8, 0x21, 0x21, 0x51, 0x5a, 0x8a, 0xba, 0x11, 0x84, 0xdc, 0xa9, 0x07, 0x6e, + 0x26, 0x79, 0xc9, 0xac, 0x66, 0x76, 0xb1, 0x5f, 0x61, 0xdc, 0xf9, 0x87, 0xf8, 0x0f, 0x51, 
0x5c, + 0xb6, 0x66, 0xd9, 0xe8, 0x29, 0xf1, 0xe7, 0x7d, 0xdf, 0x0f, 0x3d, 0x7f, 0x0d, 0xfd, 0x4c, 0x59, + 0x4c, 0xac, 0x4c, 0x56, 0x68, 0x47, 0x6b, 0x6b, 0xc8, 0x9c, 0x76, 0xe8, 0xd7, 0x1a, 0xdd, 0xf6, + 0x10, 0xfd, 0x6e, 0xc0, 0xf1, 0xc4, 0xa2, 0x24, 0x5c, 0xc6, 0x02, 0xbf, 0x6f, 0xd0, 0x11, 0x63, + 0xd0, 0x58, 0xc6, 0xf3, 0x29, 0x0f, 0xc2, 0x60, 0xd8, 0x16, 0xfe, 0x9f, 0xbd, 0x03, 0x88, 0x65, + 0x72, 0xa5, 0x34, 0x4e, 0xb2, 0x9c, 0x3f, 0x09, 0x83, 0x61, 0xe7, 0x55, 0x38, 0x9a, 0xed, 0x8a, + 0xdf, 0x46, 0x8d, 0xce, 0x54, 0xbe, 0xb1, 0x92, 0x94, 0xd1, 0xa2, 0x92, 0xc3, 0x86, 0x70, 0x7c, + 0x81, 0x56, 0x63, 0x31, 0xbf, 0x96, 0x39, 0x7e, 0x91, 0x74, 0xc5, 0x0f, 0x7c, 0x83, 0x3a, 0x66, + 0xcf, 0x01, 0xb6, 0x68, 0x6c, 0x73, 0xc7, 0x1b, 0x5e, 0x54, 0x21, 0xec, 0x05, 0xb4, 0x85, 0x31, + 0x34, 0xb5, 0xea, 0x07, 0xf2, 0xa6, 0x1f, 0xa5, 0x5f, 0x1d, 0xc5, 0x07, 0xc4, 0x4e, 0xc3, 0xde, + 0xc2, 0xc9, 0x38, 0x4d, 0x55, 0x39, 0x92, 0x2c, 0x3c, 0x72, 0xfc, 0x30, 0x3c, 0x78, 0x3c, 0xef, + 0x81, 0x94, 0x9d, 0x43, 0xff, 0x33, 0xd2, 0x4f, 0x63, 0x57, 0x73, 0x4d, 0x68, 0x33, 0x99, 0xa0, + 0xe3, 0x47, 0x3e, 0xff, 0xac, 0x9a, 0x5f, 0x17, 0x89, 0x87, 0x69, 0x6c, 0x00, 0xbd, 0x89, 0xd1, + 0x24, 0x95, 0x46, 0x3b, 0x31, 0x1b, 0x4d, 0xbc, 0x15, 0x06, 0xc3, 0xa6, 0xa8, 0x51, 0xf6, 0x06, + 0xf8, 0x87, 0x1b, 0x45, 0xe3, 0x8c, 0xd0, 0x8e, 0x8b, 0xe2, 0x52, 0xba, 0x95, 0x9b, 0x62, 0x81, + 0x84, 0x29, 0x6f, 0x87, 0xc1, 0xb0, 0x25, 0xfe, 0x1b, 0x67, 0x2f, 0xe1, 0xe9, 0xb9, 0x54, 0x45, + 0x59, 0xaa, 0xbc, 0x0c, 0x0e, 0x7e, 0x45, 0xdd, 0x51, 0x15, 0x8a, 0x7b, 0x92, 0xe8, 0x02, 0xba, + 0x0b, 0x32, 0xeb, 0xfd, 0x1e, 0x18, 0x40, 0xef, 0x52, 0x5d, 0xa3, 0xd9, 0xd0, 0x02, 0x13, 0xa3, + 0x53, 0xe7, 0x7d, 0xd0, 0x15, 0x35, 0x1a, 0x0d, 0xe0, 0xe4, 0x23, 0xd2, 0x32, 0x9e, 0xeb, 0xcc, + 0xec, 0xa9, 0x17, 0xfd, 0x09, 0xa0, 0x5f, 0x11, 0xba, 0xb5, 0xd1, 0x0e, 0x1f, 0xed, 0x7c, 0x06, + 0xed, 0x72, 0x3f, 0x78, 0x43, 0xf3, 0xe9, 0xbf, 0xa6, 0x3b, 0x50, 0xfa, 0x65, 0x61, 0x92, 0x15, + 0x52, 0xc5, 0x54, 0x15, 0xc2, 
0x42, 0xe8, 0x7c, 0x32, 0xf9, 0x4c, 0x65, 0xc6, 0x0b, 0xb6, 0x86, + 0xaa, 0xa2, 0xd2, 0x9b, 0x31, 0x92, 0x55, 0x89, 0xbb, 0x53, 0x35, 0xb7, 0xde, 0xac, 0xe1, 0x68, + 0x06, 0xcf, 0x16, 0xe5, 0xc8, 0x31, 0x92, 0x4c, 0x25, 0xc9, 0x7d, 0xfb, 0x3a, 0x85, 0xd6, 0xad, + 0xcc, 0x0f, 0xdd, 0x16, 0x77, 0xe7, 0xa8, 0x77, 0xff, 0x8e, 0xde, 0x1f, 0x7d, 0x6d, 0xfa, 0x07, + 0xf9, 0xed, 0xd0, 0x7f, 0x5e, 0xff, 0x0d, 0x00, 0x00, 0xff, 0xff, 0x98, 0x66, 0x19, 0x2a, 0xb9, + 0x03, 0x00, 0x00, } diff --git a/proto/firecracker.proto b/proto/firecracker.proto index e04caf79d..bff0d4197 100644 --- a/proto/firecracker.proto +++ b/proto/firecracker.proto @@ -32,6 +32,8 @@ message CreateVMRequest { // Whether the VM should exit after all tasks running in it have been deleted. bool ExitAfterAllTasksDeleted = 9; + + JailerConfig JailerConfig = 10; } message StopVMRequest { @@ -54,4 +56,7 @@ message GetVMInfoResponse { message SetVMMetadataRequest { string VMID = 1; string Metadata = 2; -} \ No newline at end of file +} + +message JailerConfig { +} diff --git a/runtime/config.go b/runtime/config.go index 428c93865..b3719808a 100644 --- a/runtime/config.go +++ b/runtime/config.go @@ -47,11 +47,17 @@ type Config struct { LogLevel string `json:"log_level"` HtEnabled bool `json:"ht_enabled"` Debug bool `json:"debug"` - // If a CreateVM call specifies no network interfaces and DefaultNetworkInterfaces is non-empty, // the VM will default to using the network interfaces as specified here. This is especially // useful when a CNI-based network interface is provided in DefaultNetworkInterfaces. 
DefaultNetworkInterfaces []proto.FirecrackerNetworkInterface `json:"default_network_interfaces"` + JailerConfig JailerConfig `json:"jailer"` +} + +// JailerConfig houses a set of configurable values for jailing +// TODO: Add netns field +type JailerConfig struct { + RuncBinaryPath string `json:"runc_binary_path"` } // LoadConfig loads configuration from JSON file at 'path' @@ -76,6 +82,7 @@ func LoadConfig(path string) (*Config, error) { CPUCount: defaultCPUCount, CPUTemplate: string(defaultCPUTemplate), } + if err := json.Unmarshal(data, cfg); err != nil { return nil, errors.Wrapf(err, "failed to unmarshal config from %q", path) } diff --git a/runtime/drive_handler.go b/runtime/drive_handler.go index 552eac9a4..b82bb0d48 100644 --- a/runtime/drive_handler.go +++ b/runtime/drive_handler.go @@ -51,7 +51,10 @@ type stubDriveHandler struct { mutex sync.Mutex } -func newStubDriveHandler(path string, logger *logrus.Entry, count int) (*stubDriveHandler, error) { +// stubDrivesOpt is used to make and modify changes to the stub drives. 
+type stubDrivesOpt func(stubDrives []models.Drive) error + +func newStubDriveHandler(path string, logger *logrus.Entry, count int, opts ...stubDrivesOpt) (*stubDriveHandler, error) { h := stubDriveHandler{ RootPath: path, logger: logger, @@ -60,6 +63,13 @@ func newStubDriveHandler(path string, logger *logrus.Entry, count int) (*stubDri if err != nil { return nil, err } + + for _, opt := range opts { + if err := opt(drives); err != nil { + h.logger.WithError(err).Debug("failed to apply option to stub drives") + return nil, err + } + } h.drives = drives return &h, nil } diff --git a/runtime/firecracker-runc-config.json.example b/runtime/firecracker-runc-config.json.example new file mode 100644 index 000000000..9b2666a0e --- /dev/null +++ b/runtime/firecracker-runc-config.json.example @@ -0,0 +1,130 @@ +{ + "ociVersion": "1.0.1", + "process": { + "terminal": false, + "user": { + "uid": 0, + "gid": 0 + }, + "args": [ + "/firecracker", + "--api-sock", + "api.socket" + ], + "env": [ + "PATH=/" + ], + "cwd": "/", + "capabilities": { + "effective": [ + ], + "bounding": [ + ], + "inheritable": [ + ], + "permitted": [ + ], + "ambient": [ + ] + }, + "rlimits": [ + { + "type": "RLIMIT_NOFILE", + "hard": 1024, + "soft": 1024 + } + ], + "noNewPrivileges": true + }, + "root": { + "path": "rootfs", + "readonly": false + }, + "hostname": "runc", + "mounts": [ + { + "destination": "/proc", + "type": "proc", + "source": "proc" + } + ], + "linux": { + "devices": [ + { + "path": "/dev/kvm", + "type": "c", + "major": 10, + "minor": 232, + "fileMode": 438, + "uid": 0, + "gid": 0 + }, + { + "path": "/dev/net/tun", + "type": "c", + "major": 10, + "minor": 200, + "fileMode": 438, + "uid": 0, + "gid": 0 + } + ], + "resources": { + "devices": [ + { + "allow": false, + "access": "rwm" + }, + { + "allow": true, + "major": 10, + "minor": 232, + "access": "rwm" + }, + { + "allow": true, + "major": 10, + "minor": 200, + "access": "rwm" + } + ] + }, + "namespaces": [ + { + "type": "cgroup" + }, + 
{ + "type": "pid" + }, + { + "type": "network" + }, + { + "type": "ipc" + }, + { + "type": "uts" + }, + { + "type": "mount" + } + ], + "maskedPaths": [ + "/proc/asound", + "/proc/kcore", + "/proc/latency_stats", + "/proc/timer_list", + "/proc/timer_stats", + "/proc/sched_debug", + "/sys/firmware", + "/proc/scsi" + ], + "readonlyPaths": [ + "/proc/bus", + "/proc/fs", + "/proc/irq", + "/proc/sys", + "/proc/sysrq-trigger" + ] + } +} diff --git a/runtime/jailer.go b/runtime/jailer.go new file mode 100644 index 000000000..bfa093294 --- /dev/null +++ b/runtime/jailer.go @@ -0,0 +1,76 @@ +// Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package main + +import ( + "context" + + "github.com/firecracker-microvm/firecracker-go-sdk" + "github.com/sirupsen/logrus" + + "github.com/firecracker-microvm/firecracker-containerd/internal/vm" + "github.com/firecracker-microvm/firecracker-containerd/proto" +) + +const ( + kernelImageFileName = "kernel-image" + jailerHandlerName = "firecracker-containerd-jail-handler" + jailerFifoHandlerName = "firecracker-containerd-jail-fifo-handler" + rootfsFolder = "rootfs" + + // TODO eventually we can get rid of this when we add user namespaces to + // jailing. + jailerUID = 300000 + jailerGID = 300000 +) + +var ( + runcConfigPath = "/etc/containerd/firecracker-runc-config.json" +) + +// jailer will allow modification and provide options to the Firecracker VM +// to allow for jailing.
In addition, this will allow for given files to be exposed +// to the jailed filesystem. +type jailer interface { + // BuildJailedMachine will modify the firecracker.Config and provide + // firecracker.Opt to be passed into firecracker.NewMachine which will allow + // for the VM to be jailed. + BuildJailedMachine(cfg *Config, machineCfg *firecracker.Config, vmID string) ([]firecracker.Opt, error) + // ExposeDeviceToJail will expose the given device provided by the snapshotter + // to the jailed filesystem + ExposeDeviceToJail(path string) error + // JailPath is used to return the directory we are supposed to be working in. + JailPath() vm.Dir + // StubDrivesOptions will return a set of options used to create a new stub + // drive handler. + StubDrivesOptions() []stubDrivesOpt +} + +// newJailer is used to construct a jailer from the CreateVM request. If no +// request or jailer config was provided, then the noopJailer will be returned. +func newJailer( + ctx context.Context, + logger *logrus.Entry, + ociBundlePath string, + service *service, + request *proto.CreateVMRequest, +) (jailer, error) { + if request == nil || request.JailerConfig == nil { + l := logger.WithField("jailer", "noop") + return newNoopJailer(ctx, l, service.shimDir), nil + } + + l := logger.WithField("jailer", "runc") + return newRuncJailer(ctx, l, ociBundlePath, service.config.JailerConfig.RuncBinaryPath, jailerUID, jailerGID) +} diff --git a/runtime/jailer_test.go b/runtime/jailer_test.go new file mode 100644 index 000000000..c8683e238 --- /dev/null +++ b/runtime/jailer_test.go @@ -0,0 +1,43 @@ +// Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. 
This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package main + +import ( + "os" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestCopyFile_simple(t *testing.T) { + srcPath := "./firecracker-runc-config.json.example" + dstPath := "./test-copy-file" + + const expectedMode = 0600 + err := copyFile(srcPath, dstPath, expectedMode) + assert.NoError(t, err, "failed to copy file") + defer os.Remove(dstPath) + + info, err := os.Stat(dstPath) + assert.NoError(t, err, "failed to stat file") + assert.Equal(t, os.FileMode(expectedMode), info.Mode()) +} + +func TestCopyFile_invalidPaths(t *testing.T) { + srcPath := "./invalid.path" + dstPath := "./test-copy-file" + + err := copyFile(srcPath, dstPath, 0600) + assert.Error(t, err, "copyFile should have returned an error") +} diff --git a/runtime/noop_jailer.go b/runtime/noop_jailer.go new file mode 100644 index 000000000..420bd8957 --- /dev/null +++ b/runtime/noop_jailer.go @@ -0,0 +1,74 @@ +// Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. 
+ +package main + +import ( + "context" + + "github.com/firecracker-microvm/firecracker-containerd/internal/vm" + "github.com/firecracker-microvm/firecracker-go-sdk" + "github.com/sirupsen/logrus" +) + +// noopJailer is a jailer that returns only successful responses and performs +// no operations during calls +type noopJailer struct { + logger *logrus.Entry + shimDir vm.Dir + ctx context.Context +} + +func newNoopJailer(ctx context.Context, logger *logrus.Entry, shimDir vm.Dir) noopJailer { + return noopJailer{ + logger: logger, + shimDir: shimDir, + ctx: ctx, + } +} + +func (j noopJailer) BuildJailedMachine(cfg *Config, machineConfig *firecracker.Config, vmID string) ([]firecracker.Opt, error) { + if len(cfg.FirecrackerBinaryPath) == 0 { + return []firecracker.Opt{}, nil + } + + relSocketPath, err := j.shimDir.FirecrackerSockRelPath() + if err != nil { + return nil, err + } + + cmd := firecracker.VMCommandBuilder{}. + WithBin(cfg.FirecrackerBinaryPath). + WithSocketPath(relSocketPath). + Build(j.ctx) + + j.logger.Debug("noop operation for BuildJailedMachine") + return []firecracker.Opt{ + firecracker.WithProcessRunner(cmd), + }, nil +} + +func (j noopJailer) JailPath() vm.Dir { + j.logger.Debug("noop operation returning shim dir for JailPath") + return j.shimDir +} + +func (j noopJailer) ExposeDeviceToJail(path string) error { + j.logger.Debug("noop operation for ExposeDeviceToJail") + return nil +} + +func (j noopJailer) StubDrivesOptions() []stubDrivesOpt { + j.logger.Debug("noop operation for StubDrivesOptions") + return []stubDrivesOpt{} +} diff --git a/runtime/runc_jailer.go b/runtime/runc_jailer.go new file mode 100644 index 000000000..e22fe1ec7 --- /dev/null +++ b/runtime/runc_jailer.go @@ -0,0 +1,405 @@ +// Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. 
A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. + +package main + +import ( + "context" + "encoding/json" + "fmt" + "io" + "io/ioutil" + "os" + "os/exec" + "path/filepath" + "syscall" + + "github.com/firecracker-microvm/firecracker-go-sdk" + models "github.com/firecracker-microvm/firecracker-go-sdk/client/models" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" + + "github.com/firecracker-microvm/firecracker-containerd/internal" + "github.com/firecracker-microvm/firecracker-containerd/internal/vm" +) + +// runcJailer uses runc to set up a jailed environment for the Firecracker VM. +type runcJailer struct { + ctx context.Context + logger *logrus.Entry + // ociBundlePath is the path that will be used to create an OCI bundle, + // https://github.com/opencontainers/runtime-spec/blob/master/bundle.md + ociBundlePath string + // runcBinaryPath is the path used to execute the runc binary from. + runcBinaryPath string + uid uint32 + gid uint32 +} + +func newRuncJailer(ctx context.Context, logger *logrus.Entry, ociBundlePath, runcBinPath string, uid, gid uint32) (*runcJailer, error) { + l := logger.WithField("ociBundlePath", ociBundlePath). 
+ WithField("runcBinaryPath", runcBinPath) + + j := &runcJailer{ + ctx: ctx, + logger: l, + ociBundlePath: ociBundlePath, + runcBinaryPath: runcBinPath, + uid: uid, + gid: gid, + } + + rootPath := j.RootPath() + + const mode = os.FileMode(0700) + // Create the proper paths needed for the runc jailer + j.logger.WithField("rootPath", rootPath).Debug("Creating root drive path") + if err := mkdirAndChown(rootPath, mode, j.uid, j.gid); err != nil { + return nil, errors.Wrapf(err, "%s failed to mkdirAndChown", rootPath) + } + + return j, nil +} + +// OCIBundlePath returns the base directory from where the jail binary is run +// from +func (j runcJailer) OCIBundlePath() string { + return j.ociBundlePath +} + +// RootPath returns the root fs of the jailed system. +func (j runcJailer) RootPath() string { + return filepath.Join(j.OCIBundlePath(), rootfsFolder) +} + +// JailPath will return the OCI bundle rootfs path +func (j runcJailer) JailPath() vm.Dir { + return vm.Dir(j.RootPath()) +} + +// BuildJailedMachine will return the needed options for a jailed Firecracker +// instance. In addition, some configuration values will be overwritten to the +// jailed values, like SocketPath in the machineConfig. +func (j *runcJailer) BuildJailedMachine(cfg *Config, machineConfig *firecracker.Config, vmID string) ([]firecracker.Opt, error) { + handler := j.BuildJailedRootHandler(cfg, &machineConfig.SocketPath, vmID) + fifoHandler := j.BuildLinkFifoHandler() + // Build a new client since BuildJailedRootHandler modifies the socket path value. + client := firecracker.NewClient(machineConfig.SocketPath, j.logger, machineConfig.Debug) + + opts := []firecracker.Opt{ + firecracker.WithProcessRunner(j.jailerCommand(vmID)), + firecracker.WithClient(client), + func(m *firecracker.Machine) { + m.Handlers.FcInit = m.Handlers.FcInit.Prepend(handler) + // The fifo handler should be appended after the creation of the fifos, + // ie CreateLogFilesHandlerName.
The reason for this is the fifo handler + // that was created links the files to the jailed path, and if they do + // not exist an error will occur. The fifo handler should never do + // anything more than link the fifos and which will make it safe from the + // handler list changing order. + m.Handlers.FcInit = m.Handlers.FcInit.AppendAfter(firecracker.CreateLogFilesHandlerName, fifoHandler) + }, + } + + return opts, nil +} + +// BuildJailedRootHandler will populate the jail with the necessary files, which may be +// device nodes, hard links, and/or bind-mount targets +func (j *runcJailer) BuildJailedRootHandler(cfg *Config, socketPath *string, vmID string) firecracker.Handler { + ociBundlePath := j.OCIBundlePath() + rootPath := j.RootPath() + *socketPath = filepath.Join(rootPath, "api.socket") + + return firecracker.Handler{ + Name: jailerHandlerName, + Fn: func(ctx context.Context, m *firecracker.Machine) error { + + rootPathToConfig := filepath.Join(ociBundlePath, "config.json") + j.logger.WithField("rootPathToConfig", rootPathToConfig).Debug("Copying config") + if err := copyFile(runcConfigPath, rootPathToConfig, 0444); err != nil { + return errors.Wrapf(err, "failed to copy config from %v to %v", runcConfigPath, rootPathToConfig) + } + + j.logger.Debug("Overwritting process args of config") + if err := j.overwriteConfig(cfg, filepath.Base(m.Cfg.SocketPath), rootPathToConfig); err != nil { + return errors.Wrap(err, "failed to overwrite config.json") + } + + // copy the firecracker binary + j.logger.WithField("root path", rootPath).Debug("copying firecracker binary") + newFirecrackerBinPath := filepath.Join(rootPath, filepath.Base(cfg.FirecrackerBinaryPath)) + if err := copyFile( + cfg.FirecrackerBinaryPath, + newFirecrackerBinPath, + 0500, + ); err != nil { + return errors.Wrapf(err, "could not copy firecracker binary from path %v", cfg.FirecrackerBinaryPath) + } + if err := os.Chown(newFirecrackerBinPath, int(j.uid), int(j.gid)); err != nil { + return 
errors.Wrap(err, "failed to change ownership of binary") + } + + // copy the kernel image + newKernelImagePath := filepath.Join(rootPath, kernelImageFileName) + j.logger.WithField("newKernelImagePath", newKernelImagePath).Debug("copying kernel image") + + if err := copyFile(m.Cfg.KernelImagePath, newKernelImagePath, 0444); err != nil { + return errors.Wrap(err, "failed to mount kernel image") + } + + m.Cfg.KernelImagePath = kernelImageFileName + + // copy drives to new contents path + for i, d := range m.Cfg.Drives { + drivePath := firecracker.StringValue(d.PathOnHost) + fileName := filepath.Base(drivePath) + newDrivePath := filepath.Join(rootPath, fileName) + + f, err := os.Open(drivePath) + if err != nil { + return errors.Wrap(err, "failed to open drive file") + } + + // This closes the file in the event an error occurred, otherwise we + // call close down below. + defer f.Close() + + if !internal.IsStubDrive(f) { + info, err := os.Stat(drivePath) + if err != nil { + return errors.Wrapf(err, "failed to stat drive %q", drivePath) + } + + if err := copyFile(drivePath, newDrivePath, info.Mode()); err != nil { + return errors.Wrapf(err, "failed to copy drive %v", drivePath) + } + } + + if err := f.Close(); err != nil { + j.logger.WithError(err).Debug("failed to close drive file") + } + + j.logger.WithField("drive", newDrivePath).Debug("Adding drive") + m.Cfg.Drives[i].PathOnHost = firecracker.String(fileName) + } + + // Setting the proper path to where the vsock path should be + for i, v := range m.Cfg.VsockDevices { + j.logger.WithField("vsock path", v.Path).Debug("vsock device path being set relative to jailed directory") + + filename := filepath.Base(v.Path) + v.Path = filepath.Join("/", filename) + m.Cfg.VsockDevices[i] = v + } + + j.logger.Info("Successfully ran jailer handler") + return nil + }, + } +} + +// BuildLinkFifoHandler will return a new firecracker.Handler with the function +// that will allow linking of the fifos making them visible to Firecracker. 
+func (j runcJailer) BuildLinkFifoHandler() firecracker.Handler { + return firecracker.Handler{ + Name: jailerFifoHandlerName, + Fn: func(ctx context.Context, m *firecracker.Machine) error { + contentsPath := j.RootPath() + fifoFileName := filepath.Base(m.Cfg.LogFifo) + newFifoPath := filepath.Join(contentsPath, fifoFileName) + if err := os.Link(m.Cfg.LogFifo, newFifoPath); err != nil { + return err + } + m.Cfg.LogFifo = newFifoPath + + metricFifoFileName := filepath.Base(m.Cfg.MetricsFifo) + newMetricFifoPath := filepath.Join(contentsPath, metricFifoFileName) + if err := os.Link(m.Cfg.MetricsFifo, newMetricFifoPath); err != nil { + return err + } + m.Cfg.MetricsFifo = newMetricFifoPath + + return nil + }, + } +} + +// StubDrivesOptions will return a set of options used to create a new stub +// drive handler. +func (j runcJailer) StubDrivesOptions() []stubDrivesOpt { + return []stubDrivesOpt{ + func(drives []models.Drive) error { + for _, drive := range drives { + path := firecracker.StringValue(drive.PathOnHost) + if err := os.Chown(path, int(j.uid), int(j.gid)); err != nil { + return err + } + } + return nil + }, + } +} + +// ExposeDeviceToJail will inspect the given file, srcDevicePath, and based on the +// file type, proper handling will occur to ensure that the file is visible in +// the jail. For block devices we will use mknod to create the device and then +// set the correct permissions to ensure visibility in the jail. +func (j runcJailer) ExposeDeviceToJail(srcDevicePath string) error { + uid := j.uid + gid := j.gid + + stat := syscall.Stat_t{} + if err := syscall.Stat(srcDevicePath, &stat); err != nil { + return err + } + + // Checks file type using S_IFMT which is the bit mask for the file type. + // Here we only care about block devices, ie S_IFBLK. If it is a block type + // we will manually call mknod and create that device. 
+ if (stat.Mode & syscall.S_IFMT) == syscall.S_IFBLK { + path := filepath.Join(j.RootPath(), filepath.Dir(srcDevicePath)) + if err := os.MkdirAll(path, 0700); err != nil { + return err + } + + dst := filepath.Join(path, filepath.Base(srcDevicePath)) + if err := exposeBlockDeviceToJail(dst, int(stat.Rdev), int(uid), int(gid)); err != nil { + return err + } + } else { + return fmt.Errorf("unsupported mode: %v", stat.Mode) + } + + return nil +} + +// exposeBlockDeviceToJail will call mknod on the block device to ensure +// visibility of the device +func exposeBlockDeviceToJail(dst string, rdev, uid, gid int) error { + if err := syscall.Mknod(dst, syscall.S_IFBLK, rdev); err != nil { + return err + } + + if err := os.Chmod(dst, 0600); err != nil { + return err + } + + if err := os.Chown(dst, uid, gid); err != nil { + return err + } + + return nil +} + +func copyFile(src, dst string, mode os.FileMode) error { + srcFile, err := os.Open(src) + if err != nil { + return errors.Wrapf(err, "failed to open %v", src) + } + defer srcFile.Close() + + dstFile, err := os.OpenFile(dst, os.O_CREATE|os.O_WRONLY|os.O_EXCL, mode) + if err != nil { + return errors.Wrapf(err, "failed to open %v", dst) + } + defer dstFile.Close() + + _, err = io.Copy(dstFile, srcFile) + if err != nil { + return errors.Wrap(err, "failed to copy to destination") + } + return nil +} + +func (j runcJailer) jailerCommand(containerName string) *exec.Cmd { + cmd := exec.CommandContext(j.ctx, j.runcBinaryPath, "run", containerName) + cmd.Dir = j.OCIBundlePath() + return cmd +} + +// overwriteConfig will set the proper default values if a field had not been set.
+// +// TODO: Add netns +func (j runcJailer) overwriteConfig(cfg *Config, socketPath, configPath string) error { + spec := specs.Spec{} + configBytes, err := ioutil.ReadFile(configPath) + if err != nil { + return err + } + + if err := json.Unmarshal(configBytes, &spec); err != nil { + return err + } + + if spec.Process.User.UID != 0 || + spec.Process.User.GID != 0 { + return fmt.Errorf( + "using UID %d and GID %d, these values must not be set", + spec.Process.User.UID, + spec.Process.User.GID, + ) + } + + spec = j.setDefaultConfigValues(cfg, socketPath, spec) + + spec.Root.Path = rootfsFolder + spec.Root.Readonly = false + spec.Process.User.UID = j.uid + spec.Process.User.GID = j.gid + + configBytes, err = json.Marshal(&spec) + if err != nil { + return err + } + + if err := ioutil.WriteFile(configPath, configBytes, 0444); err != nil { + return err + } + + return nil +} + +// setDefaultConfigValues will process the spec file provided and allow any +// empty/zero values to be replaced with default values. +func (j runcJailer) setDefaultConfigValues(cfg *Config, socketPath string, spec specs.Spec) specs.Spec { + if spec.Process == nil { + spec.Process = &specs.Process{} + } + + if spec.Process.Args == nil { + cmd := firecracker.VMCommandBuilder{}. + WithBin("/firecracker"). + WithSocketPath(socketPath). 
+ // Don't need to pass in an actual context here as we are only building + // the command arguments and not actually building a command + Build(context.Background()) + + spec.Process.Args = cmd.Args + } + + return spec +} + +func mkdirAndChown(path string, mode os.FileMode, uid, gid uint32) error { + if err := os.Mkdir(path, mode); err != nil { + return err + } + + if err := os.Chown(path, int(uid), int(gid)); err != nil { + return err + } + + return nil +} diff --git a/runtime/runc_jailer_test.go b/runtime/runc_jailer_test.go new file mode 100644 index 000000000..77604939a --- /dev/null +++ b/runtime/runc_jailer_test.go @@ -0,0 +1,93 @@ +// Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"). You may +// not use this file except in compliance with the License. A copy of the +// License is located at +// +// http://aws.amazon.com/apache2.0/ +// +// or in the "license" file accompanying this file. This file is distributed +// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language governing +// permissions and limitations under the License. 
+
+package main
+
+import (
+	"context"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/firecracker-microvm/firecracker-go-sdk"
+	models "github.com/firecracker-microvm/firecracker-go-sdk/client/models"
+	"github.com/sirupsen/logrus"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+
+	"github.com/firecracker-microvm/firecracker-containerd/internal"
+)
+
+// TestBuildJailedRootHandler_Isolated runs the jailed root handler against a
+// temporary bundle directory holding empty kernel, root drive and firecracker
+// files, and checks that config.json, the rootfs directory and the kernel and
+// root drive entries under rootfs exist afterwards.
+func TestBuildJailedRootHandler_Isolated(t *testing.T) {
+	internal.RequiresIsolation(t)
+
+	// runcConfigPath is package-level state; restore it so the override does
+	// not leak into other tests in this package.
+	origRuncConfigPath := runcConfigPath
+	defer func() { runcConfigPath = origRuncConfigPath }()
+	runcConfigPath = "./firecracker-runc-config.json.example"
+
+	dir, err := ioutil.TempDir("./", "TestBuildJailedRootHandler")
+	require.NoError(t, err, "failed to create temporary directory")
+
+	defer os.RemoveAll(dir)
+	kernelImagePath := filepath.Join(dir, "kernel-image")
+	kernelImageFd, err := os.OpenFile(kernelImagePath, os.O_CREATE, 0600)
+	require.NoError(t, err, "failed to create kernel image")
+	defer kernelImageFd.Close()
+
+	rootDrivePath := filepath.Join(dir, "root-drive")
+	rootDriveFd, err := os.OpenFile(rootDrivePath, os.O_CREATE, 0600)
+	require.NoError(t, err, "failed to create root drive")
+	defer rootDriveFd.Close()
+
+	firecrackerPath := filepath.Join(dir, "firecracker")
+	firecrackerFd, err := os.OpenFile(firecrackerPath, os.O_CREATE, 0600)
+	require.NoError(t, err, "failed to create firecracker")
+	defer firecrackerFd.Close()
+
+	l := logrus.NewEntry(logrus.New())
+	jailer, err := newRuncJailer(context.Background(), l, dir, "bin-path", 123, 456)
+	require.NoError(t, err, "failed to create runc jailer")
+
+	cfg := Config{
+		FirecrackerBinaryPath: firecrackerPath,
+		KernelImagePath:       kernelImagePath,
+		RootDrive:             rootDrivePath,
+	}
+	socketPath := "/path/to/api.socket"
+	vmID := "foo"
+	handler := jailer.BuildJailedRootHandler(&cfg, &socketPath, vmID)
+
+	machine := firecracker.Machine{
+		Cfg: firecracker.Config{
+			SocketPath:      socketPath,
+			KernelImagePath: kernelImagePath,
+			Drives: []models.Drive{
+				{
+					PathOnHost:   firecracker.String(rootDrivePath),
+					IsRootDevice: firecracker.Bool(true),
+				},
+			},
+		},
+	}
+	err = handler.Fn(context.Background(), &machine)
+	assert.NoError(t, err, "jailed handler failed to run")
+
+	// The handler is expected to have populated the bundle directory.
+	_, err = os.Stat(filepath.Join(dir, "config.json"))
+	assert.NoError(t, err, "failed to copy runc config")
+
+	_, err = os.Stat(filepath.Join(dir, "rootfs"))
+	assert.NoError(t, err, "failed to create rootfs")
+
+	_, err = os.Stat(filepath.Join(dir, "rootfs", filepath.Base(kernelImagePath)))
+	assert.NoError(t, err, "failed to create kernel image")
+
+	_, err = os.Stat(filepath.Join(dir, "rootfs", filepath.Base(rootDrivePath)))
+	assert.NoError(t, err, "failed to create root drive")
+}
diff --git a/runtime/service.go b/runtime/service.go index 618f08154..32ba1ed06 100644 --- a/runtime/service.go +++ b/runtime/service.go @@ -127,6 +127,8 @@ type service struct { machineConfig *firecracker.Config vsockIOPortCount uint32 vsockPortMu sync.Mutex + + jailer jailer } func shimOpts(shimCtx context.Context) (*shim.Opts, error) { @@ -191,6 +193,7 @@ func NewService(shimCtx context.Context, id string, remotePublisher shim.Publish config: config, vmReady: make(chan struct{}), + jailer: newNoopJailer(shimCtx, logger, shimDir), } s.startEventForwarders(remotePublisher) @@ -459,30 +462,36 @@ func (s *service) createVM(requestCtx context.Context, request *proto.CreateVMRe }() s.logger.Info("creating new VM") + s.jailer, err = newJailer(s.shimCtx, s.logger, string(s.shimDir), s, request) + if err != nil { + return errors.Wrap(err, "failed to create jailer") + } s.machineConfig, err = s.buildVMConfiguration(request) if err != nil { return errors.Wrapf(err, "failed to build VM configuration") } - relSockPath, err := s.shimDir.FirecrackerSockRelPath() - if err != nil { - return errors.Wrapf(err, "failed to get relative path to firecracker api socket") + opts := []firecracker.Opt{ + firecracker.WithLogger(s.logger), } - relVSockPath, err := s.shimDir.FirecrackerVSockRelPath() + relVSockPath, err := 
s.jailer.JailPath().FirecrackerVSockRelPath() if err != nil { return errors.Wrapf(err, "failed to get relative path to firecracker vsock") } - cmd := firecracker.VMCommandBuilder{}. - WithBin(s.config.FirecrackerBinaryPath). - WithSocketPath(relSockPath). - Build(s.shimCtx) // shimCtx so the VM process is only killed when the shim shuts down + jailedOpts, err := s.jailer.BuildJailedMachine(s.config, s.machineConfig, s.vmID) + if err != nil { + return errors.Wrap(err, "failed to build jailed machine options") + } + opts = append(opts, jailedOpts...) - // use shimCtx so the VM is killed when the shim shuts down - s.machine, err = firecracker.NewMachine(s.shimCtx, *s.machineConfig, - firecracker.WithLogger(s.logger), firecracker.WithProcessRunner(cmd)) + // In the event that a noop jailer is used, we will pass in the shim context + // and have the SDK construct a new machine using that context. Otherwise, a + // custom process runner will be provided via options which will stomp over + // the shim context that was provided here. + s.machine, err = firecracker.NewMachine(s.shimCtx, *s.machineConfig, opts...) 
if err != nil { return errors.Wrapf(err, "failed to create new machine instance") } @@ -595,7 +604,7 @@ func (s *service) buildVMConfiguration(req *proto.CreateVMRequest) (*firecracker return nil, errors.Wrapf(err, "failed to get relative path to firecracker api socket") } - relVSockPath, err := s.shimDir.FirecrackerVSockRelPath() + relVSockPath, err := s.jailer.JailPath().FirecrackerVSockRelPath() if err != nil { return nil, errors.Wrapf(err, "failed to get relative path to firecracker vsock") } @@ -639,7 +648,12 @@ func (s *service) buildVMConfiguration(req *proto.CreateVMRequest) (*firecracker containerCount = 1 } - stubDriveHandler, err := newStubDriveHandler(s.shimDir.RootPath(), s.logger, containerCount) + // Create stub drives first and let stub driver handler manage the drives + stubDriveHandler, err := newStubDriveHandler( + string(s.jailer.JailPath()), + s.logger, containerCount, + s.jailer.StubDrivesOptions()..., + ) if err != nil { return nil, errors.Wrap(err, "failed to create stub drives") } @@ -721,6 +735,12 @@ func (s *service) Create(requestCtx context.Context, request *taskAPI.CreateTask return nil, err } + for _, mnt := range request.Rootfs { + if err := s.jailer.ExposeDeviceToJail(mnt.Source); err != nil { + return nil, errors.Wrapf(err, "failed to expose mount to jail %v", mnt.Source) + } + } + var driveID *string for _, mnt := range request.Rootfs { driveID, err = s.stubDriveHandler.PatchStubDrive(requestCtx, s.machine, mnt.Source) @@ -751,7 +771,7 @@ func (s *service) Create(requestCtx context.Context, request *taskAPI.CreateTask return nil, err } - relVSockPath, err := s.shimDir.FirecrackerVSockRelPath() + relVSockPath, err := s.jailer.JailPath().FirecrackerVSockRelPath() if err != nil { return nil, errors.Wrapf(err, "failed to get relative path to firecracker vsock") } @@ -866,7 +886,7 @@ func (s *service) Exec(requestCtx context.Context, req *taskAPI.ExecProcessReque return nil, err } - relVSockPath, err := 
s.shimDir.FirecrackerVSockRelPath() + relVSockPath, err := s.jailer.JailPath().FirecrackerVSockRelPath() if err != nil { return nil, errors.Wrapf(err, "failed to get relative path to firecracker vsock") } diff --git a/runtime/service_integ_test.go b/runtime/service_integ_test.go index c98a8ae1c..ebf387801 100644 --- a/runtime/service_integ_test.go +++ b/runtime/service_integ_test.go @@ -221,12 +221,32 @@ func createTapDevice(ctx context.Context, tapName string) error { } func TestMultipleVMs_Isolated(t *testing.T) { - prepareIntegTest(t) + prepareIntegTest(t, func(cfg *Config) { + cfg.JailerConfig.RuncBinaryPath = "/usr/local/bin/runc" + }) - const ( - numVMs = 3 - containersPerVM = 5 - ) + cases := []struct { + MaxContainers int32 + JailerConfig *proto.JailerConfig + }{ + { + MaxContainers: 5, + }, + { + MaxContainers: 5, + }, + { + MaxContainers: 5, + }, + { + MaxContainers: 3, + JailerConfig: &proto.JailerConfig{}, + }, + { + MaxContainers: 3, + JailerConfig: &proto.JailerConfig{}, + }, + } testTimeout := 600 * time.Second ctx, cancel := context.WithTimeout(namespaces.WithNamespace(context.Background(), defaultNamespace), testTimeout) @@ -246,9 +266,9 @@ func TestMultipleVMs_Isolated(t *testing.T) { // container ends up in the right VM by assigning each VM a network device with a unique mac address and having each container // print the mac address it sees inside its VM. 
var vmWg sync.WaitGroup - for vmID := 0; vmID < numVMs; vmID++ { + for vmID, c := range cases { vmWg.Add(1) - go func(vmID int) { + go func(vmID int, containerCount int32, jailerConfig *proto.JailerConfig) { defer vmWg.Done() tapName := fmt.Sprintf("tap%d", vmID) @@ -258,7 +278,7 @@ func TestMultipleVMs_Isolated(t *testing.T) { rootfsPath := defaultVMRootfsPath fcClient := fccontrol.NewFirecrackerClient(pluginClient.Client()) - _, err = fcClient.CreateVM(ctx, &proto.CreateVMRequest{ + req := &proto.CreateVMRequest{ VMID: strconv.Itoa(vmID), MachineCfg: &proto.FirecrackerMachineConfiguration{ MemSizeMib: 512, @@ -276,17 +296,36 @@ func TestMultipleVMs_Isolated(t *testing.T) { }, }, }, - ContainerCount: containersPerVM, - }) + ContainerCount: containerCount, + JailerConfig: jailerConfig, + } + + if jailerConfig != nil { + req.NetworkInterfaces = nil + } + + _, err = fcClient.CreateVM(ctx, req) require.NoError(t, err, "failed to create vm") var containerWg sync.WaitGroup - for containerID := 0; containerID < containersPerVM; containerID++ { + for containerID := 0; containerID < int(containerCount); containerID++ { containerWg.Add(1) go func(containerID int) { defer containerWg.Done() containerName := fmt.Sprintf("container-%d-%d", vmID, containerID) snapshotName := fmt.Sprintf("snapshot-%d-%d", vmID, containerID) + processArgs := oci.WithProcessArgs("/bin/sh", "-c", strings.Join([]string{ + fmt.Sprintf("/bin/cat /sys/class/net/%s/address", defaultVMNetDevName), + "/usr/bin/readlink /proc/self/ns/mnt", + fmt.Sprintf("/bin/sleep %d", testTimeout/time.Second), + }, " && ")) + + if jailerConfig != nil { + // TODO: this if statement block can go away once we add netns + processArgs = oci.WithProcessArgs("/bin/sh", "-c", strings.Join([]string{ + fmt.Sprintf("/bin/sleep %d", testTimeout/time.Second), + }, " && ")) + } // spawn a container that just prints the VM's eth0 mac address (which we have set uniquely per VM) newContainer, err := client.NewContainer(ctx, @@ -294,11 
+333,7 @@ func TestMultipleVMs_Isolated(t *testing.T) { containerd.WithSnapshotter(naiveSnapshotterName), containerd.WithNewSnapshot(snapshotName, image), containerd.WithNewSpec( - oci.WithProcessArgs("/bin/sh", "-c", strings.Join([]string{ - fmt.Sprintf("/bin/cat /sys/class/net/%s/address", defaultVMNetDevName), - "/usr/bin/readlink /proc/self/ns/mnt", - fmt.Sprintf("/bin/sleep %d", testTimeout/time.Second), - }, " && ")), + processArgs, oci.WithHostNamespace(specs.NetworkNamespace), firecrackeroci.WithVMID(strconv.Itoa(vmID)), ), @@ -377,6 +412,17 @@ func TestMultipleVMs_Isolated(t *testing.T) { execWg.Wait() close(execStdouts) + if jailerConfig != nil { + shimDir, err := vm.ShimDir("default", strconv.Itoa(vmID)) + require.NoError(t, err, "failed to get shim dir") + + jailer := &runcJailer{ + ociBundlePath: string(shimDir), + } + _, err = os.Stat(jailer.RootPath()) + require.NoError(t, err, "failed to stat root path of jailer") + } + // Verify each exec had the same stdout and use that value as the mount namespace that will be compared // against that of the task below. var execMntNS string @@ -408,13 +454,21 @@ func TestMultipleVMs_Isolated(t *testing.T) { } stdoutLines := strings.Split(strings.TrimSpace(taskStdout.String()), "\n") - require.Len(t, stdoutLines, 2) + lines := 2 + if jailerConfig != nil { + lines = 1 + } + require.Len(t, stdoutLines, lines) printedVMID := strings.TrimSpace(stdoutLines[0]) - require.Equal(t, vmIDtoMacAddr(uint(vmID)), printedVMID, "unexpected VMID output from container %q", containerName) + // TODO: Remove this if statement once we can add a netns which + // will allow firecracker to have visibility of the tap devices. 
+ if jailerConfig == nil { + require.Equal(t, vmIDtoMacAddr(uint(vmID)), printedVMID, "unexpected VMID output from container %q", containerName) - taskMntNS := strings.TrimSpace(stdoutLines[1]) - require.Equal(t, execMntNS, taskMntNS, "unexpected mnt NS output from container %q", containerName) + taskMntNS := strings.TrimSpace(stdoutLines[1]) + require.Equal(t, execMntNS, taskMntNS, "unexpected mnt NS output from container %q", containerName) + } case <-ctx.Done(): require.Fail(t, "context cancelled", @@ -447,7 +501,7 @@ func TestMultipleVMs_Isolated(t *testing.T) { _, err = fcClient.StopVM(ctx, &proto.StopVMRequest{VMID: strconv.Itoa(vmID), TimeoutSeconds: 5}) require.NoError(t, err, "failed to stop VM %d", vmID) - }(vmID) + }(vmID, c.MaxContainers, c.JailerConfig) } vmWg.Wait() diff --git a/runtime/service_test.go b/runtime/service_test.go index b3596b7dc..ebebbe332 100644 --- a/runtime/service_test.go +++ b/runtime/service_test.go @@ -14,6 +14,7 @@ package main import ( + "context" "fmt" "io/ioutil" "os" @@ -230,6 +231,7 @@ func TestBuildVMConfiguration(t *testing.T) { defer os.RemoveAll(tempDir) svc.shimDir = vm.Dir(tempDir) + svc.jailer = newNoopJailer(context.Background(), svc.logger, svc.shimDir) relSockPath, err := svc.shimDir.FirecrackerSockRelPath() require.NoError(t, err, "failed to get firecracker sock rel path") @@ -321,6 +323,7 @@ func TestDebugConfig(t *testing.T) { assert.NoError(t, err, "failed to create stub drive path") c.service.shimDir = vm.Dir(stubDrivePath) + c.service.jailer = newNoopJailer(context.Background(), c.service.logger, c.service.shimDir) req := proto.CreateVMRequest{} diff --git a/tools/docker/Dockerfile b/tools/docker/Dockerfile index d321ccaad..918678b05 100644 --- a/tools/docker/Dockerfile +++ b/tools/docker/Dockerfile @@ -108,6 +108,7 @@ RUN mkdir -p /var/lib/firecracker-containerd/runtime \ && curl --silent --show-error --retry 3 --max-time 30 --output default-vmlinux.bin \ 
"https://s3.amazonaws.com/spec.ccfc.min/img/hello/kernel/hello-vmlinux.bin" \ && echo "882fa465c43ab7d92e31bd4167da3ad6a82cb9230f9b0016176df597c6014cef default-vmlinux.bin" | sha256sum -c - \ + && chmod 0444 default-vmlinux.bin \ && mv default-vmlinux.bin /var/lib/firecracker-containerd/runtime/default-vmlinux.bin COPY --from=firecracker-containerd-build /home/builder/firecracker-containerd /firecracker-containerd @@ -149,6 +150,7 @@ COPY _submodules/firecracker/target/$FIRECRACKER_TARGET/release/firecracker /usr COPY _submodules/firecracker/target/$FIRECRACKER_TARGET/release/jailer /usr/local/bin/ COPY _submodules/runc/runc /usr/local/bin COPY tools/image-builder/rootfs.img /var/lib/firecracker-containerd/runtime/default-rootfs.img +COPY runtime/firecracker-runc-config.json.example /etc/containerd/firecracker-runc-config.json # pull the images the tests need into the content store so we don't need internet # access during the tests themselves @@ -157,7 +159,9 @@ RUN containerd 2>/dev/null & \ ctr content fetch docker.io/library/alpine:3.10.1 >/dev/null && \ ctr content fetch docker.io/mlabbe/iperf3:3.6-r0 >/dev/null -RUN mkdir -p /var/lib/firecracker-containerd/naive +COPY tools/docker/naive-snapshotter/entrypoint.sh /entrypoint +RUN chmod 0444 /var/lib/firecracker-containerd/runtime/default-rootfs.img \ + && mkdir -p /var/lib/firecracker-containerd/naive RUN make -C /firecracker-containerd demo-network RUN make -C /firecracker-containerd/internal test-bridged-tap && \ cp /firecracker-containerd/internal/test-bridged-tap /opt/cni/bin/ && \