Skip to content

Commit 13050dd

Browse files
authored
Merge pull request #348 from kzys/stopvm-timeout
Returns codes.DeadlineExceeded when we forcebily kill a VM
2 parents 831c73f + f2b0cce commit 13050dd

File tree

7 files changed

+84
-32
lines changed

7 files changed

+84
-32
lines changed

Makefile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,10 @@ DEFAULT_RUNC_JAILER_CONFIG_INSTALLPATH?=/etc/containerd/firecracker-runc-config.
183183
$(DEFAULT_RUNC_JAILER_CONFIG_INSTALLPATH): $(ETC_CONTAINERD) runtime/firecracker-runc-config.json.example
184184
install -D -o root -g root -m400 runtime/firecracker-runc-config.json.example $@
185185

186+
ROOTFS_SLOW_REBOOT_INSTALLPATH=$(FIRECRACKER_CONTAINERD_RUNTIME_DIR)/rootfs-slow-reboot.img
187+
$(ROOTFS_SLOW_REBOOT_INSTALLPATH): tools/image-builder/rootfs-slow-reboot.img $(FIRECRACKER_CONTAINERD_RUNTIME_DIR)
188+
install -D -o root -g root -m400 $< $@
189+
186190
.PHONY: default-vmlinux
187191
default-vmlinux: $(DEFAULT_VMLINUX_NAME)
188192

@@ -195,6 +199,8 @@ install-default-rootfs: $(DEFAULT_ROOTFS_INSTALLPATH)
195199
.PHONY: install-default-runc-jailer-config
196200
install-default-runc-jailer-config: $(DEFAULT_RUNC_JAILER_CONFIG_INSTALLPATH)
197201

202+
.PHONY: install-test-rootfs
203+
install-test-rootfs: $(ROOTFS_SLOW_REBOOT_INSTALLPATH)
198204

199205
##########################
200206
# CNI Network

agent/main.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,12 @@ func main() {
108108
}
109109

110110
group.Go(func() error {
111-
return server.Serve(shimCtx, listener)
111+
err := server.Serve(shimCtx, listener)
112+
if err == ttrpc.ErrServerClosed {
113+
// Calling server.Shutdown() from another goroutine will cause ErrServerClosed, which is fine.
114+
return nil
115+
}
116+
return err
112117
})
113118

114119
group.Go(func() error {

firecracker-control/local.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,6 @@ func (s *local) StopVM(requestCtx context.Context, req *proto.StopVMRequest) (*e
218218

219219
resp, err := client.StopVM(requestCtx, req)
220220
if err != nil {
221-
err = errors.Wrap(err, "shim client failed to stop VM")
222221
s.logger.WithError(err).Error()
223222
return nil, err
224223
}

runtime/service.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1186,6 +1186,7 @@ func (s *service) shutdown(requestCtx, shutdownCtx context.Context, req *taskAPI
11861186
}
11871187
s.logger.WithError(shutdownErr).Error("the VM returns unknown error")
11881188
case <-shutdownCtx.Done():
1189+
shutdownErr = status.Errorf(codes.DeadlineExceeded, "the VM %q will be killed forcebily", req.ID)
11891190
s.logger.Error("the VM hasn't been stopped before the context's deadline")
11901191
}
11911192

runtime/service_integ_test.go

Lines changed: 54 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ import (
5050
"github.com/firecracker-microvm/firecracker-containerd/proto"
5151
fccontrol "github.com/firecracker-microvm/firecracker-containerd/proto/service/fccontrol/ttrpc"
5252
"github.com/firecracker-microvm/firecracker-containerd/runtime/firecrackeroci"
53+
54+
"google.golang.org/grpc/codes"
55+
"google.golang.org/grpc/status"
5356
)
5457

5558
const (
@@ -1303,32 +1306,63 @@ func TestStopVM_Isolated(t *testing.T) {
13031306
pluginClient, err := ttrpcutil.NewClient(containerdSockPath + ".ttrpc")
13041307
require.NoError(err, "failed to create ttrpc client")
13051308

1306-
vmID := testNameToVMID(t.Name())
1309+
tests := []struct {
1310+
name string
1311+
createVMRequest proto.CreateVMRequest
1312+
errorCode codes.Code
1313+
}{
1314+
{
1315+
name: "Successful",
1316+
createVMRequest: proto.CreateVMRequest{},
1317+
errorCode: codes.OK,
1318+
},
13071319

1308-
fcClient := fccontrol.NewFirecrackerClient(pluginClient.Client())
1309-
_, err = fcClient.CreateVM(ctx, &proto.CreateVMRequest{VMID: vmID})
1310-
require.NoError(err)
1320+
// Firecracker is too fast to test a case where we hit the timeout on a StopVMRequest.
1321+
// The rootfs below explicitly sleeps 60 seconds after shutting down the agent to simulate the case.
1322+
{
1323+
name: "Timeout",
1324+
createVMRequest: proto.CreateVMRequest{
1325+
RootDrive: &proto.FirecrackerRootDrive{
1326+
HostPath: "/var/lib/firecracker-containerd/runtime/rootfs-slow-reboot.img",
1327+
},
1328+
},
1329+
errorCode: codes.DeadlineExceeded,
1330+
},
1331+
}
13111332

1312-
c, err := client.NewContainer(ctx,
1313-
"container",
1314-
containerd.WithSnapshotter(defaultSnapshotterName()),
1315-
containerd.WithNewSnapshot("snapshot", image),
1316-
containerd.WithNewSpec(oci.WithProcessArgs("/bin/echo", "-n", "hello"), firecrackeroci.WithVMID(vmID)),
1317-
)
1318-
require.NoError(err)
1333+
for _, test := range tests {
1334+
test := test
1335+
t.Run(test.name, func(t *testing.T) {
1336+
vmID := testNameToVMID(t.Name())
1337+
createVMRequest := test.createVMRequest
1338+
createVMRequest.VMID = vmID
13191339

1320-
stdout := startAndWaitTask(ctx, t, c)
1321-
require.Equal("hello", stdout)
1340+
fcClient := fccontrol.NewFirecrackerClient(pluginClient.Client())
1341+
_, err = fcClient.CreateVM(ctx, &createVMRequest)
1342+
require.NoError(err)
13221343

1323-
shimProcesses, err := internal.WaitForProcessToExist(ctx, time.Second, findShim)
1324-
require.NoError(err, "failed waiting for expected shim process %q to come up", shimProcessName)
1325-
require.Len(shimProcesses, 1, "expected only one shim process to exist")
1344+
c, err := client.NewContainer(ctx,
1345+
"container-"+vmID,
1346+
containerd.WithSnapshotter(defaultSnapshotterName()),
1347+
containerd.WithNewSnapshot("snapshot-"+vmID, image),
1348+
containerd.WithNewSpec(oci.WithProcessArgs("/bin/echo", "-n", "hello"), firecrackeroci.WithVMID(vmID)),
1349+
)
1350+
require.NoError(err)
13261351

1327-
_, err = fcClient.StopVM(ctx, &proto.StopVMRequest{VMID: vmID})
1328-
require.NoError(err)
1352+
stdout := startAndWaitTask(ctx, t, c)
1353+
require.Equal("hello", stdout)
13291354

1330-
err = internal.WaitForPidToExit(ctx, time.Second, shimProcesses[0].Pid)
1331-
require.NoError(err, "shim hasn't been terminated")
1355+
shimProcesses, err := internal.WaitForProcessToExist(ctx, time.Second, findShim)
1356+
require.NoError(err, "failed waiting for expected shim process %q to come up", shimProcessName)
1357+
require.Len(shimProcesses, 1, "expected only one shim process to exist")
1358+
1359+
_, err = fcClient.StopVM(ctx, &proto.StopVMRequest{VMID: vmID})
1360+
require.Equal(status.Code(err), test.errorCode)
1361+
1362+
err = internal.WaitForPidToExit(ctx, time.Second, shimProcesses[0].Pid)
1363+
require.NoError(err, "shim hasn't been terminated")
1364+
})
1365+
}
13321366
}
13331367

13341368
func findShim(ctx context.Context, p *process.Process) (bool, error) {

tools/docker/Dockerfile.integ-test

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ RUN --mount=type=bind,target=/src make -C /src \
5151
install-default-rootfs \
5252
install-default-runc-jailer-config \
5353
install-test-cni-bins \
54+
install-test-rootfs \
5455
demo-network
5556

5657
COPY tools/docker/entrypoint.sh /entrypoint

tools/image-builder/Makefile

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# permissions and limitations under the License.
1313

1414
UID := $(shell id -u)
15-
WORKDIR := rootfs
15+
WORKDIR := tmp/rootfs
1616
WORKDIRLOC := $(shell readlink -f $(WORKDIR))
1717
IMAGE_DIRS := /dev /bin /etc /etc/init.d /tmp /var /run /proc /sys /container/rootfs /agent /rom /overlay
1818
DIRS := $(foreach dir,$(IMAGE_DIRS),"$(WORKDIR)$(dir)")
@@ -37,7 +37,7 @@ fi
3737
touch --reference=debootstrap_stamp --no-create "$(WORKDIR)"
3838
endef
3939

40-
all: rootfs.img
40+
all: rootfs.img rootfs-slow-reboot.img
4141

4242
$(WORKDIR):
4343
mkdir $(WORKDIR)
@@ -67,6 +67,13 @@ endif
6767
rootfs.img: files_common_stamp files_debootstrap_stamp files_ephemeral_stamp
6868
mksquashfs "$(WORKDIR)" rootfs.img -noappend
6969

70+
# Intentionally break the rootfs to simulate the case where StopVM is taking longer than the minimal timeout
71+
rootfs-slow-reboot.img: files_common_stamp files_debootstrap_stamp files_ephemeral_stamp
72+
rm -fr tmp/$@
73+
cp -a "$(WORKDIR)" tmp/$@
74+
echo 'ExecStop=/bin/sleep 60' >> tmp/$@/etc/systemd/system/firecracker-agent.service
75+
mksquashfs tmp/$@ $@ -noappend
76+
7077
builder: builder_stamp
7178

7279
builder_stamp:
@@ -81,21 +88,20 @@ builder_stamp:
8188
%-in-docker: builder_stamp
8289
docker run --rm \
8390
--security-opt=apparmor=unconfined \
84-
-it --volume $(CURDIR):/src \
91+
-it \
92+
--volume $(CURDIR):/src \
93+
--volume /src/tmp \
8594
--cap-add=sys_admin \
8695
--cap-add=sys_chroot \
8796
--env=DEBMIRROR \
8897
fc-image-builder:$(DOCKER_IMAGE_TAG) $(subst -in-docker,,$@)
8998

9099
clean:
91100
-rm -f *stamp
92-
if [ -d $(WORKDIR) ] || [ -e rootfs.img ]; then \
93-
if [ $(UID) -eq 0 ]; then \
94-
rm -rf $(WORKDIR) \
95-
rm -f rootfs.img ;\
96-
else \
97-
$(MAKE) clean-in-docker ;\
98-
fi ; \
101+
if [ $(UID) -eq 0 ]; then \
102+
rm -f rootfs.img rootfs-slow-reboot.img ;\
103+
else \
104+
$(MAKE) clean-in-docker ;\
99105
fi
100106

101107
distclean: clean

0 commit comments

Comments
 (0)