Skip to content

Make sure containers are deleted correctly #225

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 24, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 31 additions & 3 deletions runtime/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ type service struct {
agentClient taskAPI.TaskService
eventBridgeClient eventbridge.Getter
stubDriveHandler stubDriveHandler
exitAfterAllTasksDeleted bool
exitAfterAllTasksDeleted bool // exit the VM and shim when all tasks are deleted

machine *firecracker.Machine
machineConfig *firecracker.Config
Expand Down Expand Up @@ -284,7 +284,8 @@ func (s *service) StartShim(shimCtx context.Context, containerID, containerdBina

logrus.SetOutput(logFifo)

log.G(shimCtx).WithField("id", containerID).Debug("StartShim")
log := log.G(shimCtx).WithField("id", containerID)
log.Debug("StartShim")

// If we are running a shim start routine, we can safely assume our current working
// directory is the bundle directory
Expand Down Expand Up @@ -319,6 +320,10 @@ func (s *service) StartShim(shimCtx context.Context, containerID, containerdBina
// task is deleted
containerCount = 1
exitAfterAllTasksDeleted = true

log.Infof("will start a single-task VM %s since no VMID has been provided", s.vmID)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea to add these!

} else {
log.Infof("will start a persistent VM %s", s.vmID)
}

client, err := ttrpcutil.NewClient(containerdAddress + ".ttrpc")
Expand Down Expand Up @@ -773,13 +778,36 @@ func (s *service) Start(requestCtx context.Context, req *taskAPI.StartRequest) (

func (s *service) Delete(requestCtx context.Context, req *taskAPI.DeleteRequest) (*taskAPI.DeleteResponse, error) {
defer logPanicAndDie(log.G(requestCtx))
log.G(requestCtx).WithFields(logrus.Fields{"id": req.ID, "exec_id": req.ExecID}).Debug("delete")
logger := log.G(requestCtx).WithFields(logrus.Fields{"id": req.ID, "exec_id": req.ExecID})

logger.Debug("delete")

resp, err := s.taskManager.DeleteProcess(requestCtx, req, s.agentClient)
if err != nil {
return nil, err
}

// Only delete a process as like runc when there is ExecID
// https://github.com/containerd/containerd/blob/f3e148b1ccf268450c87427b5dbb6187db3d22f1/runtime/v2/runc/container.go#L320
if req.ExecID != "" {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I'd add a comment explaining why we skip deletion in this case

return resp, nil
}

// Otherwise, delete the container
dir, err := s.shimDir.BundleLink(req.ID)
if err != nil {
return nil, errors.Wrapf(err, "failed to find the bundle directory of the container: %s", req.ID)
}

_, err = os.Stat(dir.RootPath())
if os.IsNotExist(err) {
return nil, errors.Wrapf(err, "failed to find the bundle directory of the container: %s", dir.RootPath())
}

if err = os.Remove(dir.RootPath()); err != nil {
return nil, errors.Wrapf(err, "failed to remove the bundle directory of the container: %s", dir.RootPath())
}

return resp, nil
}

Expand Down
121 changes: 121 additions & 0 deletions runtime/service_integ_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"context"
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"strconv"
Expand Down Expand Up @@ -533,3 +534,123 @@ vdf 254:80 0 512B 0 disk`
"context cancelled while waiting for container %s to exit, err: %v", containerName, ctx.Err())
}
}

func startAndWaitTask(ctx context.Context, t *testing.T, c containerd.Container) string {
var stdout bytes.Buffer
var stderr bytes.Buffer

task, err := c.NewTask(ctx, cio.NewCreator(cio.WithStreams(nil, &stdout, &stderr)))
require.NoError(t, err, "failed to create task for container %s", c.ID())

exitCh, err := task.Wait(ctx)
require.NoError(t, err, "failed to wait on task for container %s", c.ID())

err = task.Start(ctx)
require.NoError(t, err, "failed to start task for container %s", c.ID())
defer func() {
status, err := task.Delete(ctx)
require.NoError(t, status.Error())
require.NoError(t, err, "failed to delete task for container %s", c.ID())
}()

select {
case exitStatus := <-exitCh:
require.NoError(t, exitStatus.Error(), "failed to retrieve exitStatus")
require.Equal(t, uint32(0), exitStatus.ExitCode())
require.Equal(t, "", stderr.String())
case <-ctx.Done():
require.Fail(t, "context cancelled",
"context cancelled while waiting for container %s to exit, err: %v", c.ID(), ctx.Err())
}

return stdout.String()
}

func testCreateContainerWithSameName(t *testing.T, vmID string) {
ctx := namespaces.WithNamespace(context.Background(), "default")

pluginClient, err := ttrpcutil.NewClient(containerdSockPath + ".ttrpc")
require.NoError(t, err, "failed to create ttrpc client")

// Explicitly specify Container Count = 2 to workaround #230
if len(vmID) != 0 {
fcClient := fccontrol.NewFirecrackerClient(pluginClient.Client())
_, err = fcClient.CreateVM(ctx, &proto.CreateVMRequest{
VMID: vmID,
RootDrive: &proto.FirecrackerDrive{
PathOnHost: defaultRootfsPath,
IsReadOnly: true,
IsRootDevice: true,
},
ContainerCount: 2,
})
require.NoError(t, err)
}

withNewSpec := containerd.WithNewSpec(oci.WithProcessArgs("echo", "hello"), firecrackeroci.WithVMID(vmID))

client, err := containerd.New(containerdSockPath, containerd.WithDefaultRuntime(firecrackerRuntime))
require.NoError(t, err, "unable to create client to containerd service at %s, is containerd running?", containerdSockPath)
defer client.Close()

image, err := client.Pull(ctx, debianDockerImage, containerd.WithPullUnpack, containerd.WithPullSnapshotter(naiveSnapshotterName))
require.NoError(t, err, "failed to pull image %s, is the the %s snapshotter running?", debianDockerImage, naiveSnapshotterName)

containerName := fmt.Sprintf("%s-%d", t.Name(), time.Now().UnixNano())
snapshotName := fmt.Sprintf("%s-snapshot", containerName)

containerPath := fmt.Sprintf("/run/containerd/io.containerd.runtime.v2.task/default/%s", containerName)

c1, err := client.NewContainer(ctx,
containerName,
containerd.WithSnapshotter(naiveSnapshotterName),
containerd.WithNewSnapshot(snapshotName, image),
withNewSpec,
)
require.NoError(t, err, "failed to create container %s", containerName)
require.Equal(t, "hello\n", startAndWaitTask(ctx, t, c1))

// All resources regarding the container will be deleted
err = c1.Delete(ctx, containerd.WithSnapshotCleanup)
require.NoError(t, err, "failed to delete container %s", containerName)

_, err = os.Stat(containerPath)
require.True(t, os.IsNotExist(err))

if len(vmID) != 0 {
shimPath := fmt.Sprintf("%s/default/%s/%s", varRunDir, vmID, containerName)
_, err = os.Stat(shimPath)
require.True(t, os.IsNotExist(err))
}

// So, we can launch a new container with the same name
c2, err := client.NewContainer(ctx,
containerName,
containerd.WithSnapshotter(naiveSnapshotterName),
containerd.WithNewSnapshot(snapshotName, image),
withNewSpec,
)
require.NoError(t, err, "failed to create container %s", containerName)
require.Equal(t, "hello\n", startAndWaitTask(ctx, t, c2))

err = c2.Delete(ctx, containerd.WithSnapshotCleanup)
require.NoError(t, err, "failed to delete container %s", containerName)

_, err = os.Stat(containerPath)
require.True(t, os.IsNotExist(err))

if len(vmID) != 0 {
shimPath := fmt.Sprintf("%s/default/%s/%s", varRunDir, vmID, containerName)
_, err = os.Stat(shimPath)
require.True(t, os.IsNotExist(err))
}
}

func TestCreateContainerWithSameName_Isolated(t *testing.T) {
internal.RequiresIsolation(t)

testCreateContainerWithSameName(t, "")

vmID := fmt.Sprintf("same-vm-%d", time.Now().UnixNano())
testCreateContainerWithSameName(t, vmID)
}