Skip to content

Handle non-empty EmptyDirs used by GCS Fuse CSI Driver. #11728

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pkg/shim/v1/utils/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ go_library(
],
deps = [
"//runsc/specutils",
"@com_github_containerd_log//:go_default_library",
"@com_github_opencontainers_runtime_spec//specs-go:go_default_library",
],
)
Expand Down
69 changes: 52 additions & 17 deletions pkg/shim/v1/utils/volumes.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,12 @@ package utils

import (
"fmt"
"io"
"os"
"path/filepath"
"strings"

"github.com/containerd/log"
specs "github.com/opencontainers/runtime-spec/specs-go"
"gvisor.dev/gvisor/runsc/specutils"
)
Expand Down Expand Up @@ -81,6 +84,11 @@ func volumeSourceKey(volume string) string {
return volumeKeyPrefix + volume + ".source"
}

// volumeShareKey constructs the annotation key for volume share type.
func volumeShareKey(volume string) string {
return volumeKeyPrefix + volume + ".share"
}

// volumePath searches the volume path in the kubelet pod directory.
func volumePath(volume, uid string) (string, error) {
// TODO: Support subpath when gvisor supports pod volume bind mount.
Expand Down Expand Up @@ -121,17 +129,12 @@ func isVolumePath(volume, path string) (bool, error) {
// runsc should use these two setting to infer EmptyDir medium:
// - tmpfs annotation type + tmpfs mount type = memory-backed EmptyDir
// - tmpfs annotation type + bind mount type = disk-backed EmptyDir
//
// NOTE(b/416567832): Some CSI drivers (like GCS FUSE driver) use EmptyDirs to
// communicate with the Pod over a UDS. While not foolproof, we detect such
// EmptyDirs by checking if the host directory is not empty and turn off the
// EmptyDir optimization for them by configuring them as normal bind mounts.
func UpdateVolumeAnnotations(s *specs.Spec) (bool, error) {
var uid string
if IsSandbox(s) {
var err error
uid, err = podUID(s)
if err != nil {
// Skip if we can't get pod UID, because this doesn't work
// for containerd 1.1.
return false, nil
}
}
updated := false
for k, v := range s.Annotations {
if !isVolumeKey(k) {
Expand All @@ -141,34 +144,51 @@ func UpdateVolumeAnnotations(s *specs.Spec) (bool, error) {
continue
}
volume := volumeName(k)
if uid != "" {
if IsSandbox(s) {
// This is the root (first) container. Mount annotations are only
// consumed from this container's spec. So fix mount annotations by:
// 1. Adding source annotation.
// 2. Fixing type annotation.
uid, err := podUID(s)
if err != nil {
// Skip if we can't get pod UID, because this doesn't work
// for containerd 1.1.
return false, nil
}
path, err := volumePath(volume, uid)
if err != nil {
return false, fmt.Errorf("get volume path for %q: %w", volume, err)
}
s.Annotations[volumeSourceKey(volume)] = path
if strings.Contains(path, emptyDirVolumesDir) {
s.Annotations[k] = "tmpfs" // See note about EmptyDir.
if isDirEmpty(path) {
s.Annotations[k] = "tmpfs" // See note about EmptyDir.
} else {
// This is a non-empty EmptyDir volume. Configure it as a bind mount.
log.L.Infof("Non-empty EmptyDir volume %q, configuring bind mount annotations", volume)
s.Annotations[k] = "bind"
s.Annotations[volumeShareKey(volume)] = "shared"
}
}
updated = true
} else {
// This is a sub-container. Mount annotations are ignored. So no need to
// bother fixing those.
// bother fixing those. An error is returned for sandbox if source
// annotation is not successfully applied, so it is guaranteed that the
// source annotation for sandbox has already been successfully applied at
// this point. Update mount type in spec.Mounts if required.
for i := range s.Mounts {
// An error is returned for sandbox if source annotation is not
// successfully applied, so it is guaranteed that the source annotation
// for sandbox has already been successfully applied at this point.
//
// The volume name is unique inside a pod, so matching without podUID
// is fine here.
//
// TODO: Pass podUID down to shim for containers to do more accurate
// matching.
if yes, _ := isVolumePath(volume, s.Mounts[i].Source); yes {
if strings.Contains(s.Mounts[i].Source, emptyDirVolumesDir) && !isDirEmpty(s.Mounts[i].Source) {
// This is a non-empty EmptyDir volume. Don't change the mount type.
log.L.Infof("Non-empty EmptyDir volume %q, not changing its mount type", volume)
continue
}
// Container mount type must match the mount type specified by
// admission controller. See note about EmptyDir.
specutils.ChangeMountType(&s.Mounts[i], v)
Expand All @@ -187,6 +207,21 @@ func UpdateVolumeAnnotations(s *specs.Spec) (bool, error) {
return updated, nil
}

func isDirEmpty(path string) bool {
f, err := os.Open(path)
if err != nil {
log.L.Warningf("failed to open %q to check if it is empty: %v", path, err)
return true
}
defer f.Close()

_, err = f.Readdirnames(1)
if err == io.EOF {
return true
}
return false
}

// configureShm sets up annotations to mount /dev/shm as a pod shared tmpfs
// mount inside containers.
//
Expand Down
99 changes: 94 additions & 5 deletions pkg/shim/v1/utils/volumes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,17 +32,25 @@ func TestUpdateVolumeAnnotations(t *testing.T) {
kubeletPodsDir = dir

const (
testPodUID = "testuid"
testVolumeName = "testvolume"
testLogDirPath = "/var/log/pods/testns_testname_" + testPodUID
testLegacyLogDirPath = "/var/log/pods/" + testPodUID
testPodUID = "testuid"
testVolumeName = "testvolume"
testNonEmptyVolumeName = "nonemptyvolume"
testLogDirPath = "/var/log/pods/testns_testname_" + testPodUID
testLegacyLogDirPath = "/var/log/pods/" + testPodUID
)
testVolumePath := fmt.Sprintf("%s/%s/volumes/%s/%s", dir, testPodUID, emptyDirVolumesDir, testVolumeName)

if err := os.MkdirAll(testVolumePath, 0755); err != nil {
t.Fatalf("Create test volume: %v", err)
}

testNonEmptyVolumePath := fmt.Sprintf("%s/%s/volumes/%s/%s", dir, testPodUID, emptyDirVolumesDir, testNonEmptyVolumeName)
if err := os.MkdirAll(testNonEmptyVolumePath, 0755); err != nil {
t.Fatalf("Create test volume: %v", err)
}
if err := os.WriteFile(testNonEmptyVolumePath+"/file", []byte("hello"), 0644); err != nil {
t.Fatalf("Create test volume: %v", err)
}

for _, test := range []struct {
name string
spec *specs.Spec
Expand Down Expand Up @@ -144,6 +152,87 @@ func TestUpdateVolumeAnnotations(t *testing.T) {
},
expectUpdate: true,
},
{
name: "non-empty volume for sandbox",
spec: &specs.Spec{
Annotations: map[string]string{
sandboxLogDirAnnotation: testLogDirPath,
ContainerTypeAnnotation: containerTypeSandbox,
volumeKeyPrefix + testNonEmptyVolumeName + ".share": "pod",
volumeKeyPrefix + testNonEmptyVolumeName + ".type": "tmpfs",
volumeKeyPrefix + testNonEmptyVolumeName + ".options": "ro",
},
},
expected: &specs.Spec{
Annotations: map[string]string{
sandboxLogDirAnnotation: testLogDirPath,
ContainerTypeAnnotation: containerTypeSandbox,
volumeKeyPrefix + testNonEmptyVolumeName + ".share": "shared",
volumeKeyPrefix + testNonEmptyVolumeName + ".type": "bind",
volumeKeyPrefix + testNonEmptyVolumeName + ".options": "ro",
volumeKeyPrefix + testNonEmptyVolumeName + ".source": testNonEmptyVolumePath,
},
},
expectUpdate: true,
},
{
name: "non-empty volume for container",
spec: &specs.Spec{
Mounts: []specs.Mount{
{
Destination: "/test",
Type: "bind",
Source: testNonEmptyVolumePath,
Options: []string{"ro"},
},
},
Annotations: map[string]string{
ContainerTypeAnnotation: ContainerTypeContainer,
volumeKeyPrefix + testNonEmptyVolumeName + ".share": "pod",
volumeKeyPrefix + testNonEmptyVolumeName + ".type": "tmpfs",
volumeKeyPrefix + testNonEmptyVolumeName + ".options": "ro",
},
},
expected: &specs.Spec{
Mounts: []specs.Mount{
{
Destination: "/test",
Type: "bind",
Source: testNonEmptyVolumePath,
Options: []string{"ro"},
},
},
Annotations: map[string]string{
ContainerTypeAnnotation: ContainerTypeContainer,
volumeKeyPrefix + testNonEmptyVolumeName + ".share": "pod",
volumeKeyPrefix + testNonEmptyVolumeName + ".type": "tmpfs",
volumeKeyPrefix + testNonEmptyVolumeName + ".options": "ro",
},
},
},
{
name: "bind: volume annotations for sandbox",
spec: &specs.Spec{
Annotations: map[string]string{
sandboxLogDirAnnotation: testLogDirPath,
ContainerTypeAnnotation: containerTypeSandbox,
volumeKeyPrefix + testVolumeName + ".share": "container",
volumeKeyPrefix + testVolumeName + ".type": "bind",
volumeKeyPrefix + testVolumeName + ".options": "ro",
},
},
expected: &specs.Spec{
Annotations: map[string]string{
sandboxLogDirAnnotation: testLogDirPath,
ContainerTypeAnnotation: containerTypeSandbox,
volumeKeyPrefix + testVolumeName + ".share": "container",
volumeKeyPrefix + testVolumeName + ".type": "tmpfs",
volumeKeyPrefix + testVolumeName + ".options": "ro",
volumeKeyPrefix + testVolumeName + ".source": testVolumePath,
},
},
expectUpdate: true,
},
{
name: "bind: volume annotations for container",
spec: &specs.Spec{
Expand Down
6 changes: 6 additions & 0 deletions runsc/boot/mount_hints.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,12 @@ func (m *MountHint) ShouldShareMount() bool {
(m.Share == container || m.Share == pod)
}

// IsSandboxLocal returns true if this mount is only used by the sandbox and
// has no external observers.
func (m *MountHint) IsSandboxLocal() bool {
return m.Share == container || m.Share == pod
}

// checkCompatible verifies that shared mount is compatible with master.
// Master options must be the same or less restrictive than the container mount,
// e.g. master can be 'rw' while container mounts as 'ro'.
Expand Down
2 changes: 1 addition & 1 deletion runsc/container/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -965,7 +965,7 @@ func (c *Container) initGoferConfs(ovlConf config.Overlay2, mountHints *boot.Pod
if specutils.IsReadonlyMount(c.Spec.Mounts[i].Options) {
overlayMedium = config.NoOverlay
}
if hint := mountHints.FindMount(c.Spec.Mounts[i].Source); hint != nil {
if hint := mountHints.FindMount(c.Spec.Mounts[i].Source); hint != nil && hint.IsSandboxLocal() {
// Note that we want overlayMedium=self even if this is a read-only mount so that
// the shared mount is created correctly. Future containers may mount this writably.
overlayMedium = config.SelfOverlay
Expand Down