Add haveged to image-builder's rootfs.

sipsma · sipsma · commit 121afa344490 · 2019-11-13T23:46:36.000Z
haveged ensures that there is sufficient entropy available to processes running
within the microvm. Without sufficient entropy, it is fairly easy for processes
to get blocked on reading /dev/random or making getrandom() syscalls, including
during boot (which can cause CreateVM calls to fail if the agent process
gets blocked).

haveged was chosen as it enforces no minimum kernel requirements, does not
add CPU requirements (i.e. existence of RDRAND or similar instructions) and is
currently used by Debian for related use cases such as seeding entropy in their
installer.

One other option was "rngd", which has versions that support use of RDRAND.
It was not chosen as RDRAND is not universally trusted or portable. Similarly,
use of the "random.trust_cpu=on" kernel boot parameter was ruled out for now
as it relies on RDRAND and additionally enforces a minimum kernel version of
4.19.

Signed-off-by: Erik Sipsma <sipsma@amazon.com>
diff --git a/runtime/service_integ_test.go b/runtime/service_integ_test.go
@@ -1104,3 +1104,116 @@ func TestUpdateVMMetadata_Isolated(t *testing.T) {
 	t.Logf("stdout output from task %q: %s", containerName, stdout)
 	assert.Equalf(t, "45", stdout, "container %q did not emit expected stdout", containerName)
 }
+
+// TestRandomness_Isolated validates that there is a reasonable amount of entropy available to the VM
+// and thus randomness available to containers (test reads about 2.5MB from /dev/random w/ an overall
+// test timeout of 60 seconds). It also validates that the quality of the randomness passes the
+// rngtest utility's suite.
+func TestRandomness_Isolated(t *testing.T) {
+	prepareIntegTest(t)
+
+	ctx, cancel := context.WithTimeout(namespaces.WithNamespace(context.Background(), defaultNamespace), 60*time.Second)
+	defer cancel()
+
+	client, err := containerd.New(containerdSockPath, containerd.WithDefaultRuntime(firecrackerRuntime))
+	require.NoError(t, err, "unable to create client to containerd service at %s, is containerd running?", containerdSockPath)
+	defer client.Close()
+
+	image, err := alpineImage(ctx, client, defaultSnapshotterName())
+	require.NoError(t, err, "failed to get alpine image")
+	containerName := "test-entropy"
+
+	const blockCount = 1024
+	ddContainer, err := client.NewContainer(ctx,
+		containerName,
+		containerd.WithSnapshotter(defaultSnapshotterName()),
+		containerd.WithNewSnapshot("test-entropy-snapshot", image),
+		containerd.WithNewSpec(
+			oci.WithDefaultUnixDevices,
+			// Use blocksize of 2500 as rngtest consumes data in blocks of 2500 bytes.
+			oci.WithProcessArgs("/bin/dd", "iflag=fullblock", "if=/dev/random", "of=/dev/stdout", "bs=2500",
+				fmt.Sprintf("count=%d", blockCount)),
+		),
+	)
+	require.NoError(t, err, "failed to create container %s", containerName)
+
+	// rngtest is a utility to "check the randomness of data using FIPS 140-2 tests", installed as part of
+	// the container image this test is running in. We pipe the output from "dd if=/dev/random" to rngtest
+	// to validate the quality of the randomness inside the VM.
+	// TODO It would be conceptually simpler to just run rngtest inside the container in the VM, but
+	// doing so would require some updates to our test infrastructure to support custom-built container
+	// images running in VMs (right now it's only feasible to use publicly available container images).
+	// Right now, it's instead run as a subprocess of this test outside the VM.
+	var rngtestStdout bytes.Buffer
+	var rngtestStderr bytes.Buffer
+	rngtestCmd := exec.CommandContext(ctx, "rngtest",
+		// We set this to 1 less than the number of blocks read by dd above to account for the fact that
+		// the first 32 bits read by rngtest are not used for the tests themselves.
+		fmt.Sprintf("--blockcount=%d", blockCount-1),
+	)
+	rngtestCmd.Stdout = &rngtestStdout
+	rngtestCmd.Stderr = &rngtestStderr
+	rngtestStdin, err := rngtestCmd.StdinPipe()
+	require.NoError(t, err, "failed to get pipe to rngtest command's stdin")
+
+	ddStdout := rngtestStdin
+	var ddStderr bytes.Buffer
+
+	task, err := ddContainer.NewTask(ctx, cio.NewCreator(cio.WithStreams(nil, ddStdout, &ddStderr)))
+	require.NoError(t, err, "failed to create task for dd container")
+
+	exitCh, err := task.Wait(ctx)
+	require.NoError(t, err, "failed to wait on task for dd container")
+
+	err = task.Start(ctx)
+	require.NoError(t, err, "failed to start task for dd container")
+
+	err = rngtestCmd.Start()
+	require.NoError(t, err, "failed to start rngtest")
+
+	select {
+	case exitStatus := <-exitCh:
+		assert.NoError(t, exitStatus.Error(), "failed to retrieve exitStatus")
+		assert.EqualValues(t, 0, exitStatus.ExitCode())
+
+		status, err := task.Delete(ctx)
+		assert.NoErrorf(t, err, "failed to delete dd task after exit")
+		if status != nil {
+			assert.NoError(t, status.Error())
+		}
+
+		t.Logf("stderr output from dd:\n %s", ddStderr.String())
+	case <-ctx.Done():
+		require.Fail(t, "context cancelled",
+			"context cancelled while waiting for dd container to exit (is it blocked on reading /dev/random?), err: %v", ctx.Err())
+	}
+
+	err = rngtestCmd.Wait()
+	t.Logf("stdout output from rngtest:\n %s", rngtestStdout.String())
+	t.Logf("stderr output from rngtest:\n %s", rngtestStderr.String())
+	if err != nil {
+		// rngtest will exit non-zero if any blocks fail its randomness tests. Trials showed an approximate
+		// false-negative rate of 27/32863 blocks, so testing on 1023 blocks gives a ~36% chance of there
+		// being a single false-negative. The chance of there being 5 or more drops down to about 0.1%,
+		// which is an acceptable flakiness rate, so we assert that there are no more than 4 failed
+		// blocks. Even with this tolerance, the test still alerts us if a rootfs change causes a regression.
+		require.EqualValues(t, 1, rngtestCmd.ProcessState.ExitCode())
+		const failureTolerance = 4
+
+		// Track whether the failure count was found so that unparseable output cannot silently pass.
+		foundFailureCount := false
+		for _, outputLine := range strings.Split(rngtestStderr.String(), "\n") {
+			var failureCount int
+			_, err := fmt.Sscanf(strings.TrimSpace(outputLine), "rngtest: FIPS 140-2 failures: %d", &failureCount)
+			if err == nil {
+				foundFailureCount = true
+				if failureCount > failureTolerance {
+					require.Failf(t, "too many block test failures from rngtest",
+						"%d failures is greater than tolerance of up to %d failures", failureCount, failureTolerance)
+				}
+				break
+			}
+		}
+		require.True(t, foundFailureCount, "failed to find the FIPS 140-2 failure count in rngtest's stderr")
+	}
+}
diff --git a/tools/docker/Dockerfile b/tools/docker/Dockerfile
@@ -103,7 +103,8 @@ RUN apt-get update && apt-get install --yes --no-install-recommends \
 		iptables \
 		iperf3 \
 		libdevmapper-dev \
-		libseccomp-dev
+		libseccomp-dev \
+		rng-tools # used for rngtest
 
 RUN mkdir -p /var/lib/firecracker-containerd/runtime \
         && curl --silent --show-error --retry 3 --max-time 30 --output default-vmlinux.bin \
diff --git a/tools/image-builder/Makefile b/tools/image-builder/Makefile
@@ -55,7 +55,7 @@ ifneq ($(UID),0)
 endif
 	debootstrap \
 		--variant=minbase \
-		--include=udev,systemd,systemd-sysv,procps,libseccomp2 \
+		--include=udev,systemd,systemd-sysv,procps,libseccomp2,haveged \
 		stretch \
 		"$(WORKDIR)" $(DEBMIRROR)
 	rm -rf "$(WORKDIR)/var/cache/apt/archives" \
diff --git a/tools/image-builder/README.md b/tools/image-builder/README.md
@@ -74,3 +74,14 @@ the final parameter passed on the kernel command line.
 A complete command line, settable via the `kernel_args` setting in `/etc/containerd/firecracker-runtime.json`, is:
 
     ro console=ttyS0 noapic reboot=k panic=1 pci=off nomodules systemd.journald.forward_to_console systemd.unit=firecracker.target init=/sbin/overlay-init
+
+### Security ###
+
+In order to ensure sufficient entropy is consistently available within 
+the VM, the rootfs is configured to start the 
+[`haveged`](https://manpages.debian.org/buster/haveged/haveged.8.en.html)
+daemon during boot. [More information on its method of operation and other
+details can be found in its FAQ](https://issihosts.com/haveged/faq.html).
+Users of the image created by this utility are encouraged to evaluate 
+`haveged` against their security requirements before running any
+cryptographically-sensitive workloads inside their microVMs and containers.
diff --git a/tools/image-builder/files_debootstrap/etc/systemd/system/firecracker.target.wants/haveged.service b/tools/image-builder/files_debootstrap/etc/systemd/system/firecracker.target.wants/haveged.service
@@ -0,0 +1 @@
+/etc/systemd/system/haveged.service
diff --git a/tools/image-builder/files_debootstrap/etc/systemd/system/haveged.service b/tools/image-builder/files_debootstrap/etc/systemd/system/haveged.service
@@ -0,0 +1,22 @@
+[Unit]
+Description=Entropy Daemon based on the HAVEGE algorithm
+Documentation=man:haveged(8) http://www.issihosts.com/haveged/
+DefaultDependencies=no
+ConditionVirtualization=!container
+After=local-fs.target
+Before=firecracker.target sysinit.target shutdown.target
+
+[Service]
+ExecStart=/usr/sbin/haveged --Foreground --verbose=1 -w 1024
+SuccessExitStatus=143
+SecureBits=noroot-locked
+NoNewPrivileges=yes
+CapabilityBoundingSet=CAP_SYS_ADMIN
+PrivateTmp=yes
+PrivateDevices=yes
+PrivateNetwork=yes
+ProtectSystem=full
+ProtectHome=yes
+
+[Install]
+WantedBy=default.target