diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c3dfab9b61..e321926488e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## [0.21.2] + +### Fixed + +- Fixed #1754 - net: traffic blocks when running ingress UDP performance tests + with very large buffers. + ## [0.21.1] ### Fixed diff --git a/Cargo.lock b/Cargo.lock index b33a0f261b6..133a44cef5b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -137,7 +137,7 @@ dependencies = [ [[package]] name = "firecracker" -version = "0.21.0" +version = "0.21.2" dependencies = [ "api_server", "backtrace", @@ -157,7 +157,7 @@ checksum = "b8b7a7c0c47db5545ed3fef7468ee7bb5b74691498139e4b3f6a20685dc6dd8e" [[package]] name = "jailer" -version = "0.21.0" +version = "0.21.2" dependencies = [ "libc", "regex", diff --git a/src/api_server/swagger/firecracker.yaml b/src/api_server/swagger/firecracker.yaml index d29ccc3eb2d..f5ef5b370c0 100644 --- a/src/api_server/swagger/firecracker.yaml +++ b/src/api_server/swagger/firecracker.yaml @@ -5,7 +5,7 @@ info: The API is accessible through HTTP calls on specific URLs carrying JSON modeled data. The transport medium is a Unix Domain Socket. - version: 0.21.1 + version: 0.21.2 termsOfService: "" contact: email: "compute-capsule@amazon.com" diff --git a/src/devices/src/virtio/net.rs b/src/devices/src/virtio/net.rs index 96332000ca2..6aeb8c377e3 100644 --- a/src/devices/src/virtio/net.rs +++ b/src/devices/src/virtio/net.rs @@ -552,7 +552,11 @@ impl EpollHandler for NetEpollHandler { RX_TAP_EVENT => { METRICS.net.rx_tap_event_count.inc(); - if self.rx.queue.is_empty(&self.mem) { + // While there are no available RX queue buffers and there's a deferred_frame + // don't process any more incoming. Otherwise start processing a frame. In the + // process the deferred_frame flag will be set in order to avoid freezing the + // RX queue. 
+ if self.rx.queue.is_empty(&self.mem) && self.rx.deferred_frame { return Err(DeviceError::NoAvailBuffers); } @@ -1479,7 +1483,8 @@ mod tests { let mem = GuestMemoryMmap::from_ranges(&[(GuestAddress(0), 0x10000)]).unwrap(); let (mut h, _txq, rxq) = default_test_netepollhandler(&mem, test_mutators); - // The RX queue is empty. + // The RX queue is empty and rx.deferred_frame flag is set. + h.rx.deferred_frame = true; match h.handle_event(RX_TAP_EVENT, epoll::Events::EPOLLIN) { Err(DeviceError::NoAvailBuffers) => (), _ => panic!("invalid"), diff --git a/src/firecracker/Cargo.toml b/src/firecracker/Cargo.toml index b89ada67826..5a79ae64dfd 100644 --- a/src/firecracker/Cargo.toml +++ b/src/firecracker/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "firecracker" -version = "0.21.1" +version = "0.21.2" authors = ["Amazon Firecracker team "] [dependencies] diff --git a/src/jailer/Cargo.toml b/src/jailer/Cargo.toml index 47d80cfe20f..7461edbdd5b 100644 --- a/src/jailer/Cargo.toml +++ b/src/jailer/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "jailer" -version = "0.21.1" +version = "0.21.2" authors = ["Amazon Firecracker team "] [dependencies] diff --git a/tests/framework/microvm.py b/tests/framework/microvm.py index 47a618a25d1..01f8743b3f4 100644 --- a/tests/framework/microvm.py +++ b/tests/framework/microvm.py @@ -20,6 +20,7 @@ from retry import retry from retry.api import retry_call +import host_tools.cpu_load as cpu_tools import host_tools.memory as mem_tools import host_tools.network as net_tools @@ -31,6 +32,8 @@ MachineConfigure, Network, Vsock +# Too many public methods +# pylint: disable=R0904 class Microvm: """Class to represent a Firecracker microvm. @@ -114,6 +117,10 @@ def __init__( else: self._memory_events_queue = None + # Cpu load monitoring has to be explicitly enabled using + # the `enable_cpu_load_monitor` method. 
def enable_cpu_load_monitor(self, threshold):
    """Create and start the cpu load monitor for this microVM.

    Monitoring is opt-in: nothing is sampled until this method is called.

    :param threshold: cpu load value (in percent) above which the
        monitor records a sample.
    """
    # The thread of interest is the emulation thread. It is currently the
    # first thread created, which is why the jailer clone pid is passed
    # both as the process pid and as the thread pid.
    # Looking the thread up by name would be a possible improvement.
    monitor = cpu_tools.CpuLoadMonitor(
        self.jailer_clone_pid,
        self.jailer_clone_pid,
        threshold
    )
    self._cpu_load_monitor = monitor
    monitor.start()
# SPDX-License-Identifier: Apache-2.0
"""Utilities for measuring cpu utilisation for a process."""
import time

from subprocess import run, CalledProcessError, PIPE
from threading import Thread

# Zero-based field indices of a `/proc/<pid>/stat` line, taken from
# https://www.man7.org/linux/man-pages/man5/proc.5.html
STAT_UTIME_IDX = 13
STAT_STIME_IDX = 14
STAT_STARTTIME_IDX = 21


class CpuLoadExceededException(Exception):
    """A custom exception containing details on excessive cpu load."""

    def __init__(self, cpu_load_samples, threshold):
        """Compose the error message containing the cpu load details.

        :param cpu_load_samples: the samples that went over the threshold.
        :param threshold: the maximum accepted cpu load.
        """
        super().__init__(
            'Cpu load samples {} exceeded maximum threshold {}.\n'
            .format(cpu_load_samples, threshold)
        )


class CpuLoadMonitor(Thread):
    """Class to represent a cpu load monitor for a thread.

    Once started, the monitor periodically reads
    `/proc/<pid>/task/<tid>/stat` and stores every computed load value
    that is above `threshold`. The load is the cpu time (user + system)
    consumed by the thread divided by the time elapsed since the thread
    was started, i.e. an average over the thread's lifetime.
    """

    # Pause between two consecutive samples, in seconds.
    CPU_LOAD_SAMPLES_TIMEOUT_S = 1

    def __init__(
            self,
            process_pid,
            thread_pid,
            threshold
    ):
        """Set up monitor attributes.

        :param process_pid: pid of the process owning the thread.
        :param thread_pid: id of the monitored thread.
        :param threshold: cpu load (percent) above which samples are kept.
        """
        Thread.__init__(self)
        self._process_pid = process_pid
        self._thread_pid = thread_pid
        self._cpu_load_samples = []
        self._threshold = threshold
        self._should_stop = False

    @property
    def process_pid(self):
        """Get the process pid."""
        return self._process_pid

    @property
    def thread_pid(self):
        """Get the thread pid."""
        return self._thread_pid

    @property
    def threshold(self):
        """Get the cpu load threshold."""
        return self._threshold

    @property
    def cpu_load_samples(self):
        """Get the cpu load samples that exceeded the threshold."""
        return self._cpu_load_samples

    def signal_stop(self):
        """Signal that the thread should stop.

        The flag is checked once per sampling iteration.
        """
        self._should_stop = True

    @staticmethod
    def _compute_cpu_load(stat_line, uptime, clock_ticks):
        """Compute the cpu load (percent) from one stat line.

        :param stat_line: content of `/proc/<pid>/task/<tid>/stat`.
        :param uptime: system uptime in seconds (first `/proc/uptime` value).
        :param clock_ticks: number of clock ticks per second.
        :return: the load, or None when no time has elapsed yet since the
            thread was started (the ratio is undefined in that case).
        :raises ValueError: if the line is malformed or a field is not
            an integer.
        :raises IndexError: if the line has fewer fields than expected.
        """
        # The second field (comm) is wrapped in parentheses and may itself
        # contain spaces or parentheses, so split only what follows the
        # last ')'.
        _, closing_paren, tail = stat_line.rpartition(')')
        if not closing_paren:
            raise ValueError('malformed stat line: {!r}'.format(stat_line))
        fields = tail.split()
        # `fields[0]` is the third stat field (state), i.e. index 2.
        skipped = 2

        utime = int(fields[STAT_UTIME_IDX - skipped])
        stime = int(fields[STAT_STIME_IDX - skipped])
        starttime = int(fields[STAT_STARTTIME_IDX - skipped])

        seconds = uptime - starttime / clock_ticks
        if seconds <= 0:
            # Thread sampled within the tick it was created in.
            return None

        total_time = utime + stime
        return (total_time * 100 / clock_ticks) / seconds

    def run(self):
        """Thread for monitoring cpu load of some pid.

        `/proc/<pid>/task/<tid>/stat` is used to compute the cpu load.
        Values above the threshold are appended to `cpu_load_samples`.
        It is up to the caller to check the collected samples, e.g. by
        calling `check_samples` after the thread has been joined.

        The loop ends when `signal_stop` was called, when the proc files
        can no longer be read (for instance because the monitored thread
        is gone), or when their content cannot be parsed. If the clock
        tick rate cannot be determined, no sampling takes place at all.
        """
        try:
            getconf_out = run(
                ['getconf', 'CLK_TCK'],
                check=True,
                stdout=PIPE
            ).stdout.decode('utf-8')
            clock_ticks = int(getconf_out.strip())
        except (CalledProcessError, OSError, ValueError):
            # OSError covers a missing `getconf` binary now that no
            # intermediate shell is involved.
            return
        if clock_ticks <= 0:
            return

        stat_path = '/proc/{pid}/task/{tid}/stat'.format(
            pid=self.process_pid,
            tid=self.thread_pid
        )

        while not self._should_stop:
            try:
                with open('/proc/uptime') as uptime_file:
                    uptime_line = uptime_file.readline()
                with open(stat_path) as stat_file:
                    stat_line = stat_file.readline()
            except OSError:
                break

            try:
                uptime = float(uptime_line.split()[0])
                cpu_load = self._compute_cpu_load(
                    stat_line, uptime, clock_ticks)
            except (ValueError, IndexError):
                break

            if cpu_load is not None and cpu_load > self.threshold:
                self.cpu_load_samples.append(cpu_load)

            time.sleep(self.CPU_LOAD_SAMPLES_TIMEOUT_S)

    def check_samples(self):
        """Check that there are no samples above the threshold.

        :raises CpuLoadExceededException: if at least one sample was
            recorded.
        """
        if self.cpu_load_samples:
            raise CpuLoadExceededException(
                self._cpu_load_samples, self._threshold)


# The method below is added by the same change to the tap class in
# tests/host_tools/network.py; it shares a source line with the module
# above.
def set_tx_queue_len(self, tx_queue_len):
    """Set the length of the tap's TX queue.

    The `ip link set` command is executed inside the tap's network
    namespace.

    :param tx_queue_len: the new `txqueuelen` value.
    :raises CalledProcessError: if the command exits with a non-zero
        status.
    """
    run(
        'ip netns exec {} ip link set {} txqueuelen {}'.format(
            self.netns,
            self.name,
            tx_queue_len
        ),
        shell=True,
        stderr=PIPE,
        check=True
    )
# The iperf version to run these tests with
IPERF_BINARY = 'iperf3'


def test_high_ingress_traffic(test_microvm_with_ssh, network_config):
    """Flood the guest with UDP traffic and check the net device survives.

    The tap's TX queue is shrunk so that it fills up quickly under load;
    afterwards the guest must still answer over ssh.
    """
    vm = test_microvm_with_ssh
    vm.spawn()
    vm.basic_config()

    # The tap has to exist before the interface gets configured.
    tap, _host_ip, guest_ip = vm.ssh_network_config(network_config, '1')

    # A tx queue length of 5 increases the probability of filling
    # the tap under high ingress traffic.
    tap.set_tx_queue_len(5)

    vm.start()

    # iperf3 server, daemonized, inside the guest.
    guest_ssh = net_tools.SSHConnection(vm.ssh_config)
    server_cmd = '{} -sD\n'.format(IPERF_BINARY)
    guest_ssh.execute_command(server_cmd)
    time.sleep(1)

    # iperf3 client on the host, sending 1Gbps of UDP traffic.
    # Should the net device break, iperf freezes, hence the `timeout`.
    client_cmd = 'timeout 30 {} {} -c {} -u -V -b 1000000000 -t 30'.format(
        vm.jailer.netns_cmd_prefix(),
        IPERF_BINARY,
        guest_ip,
    )
    run(client_cmd, stdout=PIPE, shell=True, check=False)

    # A net interface that still works lets us execute ssh commands.
    exit_code, _stdout, _stderr = guest_ssh.execute_command(
        'echo success\n'
    )
    assert exit_code == 0


# The test below is added by the same change to
# tests/integration_tests/functional/test_rate_limiter.py; it shares a
# source line with the test above.
def test_rx_rate_limiting_cpu_load(test_microvm_with_ssh, network_config):
    """Run iperf rx with rate limiting; verify cpu load is below threshold.

    This test only generates the traffic. The monitor enabled here records
    the samples that go over the threshold.
    """
    vm = test_microvm_with_ssh
    vm.spawn()
    vm.basic_config()

    # After multiple runs, the average value for the cpu load seems to be
    # around 10%. The threshold is set a little higher to skip false
    # positives.
    cpu_load_threshold = 20
    vm.enable_cpu_load_monitor(cpu_load_threshold)

    # Aggressive rate limiting, no burst: 64KBytes every second.
    bandwidth = {'size': 65536, 'refill_time': 1000}
    rx_rate_limiter_no_burst = {'bandwidth': bandwidth}
    _tap, _host_ip, guest_ip = vm.ssh_network_config(
        network_config,
        '1',
        rx_rate_limiter=rx_rate_limiter_no_burst
    )

    vm.start()

    # iperf server on the guest.
    _start_iperf_on_guest(vm, guest_ip)

    # iperf client on the host, sending UDP traffic.
    iperf_cmd = '{} {} -u -c {} -b 1000000000 -t{} -f KBytes'.format(
        vm.jailer.netns_cmd_prefix(),
        IPERF_BINARY,
        guest_ip,
        IPERF_TRANSMIT_TIME * 5
    )
    _run_local_iperf(iperf_cmd)
diff --git a/tools/devtool b/tools/devtool index aba21f8c14e..445e816e32a 100755 --- a/tools/devtool +++ b/tools/devtool @@ -236,7 +236,7 @@ ensure_build_dir() { # owned by root. This fixes that by recursively changing the ownership of build/ # to the current user. # -fix_build_dir_perms() { +cmd_fix_perms() { # Yes, running Docker to get elevated privileges, just to chown some files # is a dirty hack. run_devctr \ @@ -412,6 +412,8 @@ cmd_help() { echo "" echo " checkenv" echo " Performs prerequisites checks needed to execute firecracker." + echo " fix_perms" + echo " Fixes permissions when devtool dies in the middle of a privileged session." echo "" } @@ -541,7 +543,7 @@ cmd_test() { # Running as root would have created some root-owned files under the build # dir. Let's fix that. - fix_build_dir_perms + cmd_fix_perms return $ret } @@ -588,7 +590,7 @@ cmd_shell() { # Running as root may have created some root-owned files under the build # dir. Let's fix that. # - fix_build_dir_perms + cmd_fix_perms else say "Dropping to shell prompt as user $(whoami) ..." say "Note: $FC_ROOT_DIR is bind-mounted under $CTR_FC_ROOT_DIR"