Skip to content

Commit ca3c896

Browse files
committed
tests: add cpu load test for high rx
This is a regression test for #1444. Signed-off-by: Ioana Chirca <[email protected]>
1 parent 9511525 commit ca3c896

File tree

3 files changed

+189
-0
lines changed

3 files changed

+189
-0
lines changed

tests/framework/microvm.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from retry.api import retry_call
2323

2424
import host_tools.logging as log_tools
25+
import host_tools.cpu_load as cpu_tools
2526
import host_tools.memory as mem_tools
2627
import host_tools.network as net_tools
2728

@@ -127,6 +128,10 @@ def __init__(
127128
else:
128129
self._memory_events_queue = None
129130

131+
# Cpu load monitoring has to be explicitly enabled using
132+
# the `enable_cpu_load_monitor` method.
133+
self._cpu_load_monitor = None
134+
130135
# External clone/exec tool, because Python can't into clone
131136
self.bin_cloner_path = bin_cloner_path
132137

@@ -149,6 +154,11 @@ def kill(self):
149154
raise mem_tools.MemoryUsageExceededException(
150155
self._memory_events_queue.get())
151156

157+
if self._cpu_load_monitor:
158+
self._cpu_load_monitor.signal_stop()
159+
self._cpu_load_monitor.join()
160+
self._cpu_load_monitor.check_samples()
161+
152162
@property
153163
def api_session(self):
154164
"""Return the api session associated with this microVM."""
@@ -272,6 +282,20 @@ def append_to_log_data(self, data):
272282
"""Append a message to the log data."""
273283
self._log_data += data
274284

285+
def enable_cpu_load_monitor(self, threshold):
    """Create and start a cpu load monitor for this microvm.

    The monitor is stored in `self._cpu_load_monitor` and started
    right away. `threshold` is forwarded unchanged to
    `cpu_tools.CpuLoadMonitor`.
    """
    # The thread of interest is the emulation thread. It is currently the
    # first thread created, so the same `jailer_clone_pid` value is used as
    # both the process id and the thread id.
    # A possible improvement is to find the thread by name.
    monitor = cpu_tools.CpuLoadMonitor(
        process_pid=self.jailer_clone_pid,
        thread_pid=self.jailer_clone_pid,
        threshold=threshold,
    )
    self._cpu_load_monitor = monitor
    monitor.start()
298+
275299
def create_jailed_resource(self, path, create_jail=False):
276300
"""Create a hard link to some resource inside this microvm."""
277301
return self.jailer.jailed_path(path, create=True,

tests/host_tools/cpu_load.py

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
"""Utilities for measuring cpu utilisation for a process."""
4+
import time
5+
from threading import Thread
6+
7+
import framework.utils as utils
8+
9+
# /proc/<pid>/stat output taken from
10+
# https://www.man7.org/linux/man-pages/man5/proc.5.html
11+
STAT_UTIME_IDX = 13
12+
STAT_STIME_IDX = 14
13+
STAT_STARTTIME_IDX = 21
14+
15+
16+
class CpuLoadExceededException(Exception):
    """Raised when recorded cpu load samples went over the threshold."""

    def __init__(self, cpu_load_samples, threshold):
        """Build the error message from the samples and the threshold."""
        message = (
            'Cpu load samples {} exceeded maximum threshold {}.\n'
        ).format(cpu_load_samples, threshold)
        super().__init__(message)
25+
26+
27+
class CpuLoadMonitor(Thread):
    """Background thread that samples the cpu load of one thread.

    Every `CPU_LOAD_SAMPLES_TIMEOUT_S` seconds the load is computed from
    `/proc/uptime` and `/proc/<process pid>/task/<thread pid>/stat`.
    The value is the cpu time (user + system) consumed by the thread,
    as a percentage of the wall-clock time elapsed since the thread
    started. Only samples above `threshold` are recorded; call
    `check_samples` after the monitor stopped to turn them into an error.
    """

    CPU_LOAD_SAMPLES_TIMEOUT_S = 1

    def __init__(
            self,
            process_pid,
            thread_pid,
            threshold
    ):
        """Set up monitor attributes."""
        Thread.__init__(self)
        self._process_pid = process_pid
        self._thread_pid = thread_pid
        self._cpu_load_samples = []
        self._threshold = threshold
        self._should_stop = False

    @property
    def process_pid(self):
        """Get the process pid."""
        return self._process_pid

    @property
    def thread_pid(self):
        """Get the thread pid."""
        return self._thread_pid

    @property
    def threshold(self):
        """Get the cpu load threshold."""
        return self._threshold

    @property
    def cpu_load_samples(self):
        """Get the cpu load samples that exceeded the threshold."""
        return self._cpu_load_samples

    def signal_stop(self):
        """Signal that the thread should stop.

        The sampling loop checks the flag once per iteration, so the
        thread exits after its current sleep, not immediately.
        """
        self._should_stop = True

    @staticmethod
    def _split_stat(stat_line):
        """Split a `/proc/.../stat` line into its fields.

        The second field (`comm`) is enclosed in parentheses and may
        itself contain spaces or parentheses, so a plain `split()` would
        shift every later index. The name is returned as one element,
        without the enclosing parentheses, which keeps the 0-based
        indices aligned with the field order in proc(5).
        """
        head, sep, tail = stat_line.strip().rpartition(')')
        if not sep:
            # No parenthesised name found; fall back to a plain split.
            return stat_line.split()
        pid, _, comm = head.partition('(')
        return [pid.strip(), comm] + tail.split()

    @staticmethod
    def _cpu_load(utime, stime, starttime, uptime, clock_ticks):
        """Return the cpu load percentage, or None if it is undefined.

        `utime`, `stime` and `starttime` are expressed in clock ticks,
        `uptime` in seconds. None is returned when no time has elapsed
        since the thread started, which would otherwise divide by zero.
        """
        seconds = uptime - starttime / clock_ticks
        if seconds <= 0:
            return None
        total_time = utime + stime
        return (total_time * 100 / clock_ticks) / seconds

    def run(self):
        """Thread for monitoring cpu load of some pid.

        `/proc/<process pid>/task/<thread pid>/stat` is used to compute
        the cpu load; samples above the threshold are appended to
        `cpu_load_samples`. It is up to the caller to inspect that list,
        e.g. through `check_samples`.

        The loop ends when `signal_stop` was called, or when the proc
        files can no longer be read or parsed (for instance because the
        monitored thread exited). If the clock tick value cannot be
        obtained, the thread returns without sampling.
        """
        clock_ticks_cmd = 'getconf CLK_TCK'
        try:
            stdout = utils.cmd_run(
                clock_ticks_cmd,
            ).stdout.decode('utf-8')
        except ChildProcessError:
            return
        try:
            clock_ticks = int(stdout.strip("\n"))
        except ValueError:
            return
        if clock_ticks <= 0:
            # A non-positive tick rate cannot be used as a divisor.
            return

        stat_path = '/proc/{pid}/task/{tid}/stat'.format(
            pid=self.process_pid,
            tid=self.thread_pid
        )

        while not self._should_stop:
            try:
                with open('/proc/uptime') as uptime_file:
                    uptime = uptime_file.readline().strip("\n").split()[0]

                with open(stat_path) as stat_file:
                    stat = self._split_stat(stat_file.readline())
            except IOError:
                break

            try:
                uptime = float(uptime)
                utime = int(stat[STAT_UTIME_IDX])
                stime = int(stat[STAT_STIME_IDX])
                starttime = int(stat[STAT_STARTTIME_IDX])
            except (ValueError, IndexError):
                # Malformed or truncated stat line.
                break

            cpu_load = self._cpu_load(
                utime, stime, starttime, uptime, clock_ticks
            )

            if cpu_load is not None and cpu_load > self.threshold:
                self.cpu_load_samples.append(cpu_load)

            time.sleep(self.CPU_LOAD_SAMPLES_TIMEOUT_S)

    def check_samples(self):
        """Check that there are no samples above the threshold.

        Raises `CpuLoadExceededException` if at least one sample was
        recorded.
        """
        if self.cpu_load_samples:
            raise CpuLoadExceededException(
                self._cpu_load_samples, self._threshold)

tests/integration_tests/functional/test_rate_limiter.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,48 @@ def test_rx_rate_limiting(test_microvm_with_ssh, network_config):
151151
_check_rx_rate_limit_patch(test_microvm, guest_ips)
152152

153153

154+
def test_rx_rate_limiting_cpu_load(test_microvm_with_ssh, network_config):
    """Run iperf rx with rate limiting; verify cpu load is below threshold.

    This test makes no assertion of its own: it only enables the cpu
    load monitor on the microvm and generates heavy rx traffic.
    """
    vm = test_microvm_with_ssh
    vm.spawn()
    vm.basic_config()

    # Over multiple runs the average cpu load was observed to be around
    # 10%. The threshold is set somewhat higher to avoid false positives.
    threshold = 20
    vm.enable_cpu_load_monitor(threshold)

    # Aggressive rx rate limiting with no burst: 64KBytes refilled every 1s.
    bandwidth = {
        'size': 65536,
        'refill_time': 1000,
    }
    _tap, _host_ip, guest_ip = vm.ssh_network_config(
        network_config,
        '1',
        rx_rate_limiter={'bandwidth': bandwidth}
    )

    vm.start()

    # The guest acts as the iperf server.
    _start_iperf_on_guest(vm, guest_ip)

    # The host-side iperf client floods the guest with UDP traffic.
    iperf_template = '{} {} -u -c {} -b 1000000000 -t{} -f KBytes'
    iperf_cmd = iperf_template.format(
        vm.jailer.netns_cmd_prefix(),
        IPERF_BINARY,
        guest_ip,
        IPERF_TRANSMIT_TIME * 5
    )
    _run_local_iperf(iperf_cmd)
194+
195+
154196
def _check_tx_rate_limiting(test_microvm, guest_ips, host_ips):
155197
"""Check that the transmit rate is within expectations."""
156198
# Start iperf on the host as this is the tx rate limiting test.

0 commit comments

Comments
 (0)