Skip to content

Commit a484de7

Browse files
ioanachirca authored and dianpopa committed
tests: add cpu load test for high rx
This is a regression test for #1444.

Signed-off-by: Ioana Chirca <[email protected]>
1 parent 5135356 commit a484de7

File tree

3 files changed

+192
-0
lines changed

3 files changed

+192
-0
lines changed

tests/framework/microvm.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from retry.api import retry_call
2323

2424
import host_tools.logging as log_tools
25+
import host_tools.cpu_load as cpu_tools
2526
import host_tools.memory as mem_tools
2627
import host_tools.network as net_tools
2728

@@ -124,6 +125,10 @@ def __init__(
124125
else:
125126
self._memory_events_queue = None
126127

128+
# Cpu load monitoring has to be explicitly enabled using
129+
# the `enable_cpu_load_monitor` method.
130+
self._cpu_load_monitor = None
131+
127132
# External clone/exec tool, because Python can't into clone
128133
self.bin_cloner_path = bin_cloner_path
129134

@@ -146,6 +151,11 @@ def kill(self):
146151
raise mem_tools.MemoryUsageExceededException(
147152
self._memory_events_queue.get())
148153

154+
if self._cpu_load_monitor:
155+
self._cpu_load_monitor.signal_stop()
156+
self._cpu_load_monitor.join()
157+
self._cpu_load_monitor.check_samples()
158+
149159
@property
150160
def api_session(self):
151161
"""Return the api session associated with this microVM."""
@@ -258,6 +268,20 @@ def append_to_log_data(self, data):
258268
"""Append a message to the log data."""
259269
self._log_data += data
260270

271+
def enable_cpu_load_monitor(self, threshold):
    """Create and start the cpu load monitor for this microVM.

    The monitor is stored in `self._cpu_load_monitor` and started
    right away; `threshold` is handed unchanged to
    `cpu_tools.CpuLoadMonitor`.
    """
    # The thread we want to watch is the emulation thread. It is
    # currently the first thread created, so the jailer clone pid is
    # used both as the process pid and as the thread pid.
    # Looking the thread up by name would be a possible improvement.
    self._cpu_load_monitor = cpu_tools.CpuLoadMonitor(
        process_pid=self.jailer_clone_pid,
        thread_pid=self.jailer_clone_pid,
        threshold=threshold
    )
    self._cpu_load_monitor.start()
284+
261285
def create_jailed_resource(self, path, create_jail=False):
262286
"""Create a hard link to some resource inside this microvm."""
263287
return self.jailer.jailed_path(path, create=True,

tests/host_tools/cpu_load.py

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
"""Utilities for measuring cpu utilisation for a process."""
4+
import time
5+
6+
from subprocess import run, CalledProcessError, PIPE
7+
from threading import Thread
8+
9+
# Zero-based positions of the `/proc/<pid>/stat` fields used to compute cpu load; layout from
10+
# https://www.man7.org/linux/man-pages/man5/proc.5.html
11+
STAT_UTIME_IDX = 13
12+
STAT_STIME_IDX = 14
13+
STAT_STARTTIME_IDX = 21
14+
15+
16+
class CpuLoadExceededException(Exception):
    """A custom exception containing details on excessive cpu load.

    Besides the formatted message, the offending samples and the
    threshold are kept as the `cpu_load_samples` and `threshold`
    attributes so that callers can inspect them programmatically.
    """

    def __init__(self, cpu_load_samples, threshold):
        """Compose the error message containing the cpu load details.

        `cpu_load_samples` is the collection of recorded loads that went
        over the limit and `threshold` is that limit; both are embedded
        in the message and stored on the exception.
        """
        super().__init__(
            'Cpu load samples {} exceeded maximum threshold {}.\n'
            .format(cpu_load_samples, threshold)
        )
        self.cpu_load_samples = cpu_load_samples
        self.threshold = threshold
25+
26+
27+
class CpuLoadMonitor(Thread):
    """Background thread that samples the cpu load of one thread.

    Every `CPU_LOAD_SAMPLES_TIMEOUT_S` seconds the monitor reads
    `/proc/uptime` and `/proc/<process pid>/task/<thread pid>/stat` and
    computes the cpu load of the monitored thread averaged over the
    time elapsed since that thread started. Only the values above
    `threshold` are recorded; call `check_samples` once the monitor has
    been stopped to turn recorded values into an exception.
    """

    # Pause between two consecutive samples, in seconds.
    CPU_LOAD_SAMPLES_TIMEOUT_S = 1

    def __init__(
            self,
            process_pid,
            thread_pid,
            threshold
    ):
        """Set up monitor attributes.

        `process_pid` and `thread_pid` select the `/proc` stat file to
        read and `threshold` is the cpu load (in percent) above which a
        sample is recorded.
        """
        Thread.__init__(self)
        self._process_pid = process_pid
        self._thread_pid = thread_pid
        self._cpu_load_samples = []
        self._threshold = threshold
        self._should_stop = False

    @property
    def process_pid(self):
        """Get the process pid."""
        return self._process_pid

    @property
    def thread_pid(self):
        """Get the thread pid."""
        return self._thread_pid

    @property
    def threshold(self):
        """Get the cpu load threshold."""
        return self._threshold

    @property
    def cpu_load_samples(self):
        """Get the cpu load samples that exceeded the threshold."""
        return self._cpu_load_samples

    def signal_stop(self):
        """Signal that the thread should stop.

        The flag is checked once per sampling iteration, so the thread
        may take up to one sampling period to actually finish.
        """
        self._should_stop = True

    @staticmethod
    def _read_clock_ticks():
        """Return the clock ticks per second, or None if unavailable."""
        try:
            stdout = run(
                ['getconf', 'CLK_TCK'],
                check=True,
                stdout=PIPE
            ).stdout.decode('utf-8')
            clock_ticks = int(stdout.strip())
        except (CalledProcessError, OSError, ValueError):
            # `getconf` is missing, failed or printed something that is
            # not a number.
            return None
        # A non-positive value would make the load computation divide
        # by zero (or produce nonsense), so treat it as unavailable.
        return clock_ticks if clock_ticks > 0 else None

    @staticmethod
    def _compute_cpu_load(uptime_line, stat_line, clock_ticks):
        """Compute the cpu load percentage from raw `/proc` lines.

        `uptime_line` is the first line of `/proc/uptime`, `stat_line`
        the content of the thread's stat file and `clock_ticks` the
        number of clock ticks per second.

        Returns the load in percent, or None when no time has elapsed
        since the thread started. Raises ValueError or IndexError when
        the input lines are malformed.
        """
        uptime = float(uptime_line.split()[0])

        # The second stat field is the command name in parentheses and
        # may itself contain spaces or parentheses, so a plain split of
        # the whole line can shift every following field. Everything
        # after the last ')' is safe to split on whitespace.
        _, closing_paren, remainder = stat_line.rpartition(')')
        if not closing_paren:
            raise ValueError('Malformed stat line: {!r}'.format(stat_line))
        fields = remainder.split()
        # `fields[0]` is the third stat field, i.e. index 2 of the
        # full line, hence the shift applied to the STAT_*_IDX values.
        shift = 2
        utime = int(fields[STAT_UTIME_IDX - shift])
        stime = int(fields[STAT_STIME_IDX - shift])
        starttime = int(fields[STAT_STARTTIME_IDX - shift])

        total_time = utime + stime
        seconds = uptime - starttime / clock_ticks
        if seconds <= 0:
            # The thread has just been created (uptime has a coarser
            # resolution than starttime); there is nothing to average.
            return None
        return (total_time * 100 / clock_ticks) / seconds

    def run(self):
        """Thread for monitoring cpu load of some pid.

        `/proc/<process pid>/task/<thread pid>/stat` is used to compute
        the cpu load, which is added to the samples list when it is
        above the threshold.
        It is up to the caller to check the list, e.g. by calling
        `check_samples` after the thread has been joined.

        Monitoring ends silently when the stop flag is set, when the
        clock tick value cannot be obtained, or when the `/proc` files
        cannot be read or parsed (for instance because the monitored
        thread is gone).
        """
        clock_ticks = self._read_clock_ticks()
        if clock_ticks is None:
            return

        stat_path = '/proc/{pid}/task/{tid}/stat'.format(
            pid=self.process_pid,
            tid=self.thread_pid
        )

        while not self._should_stop:
            try:
                with open('/proc/uptime') as uptime_file:
                    uptime_line = uptime_file.readline()
                with open(stat_path) as stat_file:
                    stat_line = stat_file.readline()
                cpu_load = self._compute_cpu_load(
                    uptime_line,
                    stat_line,
                    clock_ticks
                )
            except (IOError, ValueError, IndexError):
                break

            if cpu_load is not None and cpu_load > self.threshold:
                self.cpu_load_samples.append(cpu_load)

            time.sleep(self.CPU_LOAD_SAMPLES_TIMEOUT_S)

    def check_samples(self):
        """Check that there are no samples above the threshold.

        Raises CpuLoadExceededException if at least one sample was
        recorded.
        """
        if self.cpu_load_samples:
            raise CpuLoadExceededException(
                self._cpu_load_samples, self._threshold)

tests/integration_tests/functional/test_rate_limiter.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,48 @@ def test_rx_rate_limiting(test_microvm_with_ssh, network_config):
152152
_check_rx_rate_limit_patch(test_microvm, guest_ips)
153153

154154

155+
def test_rx_rate_limiting_cpu_load(test_microvm_with_ssh, network_config):
    """Flood a heavily rx-rate-limited guest and watch the cpu load.

    No assertion is made in the test body itself; presumably the cpu
    load samples are checked when the microVM is killed -- confirm
    against the fixture teardown.
    """
    vm = test_microvm_with_ssh
    vm.spawn()
    vm.basic_config()

    # Turn on the monitor that records cpu load values over the limit.
    # Over multiple runs the average cpu load seemed to be around 10%,
    # so the limit is set a little higher to skip false positives.
    vm.enable_cpu_load_monitor(20)

    # Aggressive rx rate limiting: 64KBytes refilled every 1s.
    bandwidth = {'size': 65536, 'refill_time': 1000}
    _, _, guest_address = vm.ssh_network_config(
        network_config,
        '1',
        rx_rate_limiter={'bandwidth': bandwidth}
    )

    vm.start()

    # The guest runs the iperf server...
    _start_iperf_on_guest(vm, guest_address)

    # ...and the host runs the iperf client, sending UDP traffic.
    client_cmd = '{} {} -u -c {} -b 1000000000 -t{} -f KBytes'.format(
        vm.jailer.netns_cmd_prefix(),
        IPERF_BINARY,
        guest_address,
        IPERF_TRANSMIT_TIME * 5
    )
    _run_local_iperf(client_cmd)
195+
196+
155197
def _check_tx_rate_limiting(test_microvm, guest_ips, host_ips):
156198
"""Check that the transmit rate is within expectations."""
157199
# Start iperf on the host as this is the tx rate limiting test.

0 commit comments

Comments
 (0)