Skip to content

Commit a062623

Browse files
kuba-mooNipaLocal
authored and
NipaLocal
committed
selftests: drv-net-hw: add test for memory allocation failures with page pool
Bugs in memory allocation failure paths are quite common. Add a test exercising those paths based on qstat and the page pool failure hook.

Running on bnxt:

  # ./drivers/net/hw/pp_alloc_fail.py
  KTAP version 1
  1..1
  # ethtool -G change retval: success
  ok 1 pp_alloc_fail.test_pp_alloc
  # Totals: pass:1 fail:0 xfail:0 xpass:0 skip:0 error:0

I initially wrote this test to validate commit be43b74 ("net/mlx5e: RX, Fix page_pool allocation failure recovery for striding rq") but mlx5 still doesn't have qstat. So I ran it on bnxt, and while bnxt survives, I found the problem fixed in commit 7301177 ("eth: bnxt: fix counting packets discarded due to OOM and netpoll").

Reviewed-by: Willem de Bruijn <[email protected]>
Signed-off-by: Jakub Kicinski <[email protected]>
Signed-off-by: NipaLocal <nipa@local>
1 parent 1aedde3 commit a062623

File tree

3 files changed

+134
-0
lines changed

3 files changed

+134
-0
lines changed

tools/testing/selftests/drivers/net/hw/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ TEST_PROGS = \
99
hw_stats_l3.sh \
1010
hw_stats_l3_gre.sh \
1111
loopback.sh \
12+
pp_alloc_fail.py \
1213
#
1314

1415
TEST_FILES := \
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
#!/usr/bin/env python3
2+
# SPDX-License-Identifier: GPL-2.0
3+
4+
import time
5+
import os
6+
from lib.py import ksft_run, ksft_exit, ksft_pr
7+
from lib.py import KsftSkipEx, KsftFailEx
8+
from lib.py import NetdevFamily, NlError
9+
from lib.py import NetDrvEpEnv
10+
from lib.py import cmd, tool, GenerateTraffic
11+
12+
13+
def _write_fail_config(config):
    """Write fail_function settings to debugfs.

    Each key of *config* names a file under
    /sys/kernel/debug/fail_function/; the matching value is converted to a
    string and written to that file followed by a newline.
    """
    base = "/sys/kernel/debug/fail_function/"
    for knob in config:
        with open(base + knob, "w") as knob_file:
            knob_file.write("%s\n" % (config[knob],))
17+
18+
19+
def _enable_pp_allocation_fail():
    """Turn on error injection for page_pool_alloc_pages.

    Raises:
        KsftSkipEx: when the fail_function debugfs directory is missing.
    """
    fail_dir = "/sys/kernel/debug/fail_function"
    if not os.path.exists(fail_dir):
        raise KsftSkipEx("Kernel built without function error injection (or DebugFS)")

    # Only add the function to the injection list when it has no
    # per-function entry yet.
    already_listed = os.path.exists(fail_dir + "/page_pool_alloc_pages")
    if not already_listed:
        with open(fail_dir + "/inject", "w") as inject:
            inject.write("page_pool_alloc_pages\n")

    # NOTE(review): knob semantics are defined by the kernel fault-injection
    # framework; presumably this fails roughly every 511th call, with no
    # limit on the number of failures (times == -1) -- confirm against the
    # fault-injection documentation.
    settings = {
        "verbose": 0,
        "interval": 511,
        "probability": 100,
        "times": -1,
    }
    _write_fail_config(settings)
33+
34+
35+
def _disable_pp_allocation_fail():
    """Turn page pool allocation failure injection back off.

    Does nothing when the fail_function debugfs directory does not exist.
    """
    fail_dir = "/sys/kernel/debug/fail_function"
    if os.path.exists(fail_dir):
        if os.path.exists(fail_dir + "/page_pool_alloc_pages"):
            # NOTE(review): writing an empty line to "inject" presumably
            # clears the list of injected functions -- confirm against the
            # fault-injection documentation.
            with open(fail_dir + "/inject", "w") as inject:
                inject.write("\n")

        # Reset the knobs so that no further failures are generated.
        _write_fail_config({"probability": 0, "times": 0})
47+
48+
49+
def test_pp_alloc(cfg, netdevnl):
    """Exercise the driver's RX path while page pool allocations are failing.

    The test starts traffic, enables failure injection for
    page_pool_alloc_pages, verifies that the 'rx-alloc-fail' qstat grows,
    then changes the RX ring size with ethtool and checks that traffic keeps
    flowing.

    Args:
        cfg: test environment; ``ifindex`` and ``ifname`` are read from it
            and it is passed to GenerateTraffic.
        netdevnl: netdev netlink family object used to dump qstats.

    Raises:
        KsftSkipEx: if the driver does not report 'rx-alloc-fail', if error
            injection is unavailable, or if too few failures are observed.
        KsftFailEx: if fewer than 15000 packets are received in one second
            during any of the traffic checks.
    """
    # Local import keeps this block self-contained.
    import errno

    def get_stats():
        """Return the first entry of the qstats dump for the interface."""
        return netdevnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]

    def check_traffic_flowing():
        """Raise KsftFailEx unless >= 15000 packets arrive within a second."""
        before = get_stats()
        time.sleep(1)
        after = get_stats()
        rx_delta = after['rx-packets'] - before['rx-packets']
        if rx_delta < 15000:
            raise KsftFailEx("Traffic seems low:", rx_delta)

    try:
        stats = get_stats()
    except NlError as e:
        # A driver without qstats support is a skip, not an error.
        if e.nl_msg.error != -errno.EOPNOTSUPP:
            raise
        stats = {}
    if 'rx-alloc-fail' not in stats:
        raise KsftSkipEx("Driver does not report 'rx-alloc-fail' via qstats")

    set_g = False
    orig_rx = None
    traffic = None
    try:
        traffic = GenerateTraffic(cfg)

        check_traffic_flowing()

        _enable_pp_allocation_fail()

        s1 = get_stats()
        time.sleep(3)
        s2 = get_stats()

        fails = s2['rx-alloc-fail'] - s1['rx-alloc-fail']
        pkts = s2['rx-packets'] - s1['rx-packets']
        if fails < 1:
            raise KsftSkipEx("Allocation failures not increasing")
        if fails < 100:
            raise KsftSkipEx("Allocation increasing too slowly", fails,
                             "packets:", pkts)

        # Basic failures are fine, try to wobble some settings to catch extra failures
        check_traffic_flowing()
        rings = tool("ethtool", "-g " + cfg.ifname, json=True)[0]
        orig_rx = rings.get('rx')
        rx_max = rings.get('rx-max')
        if orig_rx is None:
            new_g = None
        elif rx_max is not None and orig_rx * 2 <= rx_max:
            # Prefer growing the ring when the maximum allows it...
            new_g = orig_rx * 2
        else:
            # ...otherwise (or when the maximum is not reported) shrink it.
            new_g = orig_rx // 2

        if new_g:
            set_g = cmd(f"ethtool -G {cfg.ifname} rx {new_g}", fail=False).ret == 0
            if set_g:
                ksft_pr("ethtool -G change retval: success")
            else:
                ksft_pr("ethtool -G change retval: did not succeed", new_g)
        else:
            ksft_pr("ethtool -G change retval: did not try")

        time.sleep(0.1)
        check_traffic_flowing()
    finally:
        # Each cleanup step sits in its own ``finally`` so that an exception
        # in one step cannot leave the traffic generator running or the ring
        # size modified.
        try:
            _disable_pp_allocation_fail()
        finally:
            try:
                if traffic:
                    traffic.stop()
                    time.sleep(0.1)
            finally:
                if set_g:
                    cmd(f"ethtool -G {cfg.ifname} rx {orig_rx}")
118+
119+
120+
def main() -> None:
    """Run test_pp_alloc against a real (non-netdevsim) device."""
    netdevnl = NetdevFamily()
    with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
        cases = [test_pp_alloc]
        ksft_run(cases, args=(cfg, netdevnl))
    ksft_exit()


if __name__ == "__main__":
    main()

tools/testing/selftests/net/lib/py/ksft.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@
1111
KSFT_RESULT_ALL = True
1212

1313

14+
class KsftFailEx(Exception):
    """Exception a test raises to report that it has failed."""
16+
17+
1418
class KsftSkipEx(Exception):
    """Exception a test raises to report that it should be skipped."""
1620

0 commit comments

Comments
 (0)