Skip to content

Commit f6e659b

Browse files
jkoong-fbAlexei Starovoitov
authored and
Alexei Starovoitov
committed
selftests/bpf: Measure bpf_loop verifier performance
This patch tests bpf_loop in pyperf and strobemeta, and measures the
verifier performance of replacing the traditional for loop with bpf_loop.

The results are as follows:

~strobemeta~

Baseline
    verification time 6808200 usec
    stack depth 496
    processed 554252 insns (limit 1000000) max_states_per_insn 16
    total_states 15878 peak_states 13489 mark_read 3110
    #192 verif_scale_strobemeta:OK (unrolled loop)

Using bpf_loop
    verification time 31589 usec
    stack depth 96+400
    processed 1513 insns (limit 1000000) max_states_per_insn 2
    total_states 106 peak_states 106 mark_read 60
    #193 verif_scale_strobemeta_bpf_loop:OK

~pyperf600~

Baseline
    verification time 29702486 usec
    stack depth 368
    processed 626838 insns (limit 1000000) max_states_per_insn 7
    total_states 30368 peak_states 30279 mark_read 748
    #182 verif_scale_pyperf600:OK (unrolled loop)

Using bpf_loop
    verification time 148488 usec
    stack depth 320+40
    processed 10518 insns (limit 1000000) max_states_per_insn 10
    total_states 705 peak_states 517 mark_read 38
    #183 verif_scale_pyperf600_bpf_loop:OK

Using the bpf_loop helper led to approximately a 99% decrease in the
verification time and in the number of instructions.

Signed-off-by: Joanne Koong <[email protected]>
Signed-off-by: Alexei Starovoitov <[email protected]>
Acked-by: Andrii Nakryiko <[email protected]>
Link: https://lore.kernel.org/bpf/[email protected]
1 parent 4e5070b commit f6e659b

File tree

5 files changed

+169
-4
lines changed

5 files changed

+169
-4
lines changed

tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,12 @@ void test_verif_scale_pyperf600()
115115
scale_test("pyperf600.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
116116
}
117117

118+
/*
 * Verifier scale test for the pyperf600 variant built around bpf_loop():
 * hands pyperf600_bpf_loop.o to scale_test() as a raw tracepoint program.
 */
void test_verif_scale_pyperf600_bpf_loop(void)
119+
{
120+
/* use the bpf_loop helper */
121+
scale_test("pyperf600_bpf_loop.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
122+
}
123+
118124
void test_verif_scale_pyperf600_nounroll()
119125
{
120126
/* no unroll at all.
@@ -165,6 +171,12 @@ void test_verif_scale_strobemeta()
165171
scale_test("strobemeta.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
166172
}
167173

174+
/*
 * Verifier scale test for the strobemeta variant built around bpf_loop():
 * hands strobemeta_bpf_loop.o to scale_test() as a raw tracepoint program.
 */
void test_verif_scale_strobemeta_bpf_loop(void)
175+
{
176+
/* use the bpf_loop helper */
177+
scale_test("strobemeta_bpf_loop.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
178+
}
179+
168180
void test_verif_scale_strobemeta_nounroll1()
169181
{
170182
/* no unroll, tiny loops */

tools/testing/selftests/bpf/progs/pyperf.h

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,59 @@ struct {
159159
__uint(value_size, sizeof(long long) * 127);
160160
} stackmap SEC(".maps");
161161

162+
#ifdef USE_BPF_LOOP
163+
/*
 * State handed to process_frame_callback() through bpf_loop()'s callback
 * context argument. It carries what the unrolled for loop kept in locals.
 */
struct process_frame_ctx {
164+
/* combined with *symbol_counter to form the candidate symbol id */
int cur_cpu;
165+
/* incremented when the looked-up symbol id equals the candidate id */
int32_t *symbol_counter;
166+
/* frame pointer the callback passes to get_frame_data() */
void *frame_ptr;
167+
/* passed to get_frame_data(); its f_back member is read afterwards */
FrameData *frame;
168+
/* passed to get_frame_data() */
PidData *pidData;
169+
/* passed to get_frame_data() and used as the symbolmap key */
Symbol *sym;
170+
/* receives stack[i] and stack_len for each processed frame */
Event *event;
171+
/* set by the callback when the symbolmap lookup still fails after the update */
bool done;
172+
};
173+
174+
/*
 * Empty asm statement that names var as both a register output ("=r") and
 * the matching input ("0"), so the compiler has to treat var's value as
 * opaque afterwards. Used on the loop index right before its bounds check;
 * presumably to keep that check in the generated code for the verifier --
 * TODO confirm.
 */
#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var))
175+
176+
/*
 * bpf_loop() callback that unwinds one Python frame per invocation.
 *
 * @i:   iteration index supplied by bpf_loop(); also the slot in
 *       event->stack that receives this frame's symbol id.
 * @ctx: unwinding state shared by all iterations.
 *
 * Returns 0 to continue with the next iteration and 1 to stop the loop.
 * ctx->done is set (and the loop stopped) when a symbol cannot be found in
 * symbolmap even after inserting it.
 *
 * The parent frame is written back to ctx->frame_ptr so that the next
 * iteration continues from it. Keeping the advance in a local only, as the
 * for-loop version can, would make every iteration re-read the same frame,
 * because locals do not survive between callback invocations.
 *
 * NOTE(review): the caller derives stack_complete from its own frame_ptr
 * local; it has to look at ctx->frame_ptr to observe where unwinding ended.
 */
static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx)
{
	int zero = 0;
	void *frame_ptr = ctx->frame_ptr;
	PidData *pidData = ctx->pidData;
	FrameData *frame = ctx->frame;
	int32_t *symbol_counter = ctx->symbol_counter;
	int cur_cpu = ctx->cur_cpu;
	Event *event = ctx->event;
	Symbol *sym = ctx->sym;

	if (frame_ptr && get_frame_data(frame_ptr, pidData, frame, sym)) {
		int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
		int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, sym);

		if (!symbol_id) {
			/* first sighting: insert the symbol, then look it up again */
			bpf_map_update_elem(&symbolmap, sym, &zero, 0);
			symbol_id = bpf_map_lookup_elem(&symbolmap, sym);
			if (!symbol_id) {
				ctx->done = true;
				return 1;
			}
		}
		if (*symbol_id == new_symbol_id)
			(*symbol_counter)++;

		/* keep the bounds check on i from being optimized away */
		barrier_var(i);
		if (i >= STACK_MAX_LEN)
			return 1;

		event->stack[i] = *symbol_id;

		event->stack_len = i + 1;
		/* persist the parent frame for the next callback invocation */
		ctx->frame_ptr = frame->f_back;
	}
	return 0;
}
213+
#endif /* USE_BPF_LOOP */
214+
162215
#ifdef GLOBAL_FUNC
163216
__noinline
164217
#elif defined(SUBPROGS)
@@ -228,11 +281,26 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx)
228281
int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
229282
if (symbol_counter == NULL)
230283
return 0;
284+
#ifdef USE_BPF_LOOP
285+
struct process_frame_ctx ctx = {
286+
.cur_cpu = cur_cpu,
287+
.symbol_counter = symbol_counter,
288+
.frame_ptr = frame_ptr,
289+
.frame = &frame,
290+
.pidData = pidData,
291+
.sym = &sym,
292+
.event = event,
293+
};
294+
295+
bpf_loop(STACK_MAX_LEN, process_frame_callback, &ctx, 0);
296+
if (ctx.done)
297+
return 0;
298+
#else
231299
#ifdef NO_UNROLL
232300
#pragma clang loop unroll(disable)
233301
#else
234302
#pragma clang loop unroll(full)
235-
#endif
303+
#endif /* NO_UNROLL */
236304
/* Unwind python stack */
237305
for (int i = 0; i < STACK_MAX_LEN; ++i) {
238306
if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
@@ -251,6 +319,7 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx)
251319
frame_ptr = frame.f_back;
252320
}
253321
}
322+
#endif /* USE_BPF_LOOP */
254323
event->stack_complete = frame_ptr == NULL;
255324
} else {
256325
event->stack_complete = 1;
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/* Copyright (c) 2021 Facebook */
3+
4+
/* iteration count passed to bpf_loop() by the USE_BPF_LOOP unwinder in pyperf.h */
#define STACK_MAX_LEN 600
5+
/* build pyperf.h with the bpf_loop() callback instead of the for loop */
#define USE_BPF_LOOP
6+
#include "pyperf.h"

tools/testing/selftests/bpf/progs/strobemeta.h

Lines changed: 72 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,48 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
445445
return payload;
446446
}
447447

448+
#ifdef USE_BPF_LOOP
449+
/* Selects which reader read_var_callback() runs for the current bpf_loop() pass. */
enum read_type {
450+
/* callback calls read_int_var() */
READ_INT_VAR,
451+
/* callback calls read_map_var() */
READ_MAP_VAR,
452+
/* callback calls read_str_var() */
READ_STR_VAR,
453+
};
454+
455+
/*
 * State handed to read_var_callback() through bpf_loop()'s callback context
 * argument; the same context is reused for the int, str and map passes.
 */
struct read_var_ctx {
456+
/* forwarded as the data argument of the read_*_var() helpers */
struct strobemeta_payload *data;
457+
/* forwarded as the tls_base argument of the read_*_var() helpers */
void *tls_base;
458+
/* forwarded as the cfg argument of the read_*_var() helpers */
struct strobemeta_cfg *cfg;
459+
/* current payload position; updated by the str and map passes */
void *payload;
460+
/* value gets mutated */
461+
struct strobe_value_generic *value;
462+
/* which reader the callback dispatches to */
enum read_type type;
463+
};
464+
465+
/*
 * bpf_loop() callback that reads one strobemeta variable per invocation.
 *
 * @index: iteration index supplied by bpf_loop(); identifies the variable.
 * @ctx:   shared reader state; ctx->type picks the reader to run.
 *
 * Returns 1 to stop the loop when index is out of range for the selected
 * variable kind, 0 otherwise. The str and map readers update ctx->payload.
 */
static int read_var_callback(__u32 index, struct read_var_ctx *ctx)
{
	const enum read_type kind = ctx->type;

	if (kind == READ_INT_VAR) {
		if (index >= STROBE_MAX_INTS)
			return 1;
		read_int_var(ctx->cfg, index, ctx->tls_base, ctx->value, ctx->data);
		return 0;
	}

	if (kind == READ_MAP_VAR) {
		if (index >= STROBE_MAX_MAPS)
			return 1;
		/* the map reader returns the new payload position */
		ctx->payload = read_map_var(ctx->cfg, index, ctx->tls_base,
					    ctx->value, ctx->data, ctx->payload);
		return 0;
	}

	if (kind == READ_STR_VAR) {
		if (index >= STROBE_MAX_STRS)
			return 1;
		/* the string reader returns how far to advance the payload */
		ctx->payload += read_str_var(ctx->cfg, index, ctx->tls_base,
					     ctx->value, ctx->data, ctx->payload);
		return 0;
	}

	/* any other type value: nothing to read, keep iterating */
	return 0;
}
488+
#endif /* USE_BPF_LOOP */
489+
448490
/*
449491
* read_strobe_meta returns NULL, if no metadata was read; otherwise returns
450492
* pointer to *right after* payload ends
@@ -475,30 +517,57 @@ static void *read_strobe_meta(struct task_struct *task,
475517
*/
476518
tls_base = (void *)task;
477519

520+
#ifdef USE_BPF_LOOP
521+
struct read_var_ctx ctx = {
522+
.cfg = cfg,
523+
.tls_base = tls_base,
524+
.value = &value,
525+
.data = data,
526+
.payload = payload,
527+
};
528+
int err;
529+
530+
ctx.type = READ_INT_VAR;
531+
err = bpf_loop(STROBE_MAX_INTS, read_var_callback, &ctx, 0);
532+
if (err != STROBE_MAX_INTS)
533+
return NULL;
534+
535+
ctx.type = READ_STR_VAR;
536+
err = bpf_loop(STROBE_MAX_STRS, read_var_callback, &ctx, 0);
537+
if (err != STROBE_MAX_STRS)
538+
return NULL;
539+
540+
ctx.type = READ_MAP_VAR;
541+
err = bpf_loop(STROBE_MAX_MAPS, read_var_callback, &ctx, 0);
542+
if (err != STROBE_MAX_MAPS)
543+
return NULL;
544+
#else
478545
#ifdef NO_UNROLL
479546
#pragma clang loop unroll(disable)
480547
#else
481548
#pragma unroll
482-
#endif
549+
#endif /* NO_UNROLL */
483550
for (int i = 0; i < STROBE_MAX_INTS; ++i) {
484551
read_int_var(cfg, i, tls_base, &value, data);
485552
}
486553
#ifdef NO_UNROLL
487554
#pragma clang loop unroll(disable)
488555
#else
489556
#pragma unroll
490-
#endif
557+
#endif /* NO_UNROLL */
491558
for (int i = 0; i < STROBE_MAX_STRS; ++i) {
492559
payload += read_str_var(cfg, i, tls_base, &value, data, payload);
493560
}
494561
#ifdef NO_UNROLL
495562
#pragma clang loop unroll(disable)
496563
#else
497564
#pragma unroll
498-
#endif
565+
#endif /* NO_UNROLL */
499566
for (int i = 0; i < STROBE_MAX_MAPS; ++i) {
500567
payload = read_map_var(cfg, i, tls_base, &value, data, payload);
501568
}
569+
#endif /* USE_BPF_LOOP */
570+
502571
/*
503572
* return pointer right after end of payload, so it's possible to
504573
* calculate exact amount of useful data that needs to be sent
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2+
/* Copyright (c) 2021 Facebook */
3+
4+
/* iteration count of the READ_INT_VAR bpf_loop() pass in read_strobe_meta() */
#define STROBE_MAX_INTS 2
5+
/* iteration count of the READ_STR_VAR bpf_loop() pass */
#define STROBE_MAX_STRS 25
6+
/* iteration count of the READ_MAP_VAR bpf_loop() pass */
#define STROBE_MAX_MAPS 100
7+
/* NOTE(review): not referenced in the visible hunks; presumably consumed inside strobemeta.h -- confirm */
#define STROBE_MAX_MAP_ENTRIES 20
8+
/* build strobemeta.h with the bpf_loop() callback instead of the for loops */
#define USE_BPF_LOOP
9+
#include "strobemeta.h"

0 commit comments

Comments
 (0)