Commit e050e3f

Stephane Eranian authored and Ingo Molnar committed
perf: Fix broken interrupt rate throttling
This patch fixes the sampling interrupt throttling mechanism.

It was broken in v3.2. Events were not being unthrottled. The
unthrottling mechanism required that events be checked at each timer
tick.

This patch solves this problem and also separates:

  - unthrottling
  - multiplexing
  - frequency-mode period adjustments

Not all of them need to be executed at each timer tick.

This third version of the patch is based on my original patch +
PeterZ proposal (https://lkml.org/lkml/2012/1/7/87).

At each timer tick, for each context:

  - if the current CPU has throttled events, we unthrottle events

  - if context has frequency-based events, we adjust sampling periods

  - if we have reached the jiffies interval, we multiplex (rotate)

We decoupled rotation (multiplexing) from frequency-mode sampling
period adjustments. They should not necessarily happen at the same
rate. Multiplexing is subject to jiffies_interval (currently at 1 but
could be higher once the tunable is exposed via sysfs).

We have grouped frequency-mode adjustment and unthrottling into the
same routine to minimize code duplication. When throttled while in
frequency mode, we scan the events only once.

We have fixed the threshold enforcement code in __perf_event_overflow().
There was a bug whereby it would allow more than the authorized rate
because an increment of hwc->interrupts was not executed at the right
place.

The patch was tested with low sampling limit (2000) and fixed periods,
frequency mode, overcommitted PMU.

On a 2.1GHz AMD CPU:

  $ cat /proc/sys/kernel/perf_event_max_sample_rate
  2000

We set a rate of 3000 samples/sec (2.1GHz/3000 = 700000):

  $ perf record -e cycles,cycles -c 700000 noploop 10
  $ perf report -D | tail -21
  Aggregated stats:
           TOTAL events:      80086
            MMAP events:         88
            COMM events:          2
            EXIT events:          4
        THROTTLE events:      19996
      UNTHROTTLE events:      19996
          SAMPLE events:      40000
  cycles stats:
           TOTAL events:      40006
            MMAP events:          5
            COMM events:          1
            EXIT events:          4
        THROTTLE events:       9998
      UNTHROTTLE events:       9998
          SAMPLE events:      20000
  cycles stats:
           TOTAL events:      39996
        THROTTLE events:       9998
      UNTHROTTLE events:       9998
          SAMPLE events:      20000

For 10s, the cap is 2x2000x10 = 40000 samples.
We get exactly that: 20000 samples/event.

Signed-off-by: Stephane Eranian <[email protected]>
Cc: <[email protected]> # v3.2+
Signed-off-by: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/r/20120126160319.GA5655@quad
Signed-off-by: Ingo Molnar <[email protected]>
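The per-tick accounting described above can be seen in isolation with a small userspace model. This is an illustrative sketch, not the kernel code: the main() harness, the printed output, the demo value of max_samples_per_tick and the plain global variables standing in for the per-CPU state are assumptions; only the field and variable names mirror the patch. Each timer tick advances a sequence number; the first overflow seen after a tick restarts hwc->interrupts, and once the count reaches the limit the event is marked throttled until the next tick.

/*
 * Standalone model (userspace simulation, not kernel code) of the
 * seq-based throttle accounting described in the commit message.
 */
#include <stdio.h>
#include <stdint.h>

#define MAX_INTERRUPTS (~0ULL)

static uint64_t perf_throttled_seq;       /* stands in for the per-CPU tick sequence */
static int      perf_throttled_count;     /* stands in for the per-CPU throttle count */
static uint64_t max_samples_per_tick = 4; /* arbitrary low limit for the demo */

struct hw_perf_event {
        uint64_t interrupts_seq;
        uint64_t interrupts;
};

/* Models the threshold check in __perf_event_overflow(); returns 1 if throttled. */
static int overflow(struct hw_perf_event *hwc)
{
        uint64_t seq = perf_throttled_seq;

        if (seq != hwc->interrupts_seq) {
                /* first overflow since the last tick: restart the count */
                hwc->interrupts_seq = seq;
                hwc->interrupts = 1;
        } else {
                hwc->interrupts++;
                if (hwc->interrupts >= max_samples_per_tick) {
                        /* hit the per-tick cap: remember it and mark the event throttled */
                        perf_throttled_count++;
                        hwc->interrupts = MAX_INTERRUPTS;
                        return 1;
                }
        }
        return 0;
}

/* Models the unthrottle step of perf_event_task_tick(). */
static void tick(struct hw_perf_event *hwc)
{
        int throttled;

        perf_throttled_seq++;
        throttled = perf_throttled_count;
        perf_throttled_count = 0;

        if (throttled && hwc->interrupts == MAX_INTERRUPTS)
                hwc->interrupts = 0;    /* unthrottle on the tick */
}

int main(void)
{
        struct hw_perf_event hwc = { 0, 0 };
        int t, i, samples, stop;

        for (t = 1; t <= 3; t++) {
                tick(&hwc);             /* timer tick: advance seq, unthrottle */
                samples = 0;
                for (i = 0; i < 10; i++) {
                        stop = overflow(&hwc);
                        samples++;      /* the sample that trips the limit is still written */
                        if (stop)
                                break;  /* the event stays stopped until the next tick */
                }
                printf("tick %d: %d samples\n", t, samples);
        }
        return 0;
}

With the demo limit of 4, every tick records exactly 4 samples: the count restarts at each tick and can never drift past the cap, which is the behavior the fixed threshold check is meant to enforce.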
1 parent 74ea15d commit e050e3f

2 files changed: 67 additions & 38 deletions

include/linux/perf_event.h

Lines changed: 1 addition & 0 deletions
@@ -587,6 +587,7 @@ struct hw_perf_event {
         u64                     sample_period;
         u64                     last_period;
         local64_t               period_left;
+        u64                     interrupts_seq;
         u64                     interrupts;
 
         u64                     freq_time_stamp;

kernel/events/core.c

Lines changed: 66 additions & 38 deletions
@@ -2300,6 +2300,9 @@ do { \
         return div64_u64(dividend, divisor);
 }
 
+static DEFINE_PER_CPU(int, perf_throttled_count);
+static DEFINE_PER_CPU(u64, perf_throttled_seq);
+
 static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 {
         struct hw_perf_event *hwc = &event->hw;
@@ -2325,16 +2328,29 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
         }
 }
 
-static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
+/*
+ * combine freq adjustment with unthrottling to avoid two passes over the
+ * events. At the same time, make sure, having freq events does not change
+ * the rate of unthrottling as that would introduce bias.
+ */
+static void perf_adjust_freq_unthr_context(struct perf_event_context *ctx,
+                                           int needs_unthr)
 {
         struct perf_event *event;
         struct hw_perf_event *hwc;
-        u64 interrupts, now;
+        u64 now, period = TICK_NSEC;
         s64 delta;
 
-        if (!ctx->nr_freq)
+        /*
+         * only need to iterate over all events iff:
+         * - context have events in frequency mode (needs freq adjust)
+         * - there are events to unthrottle on this cpu
+         */
+        if (!(ctx->nr_freq || needs_unthr))
                 return;
 
+        raw_spin_lock(&ctx->lock);
+
         list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
                 if (event->state != PERF_EVENT_STATE_ACTIVE)
                         continue;
@@ -2344,28 +2360,35 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
 
                 hwc = &event->hw;
 
-                interrupts = hwc->interrupts;
-                hwc->interrupts = 0;
-
-                /*
-                 * unthrottle events on the tick
-                 */
-                if (interrupts == MAX_INTERRUPTS) {
+                if (needs_unthr && hwc->interrupts == MAX_INTERRUPTS) {
+                        hwc->interrupts = 0;
                         perf_log_throttle(event, 1);
                         event->pmu->start(event, 0);
                 }
 
                 if (!event->attr.freq || !event->attr.sample_freq)
                         continue;
 
-                event->pmu->read(event);
+                /*
+                 * stop the event and update event->count
+                 */
+                event->pmu->stop(event, PERF_EF_UPDATE);
+
                 now = local64_read(&event->count);
                 delta = now - hwc->freq_count_stamp;
                 hwc->freq_count_stamp = now;
 
+                /*
+                 * restart the event
+                 * reload only if value has changed
+                 */
                 if (delta > 0)
                         perf_adjust_period(event, period, delta);
+
+                event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0);
         }
+
+        raw_spin_unlock(&ctx->lock);
 }
 
 /*
@@ -2388,54 +2411,40 @@ static void rotate_ctx(struct perf_event_context *ctx)
  */
 static void perf_rotate_context(struct perf_cpu_context *cpuctx)
 {
-        u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC;
         struct perf_event_context *ctx = NULL;
-        int rotate = 0, remove = 1, freq = 0;
+        int rotate = 0, remove = 1;
 
         if (cpuctx->ctx.nr_events) {
                 remove = 0;
                 if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
                         rotate = 1;
-                if (cpuctx->ctx.nr_freq)
-                        freq = 1;
         }
 
         ctx = cpuctx->task_ctx;
         if (ctx && ctx->nr_events) {
                 remove = 0;
                 if (ctx->nr_events != ctx->nr_active)
                         rotate = 1;
-                if (ctx->nr_freq)
-                        freq = 1;
         }
 
-        if (!rotate && !freq)
+        if (!rotate)
                 goto done;
 
         perf_ctx_lock(cpuctx, cpuctx->task_ctx);
         perf_pmu_disable(cpuctx->ctx.pmu);
 
-        if (freq) {
-                perf_ctx_adjust_freq(&cpuctx->ctx, interval);
-                if (ctx)
-                        perf_ctx_adjust_freq(ctx, interval);
-        }
-
-        if (rotate) {
-                cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
-                if (ctx)
-                        ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
+        cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
+        if (ctx)
+                ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
 
-                rotate_ctx(&cpuctx->ctx);
-                if (ctx)
-                        rotate_ctx(ctx);
+        rotate_ctx(&cpuctx->ctx);
+        if (ctx)
+                rotate_ctx(ctx);
 
-                perf_event_sched_in(cpuctx, ctx, current);
-        }
+        perf_event_sched_in(cpuctx, ctx, current);
 
         perf_pmu_enable(cpuctx->ctx.pmu);
         perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
-
 done:
         if (remove)
                 list_del_init(&cpuctx->rotation_list);
@@ -2445,10 +2454,22 @@ void perf_event_task_tick(void)
 {
         struct list_head *head = &__get_cpu_var(rotation_list);
         struct perf_cpu_context *cpuctx, *tmp;
+        struct perf_event_context *ctx;
+        int throttled;
 
         WARN_ON(!irqs_disabled());
 
+        __this_cpu_inc(perf_throttled_seq);
+        throttled = __this_cpu_xchg(perf_throttled_count, 0);
+
         list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) {
+                ctx = &cpuctx->ctx;
+                perf_adjust_freq_unthr_context(ctx, throttled);
+
+                ctx = cpuctx->task_ctx;
+                if (ctx)
+                        perf_adjust_freq_unthr_context(ctx, throttled);
+
                 if (cpuctx->jiffies_interval == 1 ||
                                 !(jiffies % cpuctx->jiffies_interval))
                         perf_rotate_context(cpuctx);
@@ -4509,6 +4530,7 @@ static int __perf_event_overflow(struct perf_event *event,
 {
         int events = atomic_read(&event->event_limit);
         struct hw_perf_event *hwc = &event->hw;
+        u64 seq;
         int ret = 0;
 
         /*
@@ -4518,14 +4540,20 @@ static int __perf_event_overflow(struct perf_event *event,
         if (unlikely(!is_sampling_event(event)))
                 return 0;
 
-        if (unlikely(hwc->interrupts >= max_samples_per_tick)) {
-                if (throttle) {
+        seq = __this_cpu_read(perf_throttled_seq);
+        if (seq != hwc->interrupts_seq) {
+                hwc->interrupts_seq = seq;
+                hwc->interrupts = 1;
+        } else {
+                hwc->interrupts++;
+                if (unlikely(throttle
+                             && hwc->interrupts >= max_samples_per_tick)) {
+                        __this_cpu_inc(perf_throttled_count);
                         hwc->interrupts = MAX_INTERRUPTS;
                         perf_log_throttle(event, 0);
                         ret = 1;
                 }
-        } else
-                hwc->interrupts++;
+        }
 
         if (event->attr.freq) {
                 u64 now = perf_clock();
