Commit d0946a8

Kan Liang authored and Peter Zijlstra committed
perf/x86/intel: Hybrid PMU support for perf capabilities
Some platforms, e.g. Alder Lake, have hybrid architecture. Although most PMU capabilities are the same, there are still some unique PMU capabilities for different hybrid PMUs. Perf should register a dedicated pmu for each hybrid PMU.

Add a new struct x86_hybrid_pmu, which saves the dedicated pmu and capabilities for each hybrid PMU.

The architecture MSR, MSR_IA32_PERF_CAPABILITIES, only indicates the architecture features which are available on all hybrid PMUs. The architecture features are stored in the global x86_pmu.intel_cap. For Alder Lake, the model-specific features are perf metrics and PEBS-via-PT. The corresponding bits of the global x86_pmu.intel_cap should be 0 for these two features. Perf should not use the global intel_cap to check the features on a hybrid system. Add a dedicated intel_cap in the x86_hybrid_pmu to store the model-specific capabilities. Use the dedicated intel_cap to replace the global intel_cap for these two features. The dedicated intel_cap will be set in the following "Add Alder Lake Hybrid support" patch.

Add is_hybrid() to distinguish a hybrid system. ADL may have an alternative configuration. With that configuration, X86_FEATURE_HYBRID_CPU is not set. Perf cannot rely on the feature bit. Add a new static_key_false, perf_is_hybrid, to indicate a hybrid system. It will be assigned in the following "Add Alder Lake Hybrid support" patch as well.

Suggested-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Kan Liang <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
1 parent 61e76d5 commit d0946a8
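To make the intent of the new pieces concrete, the sketch below shows how they are expected to fit together once a later patch registers the hybrid PMUs. It is an illustration only, not code from this commit: the example_hybrid_setup() and example_has_metrics() functions and the adl_hybrid_pmus array are hypothetical names used for the sketch.

/*
 * Hedged sketch: a follow-up patch is expected to enable the
 * perf_is_hybrid static key and fill in each hybrid PMU's intel_cap;
 * callers then read capabilities through the hybrid() accessor.
 */
static struct x86_hybrid_pmu adl_hybrid_pmus[2];        /* hypothetical storage */

static void __init example_hybrid_setup(void)
{
        /*
         * X86_FEATURE_HYBRID_CPU cannot be relied on (an alternative ADL
         * configuration may not set it), so a dedicated static key marks
         * the system as hybrid.
         */
        static_branch_enable(&perf_is_hybrid);

        /*
         * Each hybrid PMU carries only its unique, model-specific
         * capabilities, e.g. perf metrics on the big-core PMU but not
         * on the small-core PMU.
         */
        adl_hybrid_pmus[0].intel_cap.perf_metrics = 1;
        adl_hybrid_pmus[1].intel_cap.perf_metrics = 0;
}

/*
 * hybrid() falls back to the global x86_pmu field when the system is not
 * hybrid (or the pmu pointer is NULL); otherwise it resolves the field of
 * the x86_hybrid_pmu that contains the given struct pmu.
 */
static bool example_has_metrics(struct pmu *pmu)
{
        return hybrid(pmu, intel_cap).perf_metrics;
}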

5 files changed: +60 -7 lines changed

arch/x86/events/core.c

Lines changed: 5 additions & 2 deletions

@@ -54,6 +54,7 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
 
 DEFINE_STATIC_KEY_FALSE(rdpmc_never_available_key);
 DEFINE_STATIC_KEY_FALSE(rdpmc_always_available_key);
+DEFINE_STATIC_KEY_FALSE(perf_is_hybrid);
 
 /*
  * This here uses DEFINE_STATIC_CALL_NULL() to get a static_call defined
@@ -1105,8 +1106,9 @@ static void del_nr_metric_event(struct cpu_hw_events *cpuc,
 static int collect_event(struct cpu_hw_events *cpuc, struct perf_event *event,
                          int max_count, int n)
 {
+        union perf_capabilities intel_cap = hybrid(cpuc->pmu, intel_cap);
 
-        if (x86_pmu.intel_cap.perf_metrics && add_nr_metric_event(cpuc, event))
+        if (intel_cap.perf_metrics && add_nr_metric_event(cpuc, event))
                 return -EINVAL;
 
         if (n >= max_count + cpuc->n_metric)
@@ -1581,6 +1583,7 @@ void x86_pmu_stop(struct perf_event *event, int flags)
 static void x86_pmu_del(struct perf_event *event, int flags)
 {
         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+        union perf_capabilities intel_cap = hybrid(cpuc->pmu, intel_cap);
         int i;
 
         /*
@@ -1620,7 +1623,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
         }
         cpuc->event_constraint[i-1] = NULL;
         --cpuc->n_events;
-        if (x86_pmu.intel_cap.perf_metrics)
+        if (intel_cap.perf_metrics)
                 del_nr_metric_event(cpuc, event);
 
         perf_event_update_userpage(event);

arch/x86/events/intel/core.c

Lines changed: 18 additions & 4 deletions

@@ -3646,6 +3646,12 @@ static inline bool is_mem_loads_aux_event(struct perf_event *event)
         return (event->attr.config & INTEL_ARCH_EVENT_MASK) == X86_CONFIG(.event=0x03, .umask=0x82);
 }
 
+static inline bool intel_pmu_has_cap(struct perf_event *event, int idx)
+{
+        union perf_capabilities *intel_cap = &hybrid(event->pmu, intel_cap);
+
+        return test_bit(idx, (unsigned long *)&intel_cap->capabilities);
+}
 
 static int intel_pmu_hw_config(struct perf_event *event)
 {
@@ -3712,7 +3718,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
          * with a slots event as group leader. When the slots event
          * is used in a metrics group, it too cannot support sampling.
          */
-        if (x86_pmu.intel_cap.perf_metrics && is_topdown_event(event)) {
+        if (intel_pmu_has_cap(event, PERF_CAP_METRICS_IDX) && is_topdown_event(event)) {
                 if (event->attr.config1 || event->attr.config2)
                         return -EINVAL;
 
@@ -4219,8 +4225,16 @@ static void intel_pmu_cpu_starting(int cpu)
         if (x86_pmu.version > 1)
                 flip_smm_bit(&x86_pmu.attr_freeze_on_smi);
 
-        /* Disable perf metrics if any added CPU doesn't support it. */
-        if (x86_pmu.intel_cap.perf_metrics) {
+        /*
+         * Disable perf metrics if any added CPU doesn't support it.
+         *
+         * Turn off the check for a hybrid architecture, because the
+         * architecture MSR, MSR_IA32_PERF_CAPABILITIES, only indicate
+         * the architecture features. The perf metrics is a model-specific
+         * feature for now. The corresponding bit should always be 0 on
+         * a hybrid platform, e.g., Alder Lake.
+         */
+        if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics) {
                 union perf_capabilities perf_cap;
 
                 rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_cap.capabilities);
@@ -5770,7 +5784,7 @@ __init int intel_pmu_init(void)
                 pr_cont("full-width counters, ");
         }
 
-        if (x86_pmu.intel_cap.perf_metrics)
+        if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics)
                 x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS;
 
         return 0;

arch/x86/events/intel/ds.c

Lines changed: 1 addition & 1 deletion

@@ -2205,7 +2205,7 @@ void __init intel_ds_init(void)
         }
         pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual);
 
-        if (x86_pmu.intel_cap.pebs_output_pt_available) {
+        if (!is_hybrid() && x86_pmu.intel_cap.pebs_output_pt_available) {
                 pr_cont("PEBS-via-PT, ");
                 x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT;
         }

arch/x86/events/perf_event.h

Lines changed: 33 additions & 0 deletions

@@ -631,6 +631,29 @@ enum {
         x86_lbr_exclusive_max,
 };
 
+struct x86_hybrid_pmu {
+        struct pmu                      pmu;
+        union perf_capabilities         intel_cap;
+};
+
+static __always_inline struct x86_hybrid_pmu *hybrid_pmu(struct pmu *pmu)
+{
+        return container_of(pmu, struct x86_hybrid_pmu, pmu);
+}
+
+extern struct static_key_false perf_is_hybrid;
+#define is_hybrid()             static_branch_unlikely(&perf_is_hybrid)
+
+#define hybrid(_pmu, _field)                            \
+(*({                                                    \
+        typeof(&x86_pmu._field) __Fp = &x86_pmu._field; \
+                                                        \
+        if (is_hybrid() && (_pmu))                      \
+                __Fp = &hybrid_pmu(_pmu)->_field;       \
+                                                        \
+        __Fp;                                           \
+}))
+
 /*
  * struct x86_pmu - generic x86 pmu
  */
@@ -817,6 +840,16 @@ struct x86_pmu {
         int             (*check_period) (struct perf_event *event, u64 period);
 
         int             (*aux_output_match) (struct perf_event *event);
+
+        /*
+         * Hybrid support
+         *
+         * Most PMU capabilities are the same among different hybrid PMUs.
+         * The global x86_pmu saves the architecture capabilities, which
+         * are available for all PMUs. The hybrid_pmu only includes the
+         * unique capabilities.
+         */
+        struct x86_hybrid_pmu           *hybrid_pmu;
 };
 
 struct x86_perf_task_context_opt {

arch/x86/include/asm/msr-index.h

Lines changed: 3 additions & 0 deletions

@@ -185,6 +185,9 @@
 #define MSR_PEBS_DATA_CFG               0x000003f2
 #define MSR_IA32_DS_AREA                0x00000600
 #define MSR_IA32_PERF_CAPABILITIES      0x00000345
+#define PERF_CAP_METRICS_IDX            15
+#define PERF_CAP_PT_IDX                 16
+
 #define MSR_PEBS_LD_LAT_THRESHOLD       0x000003f6
 
 #define MSR_IA32_RTIT_CTL               0x00000570
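Usage note on the two new constants: they name the bit positions of MSR_IA32_PERF_CAPABILITIES that the perf_metrics and pebs_output_pt_available bitfields of union perf_capabilities occupy, which is what lets intel_pmu_has_cap() test either capability with a plain test_bit(). A minimal sketch of that equivalence follows; the helper name is illustrative only, not part of the patch.

/*
 * Sketch: PERF_CAP_METRICS_IDX (bit 15) and PERF_CAP_PT_IDX (bit 16)
 * index the same bits as the named bitfields, so a generic bit test
 * and the bitfield read agree.
 */
static inline bool example_cap_is_set(union perf_capabilities *cap, int idx)
{
        return test_bit(idx, (unsigned long *)&cap->capabilities);
}

/*
 * example_cap_is_set(cap, PERF_CAP_METRICS_IDX) == cap->perf_metrics
 * example_cap_is_set(cap, PERF_CAP_PT_IDX)      == cap->pebs_output_pt_available
 */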
