
Commit 7b2c05a

Kan Liang authored and Peter Zijlstra committed
perf/x86/intel: Generic support for hardware TopDown metrics
Intro
=====

The TopDown Microarchitecture Analysis (TMA) Method is a structured analysis
methodology for identifying critical performance bottlenecks in out-of-order
processors. Perf already supports the method.

The method works well, but there is one problem: collecting the TopDown
events ties up several GP counters. If a user wants to collect other events
at the same time, multiplexing is likely to be triggered, which impacts
accuracy.

To free up the scarce GP counters, the hardware TopDown metrics feature is
introduced starting with Ice Lake. The hardware implements an additional
"metrics" register and a new Fixed Counter 3 that measures pipeline "slots".
The TopDown events can be calculated from these instead.

Events
======

Level 1 TopDown has four metrics. There is no event code assigned to the
TopDown metrics, so four metric events are exported as separate perf events,
which map to the internal "metrics" counter register. Those events do not
exist in hardware, but can be allocated by the scheduler.

For the event mapping, a special 0x00 event code is used, which is reserved
for fake events. The metric events start from umask 0x10.

When the metric events are set up, they point to Fixed Counter 3 and have to
be handled specially:

- Add the update_topdown_event() callback to read the additional metrics MSR
  and generate the metrics.
- Add the set_topdown_event_period() callback to initialize the metrics MSR
  and Fixed Counter 3.
- Add a variable (n_metric) to track the number of accepted metrics events.
  Sharing the same metric between multiple users without multiplexing is not
  allowed.
- Only enable/disable Fixed Counter 3 when there are no other active TopDown
  events, which avoids unnecessary writes to the fixed control register.
- Disable the PMU when reading a metrics event. The metrics MSR and Fixed
  Counter 3 are read separately, and the values may be modified by an NMI.

None of the four metric events supports sampling. Since they are handled
specially for event update, a flag, PERF_X86_EVENT_TOPDOWN, is introduced to
indicate this case.

The slots event supports both sampling and counting. For counting, the flag
is also applied. For sampling, it is handled normally, like any other event.

Groups
======

The slots event is required in a TopDown group. To avoid reading the METRICS
register multiple times, the metrics and slots values can only be updated by
the slots event in a group; all active slots and metrics events are updated
in one pass. Therefore, the slots event must come before any metric events in
a TopDown group.

NMI
======

The METRICS-related register may overflow, in which case bit 48 of the STATUS
register is set. If so, PERF_METRICS and Fixed Counter 3 must be reset. The
patch also updates all active slots and metrics events in the NMI handler.

update_topdown_event() has to read two registers separately, and the values
may be modified by an NMI, so the PMU has to be disabled before calling the
function.

RDPMC
======

RDPMC is temporarily disabled. A later patch will enable it.

Suggested-by: Peter Zijlstra <[email protected]>
Signed-off-by: Kan Liang <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
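To make the derivation described above concrete, the sketch below shows how a
level-1 metric count can be computed from the PERF_METRICS MSR and the SLOTS
fixed counter. It is illustrative only and not part of this patch (the Ice
Lake-specific callbacks arrive in a follow-up patch); it assumes the
documented Ice Lake layout in which PERF_METRICS carries one 8-bit field per
level-1 metric, each a fraction (out of 0xff) of the slots count, and the
helper name is hypothetical.

/*
 * Illustrative sketch only; not part of this patch.  Assumes the
 * Ice Lake PERF_METRICS layout: four 8-bit fields, one per level-1
 * metric, each a fraction (out of 0xff) of the SLOTS count.
 */
static u64 topdown_metric_count(u64 metrics, u64 slots, int metric_idx)
{
	u64 frac = (metrics >> (metric_idx * 8)) & 0xff;

	/* Slots attributed to this metric: slots * frac / 0xff. */
	return slots * frac / 0xff;
}

A single group read triggers update_topdown_event() once, from the slots
leader, and each metric sibling then receives its share of the slots count;
this is why the slots event has to come first in a TopDown group.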
1 parent 9f0c4fa commit 7b2c05a

File tree: 5 files changed, 257 additions and 15 deletions

arch/x86/events/core.c

Lines changed: 53 additions & 10 deletions
@@ -76,6 +76,9 @@ u64 x86_perf_event_update(struct perf_event *event)
 	if (unlikely(!hwc->event_base))
 		return 0;
 
+	if (unlikely(is_topdown_count(event)) && x86_pmu.update_topdown_event)
+		return x86_pmu.update_topdown_event(event);
+
 	/*
 	 * Careful: an NMI might modify the previous event value.
 	 *
@@ -1031,6 +1034,42 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	return unsched ? -EINVAL : 0;
 }
 
+static int add_nr_metric_event(struct cpu_hw_events *cpuc,
+			       struct perf_event *event)
+{
+	if (is_metric_event(event)) {
+		if (cpuc->n_metric == INTEL_TD_METRIC_NUM)
+			return -EINVAL;
+		cpuc->n_metric++;
+	}
+
+	return 0;
+}
+
+static void del_nr_metric_event(struct cpu_hw_events *cpuc,
+				struct perf_event *event)
+{
+	if (is_metric_event(event))
+		cpuc->n_metric--;
+}
+
+static int collect_event(struct cpu_hw_events *cpuc, struct perf_event *event,
+			 int max_count, int n)
+{
+
+	if (x86_pmu.intel_cap.perf_metrics && add_nr_metric_event(cpuc, event))
+		return -EINVAL;
+
+	if (n >= max_count + cpuc->n_metric)
+		return -EINVAL;
+
+	cpuc->event_list[n] = event;
+	if (is_counter_pair(&event->hw))
+		cpuc->n_pair++;
+
+	return 0;
+}
+
 /*
  * dogrp: true if must collect siblings events (group)
  * returns total number of events and error code
@@ -1067,28 +1106,22 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
 	}
 
 	if (is_x86_event(leader)) {
-		if (n >= max_count)
+		if (collect_event(cpuc, leader, max_count, n))
 			return -EINVAL;
-		cpuc->event_list[n] = leader;
 		n++;
-		if (is_counter_pair(&leader->hw))
-			cpuc->n_pair++;
 	}
+
 	if (!dogrp)
 		return n;
 
 	for_each_sibling_event(event, leader) {
-		if (!is_x86_event(event) ||
-		    event->state <= PERF_EVENT_STATE_OFF)
+		if (!is_x86_event(event) || event->state <= PERF_EVENT_STATE_OFF)
 			continue;
 
-		if (n >= max_count)
+		if (collect_event(cpuc, event, max_count, n))
 			return -EINVAL;
 
-		cpuc->event_list[n] = event;
 		n++;
-		if (is_counter_pair(&event->hw))
-			cpuc->n_pair++;
 	}
 	return n;
 }
@@ -1110,6 +1143,10 @@ static inline void x86_assign_hw_event(struct perf_event *event,
 		hwc->event_base = 0;
 		break;
 
+	case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
+		/* All the metric events are mapped onto the fixed counter 3. */
+		idx = INTEL_PMC_IDX_FIXED_SLOTS;
+		/* fall through */
 	case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS-1:
 		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
 		hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 +
@@ -1245,6 +1282,10 @@ int x86_perf_event_set_period(struct perf_event *event)
 	if (unlikely(!hwc->event_base))
 		return 0;
 
+	if (unlikely(is_topdown_count(event)) &&
+	    x86_pmu.set_topdown_event_period)
+		return x86_pmu.set_topdown_event_period(event);
+
 	/*
 	 * If we are way outside a reasonable range then just skip forward:
 	 */
@@ -1529,6 +1570,8 @@ static void x86_pmu_del(struct perf_event *event, int flags)
 	}
 	cpuc->event_constraint[i-1] = NULL;
 	--cpuc->n_events;
+	if (x86_pmu.intel_cap.perf_metrics)
+		del_nr_metric_event(cpuc, event);
 
 	perf_event_update_userpage(event);

arch/x86/events/intel/core.c

Lines changed: 119 additions & 5 deletions
@@ -2165,11 +2165,24 @@ static inline void intel_clear_masks(struct perf_event *event, int idx)
 static void intel_pmu_disable_fixed(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
-	int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
 	u64 ctrl_val, mask;
+	int idx = hwc->idx;
 
-	mask = 0xfULL << (idx * 4);
+	if (is_topdown_idx(idx)) {
+		struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+		/*
+		 * When there are other active TopDown events,
+		 * don't disable the fixed counter 3.
+		 */
+		if (*(u64 *)cpuc->active_mask & INTEL_PMC_OTHER_TOPDOWN_BITS(idx))
+			return;
+		idx = INTEL_PMC_IDX_FIXED_SLOTS;
+	}
 
+	intel_clear_masks(event, idx);
+
+	mask = 0xfULL << ((idx - INTEL_PMC_IDX_FIXED) * 4);
 	rdmsrl(hwc->config_base, ctrl_val);
 	ctrl_val &= ~mask;
 	wrmsrl(hwc->config_base, ctrl_val);
@@ -2186,7 +2199,7 @@ static void intel_pmu_disable_event(struct perf_event *event)
 		x86_pmu_disable_event(event);
 		break;
 	case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
-		intel_clear_masks(event, idx);
+	case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
 		intel_pmu_disable_fixed(event);
 		break;
 	case INTEL_PMC_IDX_FIXED_BTS:
@@ -2219,19 +2232,49 @@ static void intel_pmu_del_event(struct perf_event *event)
 		intel_pmu_pebs_del(event);
 }
 
+static void intel_pmu_read_topdown_event(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	/* Only need to call update_topdown_event() once for group read. */
+	if ((cpuc->txn_flags & PERF_PMU_TXN_READ) &&
+	    !is_slots_event(event))
+		return;
+
+	perf_pmu_disable(event->pmu);
+	x86_pmu.update_topdown_event(event);
+	perf_pmu_enable(event->pmu);
+}
+
 static void intel_pmu_read_event(struct perf_event *event)
 {
 	if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
 		intel_pmu_auto_reload_read(event);
+	else if (is_topdown_count(event) && x86_pmu.update_topdown_event)
+		intel_pmu_read_topdown_event(event);
 	else
 		x86_perf_event_update(event);
 }
 
 static void intel_pmu_enable_fixed(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
-	int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
 	u64 ctrl_val, mask, bits = 0;
+	int idx = hwc->idx;
+
+	if (is_topdown_idx(idx)) {
+		struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+		/*
+		 * When there are other active TopDown events,
+		 * don't enable the fixed counter 3 again.
+		 */
+		if (*(u64 *)cpuc->active_mask & INTEL_PMC_OTHER_TOPDOWN_BITS(idx))
+			return;
+
+		idx = INTEL_PMC_IDX_FIXED_SLOTS;
+	}
+
+	intel_set_masks(event, idx);
 
 	/*
 	 * Enable IRQ generation (0x8), if not PEBS,
@@ -2251,6 +2294,7 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
 	if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
 		bits |= 0x4;
 
+	idx -= INTEL_PMC_IDX_FIXED;
 	bits <<= (idx * 4);
 	mask = 0xfULL << (idx * 4);
 
@@ -2279,7 +2323,7 @@ static void intel_pmu_enable_event(struct perf_event *event)
 		__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
 		break;
 	case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
-		intel_set_masks(event, idx);
+	case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
 		intel_pmu_enable_fixed(event);
 		break;
 	case INTEL_PMC_IDX_FIXED_BTS:
@@ -2439,6 +2483,15 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
 		intel_pt_interrupt();
 	}
 
+	/*
+	 * Intel Perf metrics
+	 */
+	if (__test_and_clear_bit(GLOBAL_STATUS_PERF_METRICS_OVF_BIT, (unsigned long *)&status)) {
+		handled++;
+		if (x86_pmu.update_topdown_event)
+			x86_pmu.update_topdown_event(NULL);
+	}
+
 	/*
 	 * Checkpointed counters can lead to 'spurious' PMIs because the
 	 * rollback caused by the PMI will have cleared the overflow status
@@ -3375,6 +3428,58 @@ static int intel_pmu_hw_config(struct perf_event *event)
 	if (event->attr.type != PERF_TYPE_RAW)
 		return 0;
 
+	/*
+	 * Config Topdown slots and metric events
+	 *
+	 * The slots event on Fixed Counter 3 can support sampling,
+	 * which will be handled normally in x86_perf_event_update().
+	 *
+	 * Metric events don't support sampling and require being paired
+	 * with a slots event as group leader. When the slots event
+	 * is used in a metrics group, it too cannot support sampling.
+	 */
+	if (x86_pmu.intel_cap.perf_metrics && is_topdown_event(event)) {
+		if (event->attr.config1 || event->attr.config2)
+			return -EINVAL;
+
+		/*
+		 * The TopDown metrics events and slots event don't
+		 * support any filters.
+		 */
+		if (event->attr.config & X86_ALL_EVENT_FLAGS)
+			return -EINVAL;
+
+		if (is_metric_event(event)) {
+			struct perf_event *leader = event->group_leader;
+
+			/* The metric events don't support sampling. */
+			if (is_sampling_event(event))
+				return -EINVAL;
+
+			/* The metric events require a slots group leader. */
+			if (!is_slots_event(leader))
+				return -EINVAL;
+
+			/*
+			 * The leader/SLOTS must not be a sampling event for
+			 * metric use; hardware requires it starts at 0 when used
+			 * in conjunction with MSR_PERF_METRICS.
+			 */
+			if (is_sampling_event(leader))
+				return -EINVAL;
+
+			event->event_caps |= PERF_EV_CAP_SIBLING;
+			/*
+			 * Only once we have a METRICs sibling do we
+			 * need TopDown magic.
+			 */
+			leader->hw.flags |= PERF_X86_EVENT_TOPDOWN;
+			event->hw.flags |= PERF_X86_EVENT_TOPDOWN;
+
+			event->hw.flags &= ~PERF_X86_EVENT_RDPMC_ALLOWED;
+		}
+	}
+
 	if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
 		return 0;
 
@@ -5218,6 +5323,15 @@ __init int intel_pmu_init(void)
 		 * counter, so do not extend mask to generic counters
 		 */
 		for_each_event_constraint(c, x86_pmu.event_constraints) {
+			/*
+			 * Don't extend the topdown slots and metrics
+			 * events to the generic counters.
+			 */
+			if (c->idxmsk64 & INTEL_PMC_MSK_TOPDOWN) {
+				c->weight = hweight64(c->idxmsk64);
+				continue;
+			}
+
 			if (c->cmask == FIXED_EVENT_FLAGS
 			    && c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) {
 				c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;

arch/x86/events/perf_event.h

Lines changed: 37 additions & 0 deletions
@@ -79,6 +79,31 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode)
 #define PERF_X86_EVENT_PEBS_VIA_PT	0x0800 /* use PT buffer for PEBS */
 #define PERF_X86_EVENT_PAIR		0x1000 /* Large Increment per Cycle */
 #define PERF_X86_EVENT_LBR_SELECT	0x2000 /* Save/Restore MSR_LBR_SELECT */
+#define PERF_X86_EVENT_TOPDOWN		0x4000 /* Count Topdown slots/metrics events */
+
+static inline bool is_topdown_count(struct perf_event *event)
+{
+	return event->hw.flags & PERF_X86_EVENT_TOPDOWN;
+}
+
+static inline bool is_metric_event(struct perf_event *event)
+{
+	u64 config = event->attr.config;
+
+	return ((config & ARCH_PERFMON_EVENTSEL_EVENT) == 0) &&
+		((config & INTEL_ARCH_EVENT_MASK) >= INTEL_TD_METRIC_RETIRING) &&
+		((config & INTEL_ARCH_EVENT_MASK) <= INTEL_TD_METRIC_MAX);
+}
+
+static inline bool is_slots_event(struct perf_event *event)
+{
+	return (event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_TD_SLOTS;
+}
+
+static inline bool is_topdown_event(struct perf_event *event)
+{
+	return is_metric_event(event) || is_slots_event(event);
+}
 
 struct amd_nb {
 	int nb_id;  /* NorthBridge id */
@@ -284,6 +309,12 @@ struct cpu_hw_events {
 	 */
 	u64				tfa_shadow;
 
+	/*
+	 * Perf Metrics
+	 */
+	/* number of accepted metrics events */
+	int				n_metric;
+
 	/*
 	 * AMD specific bits
 	 */
@@ -726,6 +757,12 @@ struct x86_pmu {
 	 */
 	atomic_t	lbr_exclusive[x86_lbr_exclusive_max];
 
+	/*
+	 * Intel perf metrics
+	 */
+	u64		(*update_topdown_event)(struct perf_event *event);
+	int		(*set_topdown_event_period)(struct perf_event *event);
+
 	/*
 	 * perf task context (i.e. struct perf_event_context::task_ctx_data)
 	 * switch helper to bridge calls from perf/core to perf/x86.

arch/x86/include/asm/msr-index.h

Lines changed: 1 addition & 0 deletions
@@ -857,6 +857,7 @@
 #define MSR_CORE_PERF_FIXED_CTR0	0x00000309
 #define MSR_CORE_PERF_FIXED_CTR1	0x0000030a
 #define MSR_CORE_PERF_FIXED_CTR2	0x0000030b
+#define MSR_CORE_PERF_FIXED_CTR3	0x0000030c
 #define MSR_CORE_PERF_FIXED_CTR_CTRL	0x0000038d
 #define MSR_CORE_PERF_GLOBAL_STATUS	0x0000038e
 #define MSR_CORE_PERF_GLOBAL_CTRL	0x0000038f
