Skip to content

Commit 28bdb6f

Browse files
committed
Merge pull-request openvelinux#64 -- 'velinux-kernel-intel/cwf-pmu-6.6' into intel-6.6-velinux
== Description This is to backport PMU core/uncore/tool upstream patches for the CWF platform. == Test - core PMU perf counting test ``` [root@cwf linux]# tools/perf/perf stat -a sleep 1 Performance counter stats for 'system wide': 491,357.35 msec cpu-clock # 482.195 CPUs utilized 2,471 context-switches # 5.029 /sec 481 cpu-migrations # 0.979 /sec 88 page-faults # 0.179 /sec 650,502,887 cycles # 0.001 GHz 185,129,269 instructions # 0.28 insn per cycle 37,198,246 branches # 75.705 K/sec 216,984 branch-misses # 0.58% of all branches 1.019001085 seconds time elapsed ``` - core PMU perf recording tests (fixed and GP counters) pass. ``` [root@cwf linux]# tools/perf/perf record -e instructions -Iax,bx -b -c 100000 sleep 1 [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.047 MB perf.data (27 samples) ] [root@cwf linux]# tools/perf/perf record -e branches -Iax,bx -b -c 10000 sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.066 MB perf.data (60 samples) ] ``` - uncore devices can be seen in sysfs ``` [root@cwf linux]# ls /sys/devices/* | grep uncore ``` - uncore PMU perf counting tests pass. ``` [root@cwf linux]# tools/perf/perf stat -e uncore_upi/event=0x1/,uncore_cha/event=0x1/,uncore_imc/event=0x1/ -a sleep 1 Performance counter stats for 'system wide': 25,144,619,084 uncore_upi/event=0x1/ 109,517,186,568 uncore_cha/event=0x1/ 22,178,643,523 uncore_imc/event=0x1/ 1.004042980 seconds time elapsed ``` - CWF-specific perf event counting test passes. ``` [root@cwf linux]# tools/perf/perf stat -e LONGEST_LAT_CACHE.MISS,LONGEST_LAT_CACHE.REFERENCE -a sleep 1 Performance counter stats for 'system wide': 825,684 LONGEST_LAT_CACHE.MISS 11,596,700 LONGEST_LAT_CACHE.REFERENCE 1.014799623 seconds time elapsed ``` - CWF-specific perf event sampling test passes.
``` [root@cwf linux]# tools/perf/perf record -e LONGEST_LAT_CACHE.MISS,LONGEST_LAT_CACHE.REFERENCE -c 10000 -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.852 MB perf.data (1082 samples) ] ``` - GNR TPMI-based RAPL PMU events are available ``` $ perf list | grep -i energy power/energy-pkg/ [Kernel PMU event] power/energy-ram/ [Kernel PMU event] ```
2 parents 3713c15 + 1f9d89d commit 28bdb6f

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

42 files changed

+3969
-1533
lines changed

arch/x86/events/amd/core.c

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -417,7 +417,7 @@ static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
417417
* be removed on one CPU at a time AND PMU is disabled
418418
* when we come here
419419
*/
420-
for (i = 0; i < x86_pmu.num_counters; i++) {
420+
for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
421421
if (cmpxchg(nb->owners + i, event, NULL) == event)
422422
break;
423423
}
@@ -484,7 +484,7 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev
484484
* because of successive calls to x86_schedule_events() from
485485
* hw_perf_group_sched_in() without hw_perf_enable()
486486
*/
487-
for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
487+
for_each_set_bit(idx, c->idxmsk, x86_pmu_max_num_counters(NULL)) {
488488
if (new == -1 || hwc->idx == idx)
489489
/* assign free slot, prefer hwc->idx */
490490
old = cmpxchg(nb->owners + idx, NULL, event);
@@ -527,7 +527,7 @@ static struct amd_nb *amd_alloc_nb(int cpu)
527527
/*
528528
* initialize all possible NB constraints
529529
*/
530-
for (i = 0; i < x86_pmu.num_counters; i++) {
530+
for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
531531
__set_bit(i, nb->event_constraints[i].idxmsk);
532532
nb->event_constraints[i].weight = 1;
533533
}
@@ -720,7 +720,7 @@ static void amd_pmu_check_overflow(void)
720720
* counters are always enabled when this function is called and
721721
* ARCH_PERFMON_EVENTSEL_INT is always set.
722722
*/
723-
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
723+
for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
724724
if (!test_bit(idx, cpuc->active_mask))
725725
continue;
726726

@@ -740,7 +740,7 @@ static void amd_pmu_enable_all(int added)
740740

741741
amd_brs_enable_all();
742742

743-
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
743+
for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
744744
/* only activate events which are marked as active */
745745
if (!test_bit(idx, cpuc->active_mask))
746746
continue;
@@ -933,7 +933,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
933933
/* Clear any reserved bits set by buggy microcode */
934934
status &= amd_pmu_global_cntr_mask;
935935

936-
for (idx = 0; idx < x86_pmu.num_counters; idx++) {
936+
for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) {
937937
if (!test_bit(idx, cpuc->active_mask))
938938
continue;
939939

@@ -1273,7 +1273,7 @@ static __initconst const struct x86_pmu amd_pmu = {
12731273
.addr_offset = amd_pmu_addr_offset,
12741274
.event_map = amd_pmu_event_map,
12751275
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
1276-
.num_counters = AMD64_NUM_COUNTERS,
1276+
.cntr_mask64 = GENMASK_ULL(AMD64_NUM_COUNTERS - 1, 0),
12771277
.add = amd_pmu_add_event,
12781278
.del = amd_pmu_del_event,
12791279
.cntval_bits = 48,
@@ -1372,7 +1372,7 @@ static int __init amd_core_pmu_init(void)
13721372
*/
13731373
x86_pmu.eventsel = MSR_F15H_PERF_CTL;
13741374
x86_pmu.perfctr = MSR_F15H_PERF_CTR;
1375-
x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE;
1375+
x86_pmu.cntr_mask64 = GENMASK_ULL(AMD64_NUM_COUNTERS_CORE - 1, 0);
13761376

13771377
/* Check for Performance Monitoring v2 support */
13781378
if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) {
@@ -1382,9 +1382,9 @@ static int __init amd_core_pmu_init(void)
13821382
x86_pmu.version = 2;
13831383

13841384
/* Find the number of available Core PMCs */
1385-
x86_pmu.num_counters = ebx.split.num_core_pmc;
1385+
x86_pmu.cntr_mask64 = GENMASK_ULL(ebx.split.num_core_pmc - 1, 0);
13861386

1387-
amd_pmu_global_cntr_mask = (1ULL << x86_pmu.num_counters) - 1;
1387+
amd_pmu_global_cntr_mask = x86_pmu.cntr_mask64;
13881388

13891389
/* Update PMC handling functions */
13901390
x86_pmu.enable_all = amd_pmu_v2_enable_all;
@@ -1412,12 +1412,12 @@ static int __init amd_core_pmu_init(void)
14121412
* even numbered counter that has a consecutive adjacent odd
14131413
* numbered counter following it.
14141414
*/
1415-
for (i = 0; i < x86_pmu.num_counters - 1; i += 2)
1415+
for (i = 0; i < x86_pmu_max_num_counters(NULL) - 1; i += 2)
14161416
even_ctr_mask |= BIT_ULL(i);
14171417

14181418
pair_constraint = (struct event_constraint)
14191419
__EVENT_CONSTRAINT(0, even_ctr_mask, 0,
1420-
x86_pmu.num_counters / 2, 0,
1420+
x86_pmu_max_num_counters(NULL) / 2, 0,
14211421
PERF_X86_EVENT_PAIR);
14221422

14231423
x86_pmu.get_event_constraints = amd_get_event_constraints_f17h;

0 commit comments

Comments
 (0)