perf/x86/intel: Process arch-PEBS records or record fragments

A significant difference from adaptive PEBS is that arch-PEBS supports
record fragments: an arch-PEBS record can be split into several
independent fragments, each carrying its own arch-PEBS header.
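
A minimal, illustrative sketch (not part of the patch) of how such a
multi-fragment record is walked: each fragment starts with its own header,
whose size field covers that fragment and whose continue bit tells whether
another fragment follows. The demo_* names below are made up for
illustration only.

#include <stdint.h>

/*
 * Illustrative stand-in for the arch_pebs_header added by this patch;
 * only the fields needed for the walk are modelled here.
 */
struct demo_pebs_header {
	uint64_t size : 16;	/* fragment size in bytes, header included */
	uint64_t rsvd : 15;
	uint64_t cont : 1;	/* another fragment of this record follows */
	uint64_t rest : 32;
};

/* Skip one logical record and return the start of the next one. */
void *demo_skip_record(void *at)
{
	struct demo_pebs_header *header;

	do {
		header = at;
		/* the per-group payload of this fragment would be parsed here */
		at = (char *)at + header->size;
	} while (header->cont); /* real code also treats a null record as "continued" */

	return at;
}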

This patch defines the architectural PEBS record layout structures and
adds helpers to process arch-PEBS records or fragments. Only the legacy
PEBS groups (basic, GPR, XMM and LBR) are supported by this patch;
capturing the newly added YMM/ZMM/OPMASK vector registers will be
supported in the future.
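
For orientation only (likewise not part of the patch), one fragment lays
out its groups in the order the parser below consumes them whenever the
corresponding header bit is set:

  arch_pebs_header                        always present
  arch_pebs_basic                         header.basic
  arch_pebs_aux (meminfo)                 header.aux
  arch_pebs_gprs                          header.gpr
  arch_pebs_xer_header + XMM registers    header.xmm
  arch_pebs_lbr_header + LBR entries      header.lbr

Groups whose header bit is clear are simply absent, so the parser advances
next_record only past the groups that are present.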

Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20251029102136.61364-9-dapeng1.mi@linux.intel.com

@@ -3215,6 +3215,19 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
			status &= ~GLOBAL_STATUS_PERF_METRICS_OVF_BIT;
	}

	/*
	 * Arch PEBS sets bit 54 in the global status register
	 */
	if (__test_and_clear_bit(GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT,
				 (unsigned long *)&status)) {
		handled++;
		static_call(x86_pmu_drain_pebs)(regs, &data);

		if (cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS] &&
		    is_pebs_counter_event_group(cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS]))
			status &= ~GLOBAL_STATUS_PERF_METRICS_OVF_BIT;
	}

	/*
	 * Intel PT
	 */

@@ -2270,6 +2270,117 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
					      format_group);
}

static inline bool arch_pebs_record_continued(struct arch_pebs_header *header)
{
	/* Continue bit or null PEBS record indicates fragment follows. */
	return header->cont || !(header->format & GENMASK_ULL(63, 16));
}

static void setup_arch_pebs_sample_data(struct perf_event *event,
					struct pt_regs *iregs,
					void *__pebs,
					struct perf_sample_data *data,
					struct pt_regs *regs)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	u64 sample_type = event->attr.sample_type;
	struct arch_pebs_header *header = NULL;
	struct arch_pebs_aux *meminfo = NULL;
	struct arch_pebs_gprs *gprs = NULL;
	struct x86_perf_regs *perf_regs;
	void *next_record;
	void *at = __pebs;

	if (at == NULL)
		return;

	perf_regs = container_of(regs, struct x86_perf_regs, regs);
	perf_regs->xmm_regs = NULL;

	__setup_perf_sample_data(event, iregs, data);

	*regs = *iregs;

again:
	header = at;
	next_record = at + sizeof(struct arch_pebs_header);
	if (header->basic) {
		struct arch_pebs_basic *basic = next_record;
		u16 retire = 0;

		next_record = basic + 1;

		if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT)
			retire = basic->valid ? basic->retire : 0;

		__setup_pebs_basic_group(event, regs, data, sample_type,
					 basic->ip, basic->tsc, retire);
	}

	/*
	 * The record for MEMINFO is in front of GP
	 * But PERF_SAMPLE_TRANSACTION needs gprs->ax.
	 * Save the pointer here but process later.
	 */
	if (header->aux) {
		meminfo = next_record;
		next_record = meminfo + 1;
	}

	if (header->gpr) {
		gprs = next_record;
		next_record = gprs + 1;

		__setup_pebs_gpr_group(event, regs,
				       (struct pebs_gprs *)gprs,
				       sample_type);
	}

	if (header->aux) {
		u64 ax = gprs ? gprs->ax : 0;

		__setup_pebs_meminfo_group(event, data, sample_type,
					   meminfo->cache_latency,
					   meminfo->instr_latency,
					   meminfo->address, meminfo->aux,
					   meminfo->tsx_tuning, ax);
	}

	if (header->xmm) {
		struct pebs_xmm *xmm;

		next_record += sizeof(struct arch_pebs_xer_header);

		xmm = next_record;
		perf_regs->xmm_regs = xmm->xmm;
		next_record = xmm + 1;
	}

	if (header->lbr) {
		struct arch_pebs_lbr_header *lbr_header = next_record;
		struct lbr_entry *lbr;
		int num_lbr;

		next_record = lbr_header + 1;
		lbr = next_record;

		num_lbr = header->lbr == ARCH_PEBS_LBR_NUM_VAR ?
			  lbr_header->depth :
			  header->lbr * ARCH_PEBS_BASE_LBR_ENTRIES;
		next_record += num_lbr * sizeof(struct lbr_entry);

		if (has_branch_stack(event)) {
			intel_pmu_store_pebs_lbrs(lbr);
			intel_pmu_lbr_save_brstack(data, cpuc, event);
		}
	}

	/* Parse followed fragments if there are. */
	if (arch_pebs_record_continued(header)) {
		at = at + header->size;
		goto again;
	}
}

static inline void *
get_next_pebs_record_by_bit(void *base, void *top, int bit)
{
@@ -2753,6 +2864,78 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data)
					   setup_pebs_adaptive_sample_data);
}

static void intel_pmu_drain_arch_pebs(struct pt_regs *iregs,
				      struct perf_sample_data *data)
{
	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
	void *last[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS];
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	union arch_pebs_index index;
	struct x86_perf_regs perf_regs;
	struct pt_regs *regs = &perf_regs.regs;
	void *base, *at, *top;
	u64 mask;

	rdmsrq(MSR_IA32_PEBS_INDEX, index.whole);

	if (unlikely(!index.wr)) {
		intel_pmu_pebs_event_update_no_drain(cpuc, X86_PMC_IDX_MAX);
		return;
	}

	base = cpuc->ds_pebs_vaddr;
	top = (void *)((u64)cpuc->ds_pebs_vaddr +
		       (index.wr << ARCH_PEBS_INDEX_WR_SHIFT));

	index.wr = 0;
	index.full = 0;
	wrmsrq(MSR_IA32_PEBS_INDEX, index.whole);

	mask = hybrid(cpuc->pmu, arch_pebs_cap).counters & cpuc->pebs_enabled;

	if (!iregs)
		iregs = &dummy_iregs;

	/* Process all but the last event for each counter. */
	for (at = base; at < top;) {
		struct arch_pebs_header *header;
		struct arch_pebs_basic *basic;
		u64 pebs_status;

		header = at;

		if (WARN_ON_ONCE(!header->size))
			break;

		/* 1st fragment or single record must have basic group */
		if (!header->basic) {
			at += header->size;
			continue;
		}

		basic = at + sizeof(struct arch_pebs_header);
		pebs_status = mask & basic->applicable_counters;
		__intel_pmu_handle_pebs_record(iregs, regs, data, at,
					       pebs_status, counts, last,
					       setup_arch_pebs_sample_data);

		/* Skip non-last fragments */
		while (arch_pebs_record_continued(header)) {
			if (!header->size)
				break;
			at += header->size;
			header = at;
		}

		/* Skip last fragment or the single record */
		at += header->size;
	}

	__intel_pmu_handle_last_pebs_record(iregs, regs, data, mask,
					    counts, last,
					    setup_arch_pebs_sample_data);
}

static void __init intel_arch_pebs_init(void)
{
	/*
@@ -2762,6 +2945,7 @@ static void __init intel_arch_pebs_init(void)
	 */
	x86_pmu.arch_pebs = 1;
	x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
	x86_pmu.drain_pebs = intel_pmu_drain_arch_pebs;
	x86_pmu.pebs_capable = ~0ULL;

	x86_pmu.pebs_enable = __intel_pmu_pebs_enable;

@@ -327,6 +327,12 @@
				 PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE | \
				 PERF_CAP_PEBS_TIMING_INFO)
/* Arch PEBS */
#define MSR_IA32_PEBS_BASE 0x000003f4
#define MSR_IA32_PEBS_INDEX 0x000003f5
#define ARCH_PEBS_OFFSET_MASK 0x7fffff
#define ARCH_PEBS_INDEX_WR_SHIFT 4
#define MSR_IA32_RTIT_CTL 0x00000570
#define RTIT_CTL_TRACEEN BIT(0)
#define RTIT_CTL_CYCLEACC BIT(1)

@@ -437,6 +437,8 @@ static inline bool is_topdown_idx(int idx)
#define GLOBAL_STATUS_LBRS_FROZEN BIT_ULL(GLOBAL_STATUS_LBRS_FROZEN_BIT)
#define GLOBAL_STATUS_TRACE_TOPAPMI_BIT 55
#define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(GLOBAL_STATUS_TRACE_TOPAPMI_BIT)
#define GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT 54
#define GLOBAL_STATUS_ARCH_PEBS_THRESHOLD BIT_ULL(GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT)
#define GLOBAL_STATUS_PERF_METRICS_OVF_BIT 48
#define GLOBAL_CTRL_EN_PERF_METRICS BIT_ULL(48)
@@ -507,6 +509,100 @@ struct pebs_cntr_header {
#define INTEL_CNTR_METRICS 0x3

/*
 * Arch PEBS
 */
union arch_pebs_index {
	struct {
		u64 rsvd:4,
		    wr:23,
		    rsvd2:4,
		    full:1,
		    en:1,
		    rsvd3:3,
		    thresh:23,
		    rsvd4:5;
	};
	u64 whole;
};

struct arch_pebs_header {
	union {
		u64 format;
		struct {
			u64 size:16, /* Record size */
			    rsvd:14,
			    mode:1,  /* 64BIT_MODE */
			    cont:1,
			    rsvd2:3,
			    cntr:5,
			    lbr:2,
			    rsvd3:7,
			    xmm:1,
			    ymmh:1,
			    rsvd4:2,
			    opmask:1,
			    zmmh:1,
			    h16zmm:1,
			    rsvd5:5,
			    gpr:1,
			    aux:1,
			    basic:1;
		};
	};
	u64 rsvd6;
};

struct arch_pebs_basic {
	u64 ip;
	u64 applicable_counters;
	u64 tsc;
	u64 retire :16, /* Retire Latency */
	    valid  :1,
	    rsvd   :47;
	u64 rsvd2;
	u64 rsvd3;
};

struct arch_pebs_aux {
	u64 address;
	u64 rsvd;
	u64 rsvd2;
	u64 rsvd3;
	u64 rsvd4;
	u64 aux;
	u64 instr_latency :16,
	    pad2          :16,
	    cache_latency :16,
	    pad3          :16;
	u64 tsx_tuning;
};

struct arch_pebs_gprs {
	u64 flags, ip, ax, cx, dx, bx, sp, bp, si, di;
	u64 r8, r9, r10, r11, r12, r13, r14, r15, ssp;
	u64 rsvd;
};

struct arch_pebs_xer_header {
	u64 xstate;
	u64 rsvd;
};

#define ARCH_PEBS_LBR_NAN 0x0
#define ARCH_PEBS_LBR_NUM_8 0x1
#define ARCH_PEBS_LBR_NUM_16 0x2
#define ARCH_PEBS_LBR_NUM_VAR 0x3
#define ARCH_PEBS_BASE_LBR_ENTRIES 8

struct arch_pebs_lbr_header {
	u64 rsvd;
	u64 ctl;
	u64 depth;
	u64 ler_from;
	u64 ler_to;
	u64 ler_info;
};

/*
 * AMD Extended Performance Monitoring and Debug cpuid feature detection
 */