mirror of https://github.com/torvalds/linux.git
perf/x86/intel: Process arch-PEBS records or record fragments
A significant difference from adaptive PEBS is that arch-PEBS supports fragmented records: an arch-PEBS record can be split into several independent fragments, each carrying its own arch-PEBS header.

This patch defines the architectural PEBS record layout structures and adds helpers to process arch-PEBS records and record fragments. Only the legacy PEBS groups (basic, GPR, XMM and LBR) are supported by this patch; capturing the newly added YMM/ZMM/OPMASK vector registers will be supported later.

Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20251029102136.61364-9-dapeng1.mi@linux.intel.com
This commit is contained in:
parent 167cde7dc9
commit d21954c8a0
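To make the fragment model concrete before reading the diff: a record is one or more fragments laid out back to back in the PEBS buffer, each starting with its own arch-PEBS header; the header's size field gives the distance to the next fragment, and its continue bit (or an all-zero group mask, i.e. a "null" record) indicates that another fragment follows. Below is a minimal userspace sketch of that walk. It assumes a deliberately trimmed-down header with only the fields the walk needs; demo_pebs_header, record_continued() and walk_fragments() are hypothetical names used for illustration and are not part of this patch.

#include <stdint.h>
#include <stdio.h>

/*
 * Stand-in for arch_pebs_header: only size, cont and "any group bit set"
 * matter for walking fragments.  Bit positions are simplified here.
 */
struct demo_pebs_header {
        union {
                uint64_t format;
                struct {
                        uint64_t size:16;       /* fragment size in bytes, header included */
                        uint64_t rsvd:15;
                        uint64_t cont:1;        /* another fragment follows */
                        uint64_t groups:32;     /* basic/aux/gpr/... collapsed into one field */
                };
        };
        uint64_t rsvd2;
};

/*
 * Same idea as arch_pebs_record_continued(): the continue bit is set, or
 * this is a "null" record with no bits set above the size field.
 */
static int record_continued(const struct demo_pebs_header *h)
{
        return h->cont || !(h->format & ~0xffffULL);
}

static void walk_fragments(const uint8_t *buf)
{
        const uint8_t *at = buf;

        for (;;) {
                const struct demo_pebs_header *h = (const void *)at;

                printf("fragment of %u bytes, cont=%u\n",
                       (unsigned)h->size, (unsigned)h->cont);

                if (!h->size || !record_continued(h))
                        break;          /* last fragment of this record */
                at += h->size;          /* next fragment starts right behind it */
        }
}

int main(void)
{
        /* Two 16-byte "fragments": the first chains to the second, which ends the record. */
        struct demo_pebs_header rec[2] = {
                { .size = sizeof(rec[0]), .cont = 1, .groups = 1 },
                { .size = sizeof(rec[1]), .cont = 0, .groups = 1 },
        };

        walk_fragments((const uint8_t *)rec);
        return 0;
}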
@@ -3215,6 +3215,19 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
                         status &= ~GLOBAL_STATUS_PERF_METRICS_OVF_BIT;
         }
 
+        /*
+         * Arch PEBS sets bit 54 in the global status register
+         */
+        if (__test_and_clear_bit(GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT,
+                                 (unsigned long *)&status)) {
+                handled++;
+                static_call(x86_pmu_drain_pebs)(regs, &data);
+
+                if (cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS] &&
+                    is_pebs_counter_event_group(cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS]))
+                        status &= ~GLOBAL_STATUS_PERF_METRICS_OVF_BIT;
+        }
+
         /*
          * Intel PT
          */
@@ -2270,6 +2270,117 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
                                             format_group);
 }
 
+static inline bool arch_pebs_record_continued(struct arch_pebs_header *header)
+{
+        /* Continue bit or null PEBS record indicates fragment follows. */
+        return header->cont || !(header->format & GENMASK_ULL(63, 16));
+}
+
+static void setup_arch_pebs_sample_data(struct perf_event *event,
+                                        struct pt_regs *iregs,
+                                        void *__pebs,
+                                        struct perf_sample_data *data,
+                                        struct pt_regs *regs)
+{
+        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+        u64 sample_type = event->attr.sample_type;
+        struct arch_pebs_header *header = NULL;
+        struct arch_pebs_aux *meminfo = NULL;
+        struct arch_pebs_gprs *gprs = NULL;
+        struct x86_perf_regs *perf_regs;
+        void *next_record;
+        void *at = __pebs;
+
+        if (at == NULL)
+                return;
+
+        perf_regs = container_of(regs, struct x86_perf_regs, regs);
+        perf_regs->xmm_regs = NULL;
+
+        __setup_perf_sample_data(event, iregs, data);
+
+        *regs = *iregs;
+
+again:
+        header = at;
+        next_record = at + sizeof(struct arch_pebs_header);
+        if (header->basic) {
+                struct arch_pebs_basic *basic = next_record;
+                u16 retire = 0;
+
+                next_record = basic + 1;
+
+                if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT)
+                        retire = basic->valid ? basic->retire : 0;
+                __setup_pebs_basic_group(event, regs, data, sample_type,
+                                         basic->ip, basic->tsc, retire);
+        }
+
+        /*
+         * The record for MEMINFO is in front of GP
+         * But PERF_SAMPLE_TRANSACTION needs gprs->ax.
+         * Save the pointer here but process later.
+         */
+        if (header->aux) {
+                meminfo = next_record;
+                next_record = meminfo + 1;
+        }
+
+        if (header->gpr) {
+                gprs = next_record;
+                next_record = gprs + 1;
+
+                __setup_pebs_gpr_group(event, regs,
+                                       (struct pebs_gprs *)gprs,
+                                       sample_type);
+        }
+
+        if (header->aux) {
+                u64 ax = gprs ? gprs->ax : 0;
+
+                __setup_pebs_meminfo_group(event, data, sample_type,
+                                           meminfo->cache_latency,
+                                           meminfo->instr_latency,
+                                           meminfo->address, meminfo->aux,
+                                           meminfo->tsx_tuning, ax);
+        }
+
+        if (header->xmm) {
+                struct pebs_xmm *xmm;
+
+                next_record += sizeof(struct arch_pebs_xer_header);
+
+                xmm = next_record;
+                perf_regs->xmm_regs = xmm->xmm;
+                next_record = xmm + 1;
+        }
+
+        if (header->lbr) {
+                struct arch_pebs_lbr_header *lbr_header = next_record;
+                struct lbr_entry *lbr;
+                int num_lbr;
+
+                next_record = lbr_header + 1;
+                lbr = next_record;
+
+                num_lbr = header->lbr == ARCH_PEBS_LBR_NUM_VAR ?
+                          lbr_header->depth :
+                          header->lbr * ARCH_PEBS_BASE_LBR_ENTRIES;
+                next_record += num_lbr * sizeof(struct lbr_entry);
+
+                if (has_branch_stack(event)) {
+                        intel_pmu_store_pebs_lbrs(lbr);
+                        intel_pmu_lbr_save_brstack(data, cpuc, event);
+                }
+        }
+
+        /* Parse followed fragments if there are. */
+        if (arch_pebs_record_continued(header)) {
+                at = at + header->size;
+                goto again;
+        }
+}
+
 static inline void *
 get_next_pebs_record_by_bit(void *base, void *top, int bit)
 {
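The deferred-MEMINFO comment in setup_arch_pebs_sample_data() above is easier to follow against the physical layout of a record. As a sketch, a single unfragmented record carrying the basic, aux and GPR groups looks like this (the group mix is assumed for illustration; the offsets follow from the struct sizes defined later in this diff):

        +0x00   struct arch_pebs_header   (basic = aux = gpr = 1)
        +0x10   struct arch_pebs_basic    (ip, applicable_counters, tsc, retire latency)
        +0x40   struct arch_pebs_aux      (address, aux, latencies, tsx_tuning)
        +0x80   struct arch_pebs_gprs     (flags, ip, ax ... r15, ssp)

The aux group is therefore reached before the GPRs while parsing, but __setup_pebs_meminfo_group() needs gprs->ax for PERF_SAMPLE_TRANSACTION, so the aux pointer is stashed and only processed once the GPR group has been parsed.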
@@ -2753,6 +2864,78 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
                         setup_pebs_adaptive_sample_data);
 }
 
+static void intel_pmu_drain_arch_pebs(struct pt_regs *iregs,
+                                      struct perf_sample_data *data)
+{
+        short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
+        void *last[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS];
+        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+        union arch_pebs_index index;
+        struct x86_perf_regs perf_regs;
+        struct pt_regs *regs = &perf_regs.regs;
+        void *base, *at, *top;
+        u64 mask;
+
+        rdmsrq(MSR_IA32_PEBS_INDEX, index.whole);
+
+        if (unlikely(!index.wr)) {
+                intel_pmu_pebs_event_update_no_drain(cpuc, X86_PMC_IDX_MAX);
+                return;
+        }
+
+        base = cpuc->ds_pebs_vaddr;
+        top = (void *)((u64)cpuc->ds_pebs_vaddr +
+                       (index.wr << ARCH_PEBS_INDEX_WR_SHIFT));
+
+        index.wr = 0;
+        index.full = 0;
+        wrmsrq(MSR_IA32_PEBS_INDEX, index.whole);
+
+        mask = hybrid(cpuc->pmu, arch_pebs_cap).counters & cpuc->pebs_enabled;
+
+        if (!iregs)
+                iregs = &dummy_iregs;
+
+        /* Process all but the last event for each counter. */
+        for (at = base; at < top;) {
+                struct arch_pebs_header *header;
+                struct arch_pebs_basic *basic;
+                u64 pebs_status;
+
+                header = at;
+
+                if (WARN_ON_ONCE(!header->size))
+                        break;
+
+                /* 1st fragment or single record must have basic group */
+                if (!header->basic) {
+                        at += header->size;
+                        continue;
+                }
+
+                basic = at + sizeof(struct arch_pebs_header);
+                pebs_status = mask & basic->applicable_counters;
+                __intel_pmu_handle_pebs_record(iregs, regs, data, at,
+                                               pebs_status, counts, last,
+                                               setup_arch_pebs_sample_data);
+
+                /* Skip non-last fragments */
+                while (arch_pebs_record_continued(header)) {
+                        if (!header->size)
+                                break;
+                        at += header->size;
+                        header = at;
+                }
+
+                /* Skip last fragment or the single record */
+                at += header->size;
+        }
+
+        __intel_pmu_handle_last_pebs_record(iregs, regs, data, mask,
+                                            counts, last,
+                                            setup_arch_pebs_sample_data);
+}
+
 static void __init intel_arch_pebs_init(void)
 {
         /*
@@ -2762,6 +2945,7 @@ static void __init intel_arch_pebs_init(void)
          */
         x86_pmu.arch_pebs = 1;
         x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
+        x86_pmu.drain_pebs = intel_pmu_drain_arch_pebs;
         x86_pmu.pebs_capable = ~0ULL;
 
         x86_pmu.pebs_enable = __intel_pmu_pebs_enable;
@@ -327,6 +327,12 @@
                                       PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE | \
                                       PERF_CAP_PEBS_TIMING_INFO)
 
+/* Arch PEBS */
+#define MSR_IA32_PEBS_BASE              0x000003f4
+#define MSR_IA32_PEBS_INDEX             0x000003f5
+#define ARCH_PEBS_OFFSET_MASK           0x7fffff
+#define ARCH_PEBS_INDEX_WR_SHIFT        4
+
 #define MSR_IA32_RTIT_CTL               0x00000570
 #define RTIT_CTL_TRACEEN                BIT(0)
 #define RTIT_CTL_CYCLEACC               BIT(1)
@@ -437,6 +437,8 @@ static inline bool is_topdown_idx(int idx)
 #define GLOBAL_STATUS_LBRS_FROZEN               BIT_ULL(GLOBAL_STATUS_LBRS_FROZEN_BIT)
 #define GLOBAL_STATUS_TRACE_TOPAPMI_BIT         55
 #define GLOBAL_STATUS_TRACE_TOPAPMI             BIT_ULL(GLOBAL_STATUS_TRACE_TOPAPMI_BIT)
+#define GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT   54
+#define GLOBAL_STATUS_ARCH_PEBS_THRESHOLD       BIT_ULL(GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT)
 #define GLOBAL_STATUS_PERF_METRICS_OVF_BIT      48
 
 #define GLOBAL_CTRL_EN_PERF_METRICS             BIT_ULL(48)
@@ -507,6 +509,100 @@ struct pebs_cntr_header {
 
 #define INTEL_CNTR_METRICS              0x3
 
+/*
+ * Arch PEBS
+ */
+union arch_pebs_index {
+        struct {
+                u64 rsvd:4,
+                    wr:23,
+                    rsvd2:4,
+                    full:1,
+                    en:1,
+                    rsvd3:3,
+                    thresh:23,
+                    rsvd4:5;
+        };
+        u64 whole;
+};
+
+struct arch_pebs_header {
+        union {
+                u64 format;
+                struct {
+                        u64 size:16,    /* Record size */
+                            rsvd:14,
+                            mode:1,     /* 64BIT_MODE */
+                            cont:1,
+                            rsvd2:3,
+                            cntr:5,
+                            lbr:2,
+                            rsvd3:7,
+                            xmm:1,
+                            ymmh:1,
+                            rsvd4:2,
+                            opmask:1,
+                            zmmh:1,
+                            h16zmm:1,
+                            rsvd5:5,
+                            gpr:1,
+                            aux:1,
+                            basic:1;
+                };
+        };
+        u64 rsvd6;
+};
+
+struct arch_pebs_basic {
+        u64 ip;
+        u64 applicable_counters;
+        u64 tsc;
+        u64 retire      :16,    /* Retire Latency */
+            valid       :1,
+            rsvd        :47;
+        u64 rsvd2;
+        u64 rsvd3;
+};
+
+struct arch_pebs_aux {
+        u64 address;
+        u64 rsvd;
+        u64 rsvd2;
+        u64 rsvd3;
+        u64 rsvd4;
+        u64 aux;
+        u64 instr_latency       :16,
+            pad2                :16,
+            cache_latency       :16,
+            pad3                :16;
+        u64 tsx_tuning;
+};
+
+struct arch_pebs_gprs {
+        u64 flags, ip, ax, cx, dx, bx, sp, bp, si, di;
+        u64 r8, r9, r10, r11, r12, r13, r14, r15, ssp;
+        u64 rsvd;
+};
+
+struct arch_pebs_xer_header {
+        u64 xstate;
+        u64 rsvd;
+};
+
+#define ARCH_PEBS_LBR_NAN               0x0
+#define ARCH_PEBS_LBR_NUM_8             0x1
+#define ARCH_PEBS_LBR_NUM_16            0x2
+#define ARCH_PEBS_LBR_NUM_VAR           0x3
+#define ARCH_PEBS_BASE_LBR_ENTRIES      8
+struct arch_pebs_lbr_header {
+        u64 rsvd;
+        u64 ctl;
+        u64 depth;
+        u64 ler_from;
+        u64 ler_to;
+        u64 ler_info;
+};
+
 /*
  * AMD Extended Performance Monitoring and Debug cpuid feature detection
  */
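One detail worth noting in the new definitions: the write pointer in MSR_IA32_PEBS_INDEX counts 16-byte units, which is why intel_pmu_drain_arch_pebs() shifts it by ARCH_PEBS_INDEX_WR_SHIFT (4) to locate the end of valid data. A minimal sketch of that arithmetic, with an index value made up purely for illustration:

        union arch_pebs_index index;
        void *top;

        rdmsrq(MSR_IA32_PEBS_INDEX, index.whole);
        /* e.g. index.wr == 0x30  ->  0x30 << 4 == 0x300 bytes of records */
        top = (void *)((u64)cpuc->ds_pebs_vaddr +
                       (index.wr << ARCH_PEBS_INDEX_WR_SHIFT));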