mirror of https://github.com/torvalds/linux.git
perf/x86/intel: Setup PEBS data configuration and enable legacy groups
Unlike legacy PEBS, arch-PEBS provides per-counter PEBS data configuration,
programmed through the IA32_PMC_GPx/FXx_CFG_C MSRs. This patch obtains the
PEBS data configuration from the event attributes, writes it to the
IA32_PMC_GPx/FXx_CFG_C MSRs, and enables the corresponding PEBS groups.

Note that this patch only enables XMM SIMD register sampling for arch-PEBS;
sampling of the other SIMD registers (OPMASK/YMM/ZMM) will be supported once
PMI-based OPMASK/YMM/ZMM sampling is in place.

Co-developed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20251029102136.61364-12-dapeng1.mi@linux.intel.com
This commit is contained in:
parent e89c5d1f29
commit 52448a0a73
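For context, here is a minimal user-space sketch (not part of this patch) of the kind of request that exercises this path: a precise (PEBS) event asking for XMM registers in its samples. It assumes the existing perf_event_open() ABI, where PERF_REG_X86_XMM0 is register index 32 in the x86 perf_regs UAPI; the XMM portion of sample_regs_intr is only accepted once the PMU advertises PERF_PMU_CAP_EXTENDED_REGS, which this patch sets when ARCH_PEBS_VECR_XMM is supported.

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <unistd.h>

/*
 * XMM0..XMM15 occupy register indexes 32..63 in the x86 perf_regs UAPI
 * (two 64-bit slots per 128-bit register), so this mask requests them all.
 */
#define SAMPLE_XMM_REGS_MASK	0xffffffff00000000ULL

static int open_pebs_xmm_event(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100003;
	attr.precise_ip = 2;				/* request PEBS-based sampling */
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_REGS_INTR;
	attr.sample_regs_intr = SAMPLE_XMM_REGS_MASK;	/* XMM0..XMM15 */

	/* monitor the calling thread on any CPU */
	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}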
@@ -2563,6 +2563,45 @@ static void intel_pmu_disable_fixed(struct perf_event *event)
 	cpuc->fixed_ctrl_val &= ~mask;
 }

+static inline void __intel_pmu_update_event_ext(int idx, u64 ext)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	u32 msr;
+
+	if (idx < INTEL_PMC_IDX_FIXED) {
+		msr = MSR_IA32_PMC_V6_GP0_CFG_C +
+		      x86_pmu.addr_offset(idx, false);
+	} else {
+		msr = MSR_IA32_PMC_V6_FX0_CFG_C +
+		      x86_pmu.addr_offset(idx - INTEL_PMC_IDX_FIXED, false);
+	}
+
+	cpuc->cfg_c_val[idx] = ext;
+	wrmsrq(msr, ext);
+}
+
+static void intel_pmu_disable_event_ext(struct perf_event *event)
+{
+	if (!x86_pmu.arch_pebs)
+		return;
+
+	/*
+	 * Only clear the CFG_C MSR for PEBS counter group events;
+	 * this avoids the HW counter's value being added into
+	 * other PEBS records incorrectly after the PEBS counter
+	 * group events are disabled.
+	 *
+	 * For other events it's unnecessary to clear the CFG_C
+	 * MSRs, since CFG_C has no effect while the counter is
+	 * disabled. That helps reduce the WRMSR overhead on
+	 * context switches.
+	 */
+	if (!is_pebs_counter_event_group(event))
+		return;
+
+	__intel_pmu_update_event_ext(event->hw.idx, 0);
+}
+
 static void intel_pmu_disable_event(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
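The MSR address calculation above goes through x86_pmu.addr_offset(). Purely as an illustration, and assuming the PMU v6 per-counter MSR layout in which counter blocks are MSR_IA32_PMC_V6_STEP (4) apart, GP counter 0's CFG_C sits at 0x1903 and fixed counter 0's CFG_C at 0x1983, the resulting addresses look like the following sketch (the constants here are assumptions for illustration, not taken from this patch):

/* Illustrative only: assumes the v6 counter MSR layout described above. */
#define EXAMPLE_GP0_CFG_C	0x1903	/* assumed MSR_IA32_PMC_V6_GP0_CFG_C */
#define EXAMPLE_FX0_CFG_C	0x1983	/* assumed MSR_IA32_PMC_V6_FX0_CFG_C */
#define EXAMPLE_MSR_STEP	4	/* assumed MSR_IA32_PMC_V6_STEP */

static unsigned int example_cfg_c_msr(unsigned int idx, int fixed)
{
	/* GP counter n -> 0x1903 + 4 * n, fixed counter n -> 0x1983 + 4 * n */
	return (fixed ? EXAMPLE_FX0_CFG_C : EXAMPLE_GP0_CFG_C) +
	       idx * EXAMPLE_MSR_STEP;
}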
@@ -2571,9 +2610,12 @@ static void intel_pmu_disable_event(struct perf_event *event)
 	switch (idx) {
 	case 0 ... INTEL_PMC_IDX_FIXED - 1:
 		intel_clear_masks(event, idx);
+		intel_pmu_disable_event_ext(event);
 		x86_pmu_disable_event(event);
 		break;
 	case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
+		intel_pmu_disable_event_ext(event);
+		fallthrough;
 	case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
 		intel_pmu_disable_fixed(event);
 		break;
@@ -2940,6 +2982,66 @@ static void intel_pmu_enable_acr(struct perf_event *event)

 DEFINE_STATIC_CALL_NULL(intel_pmu_enable_acr_event, intel_pmu_enable_acr);

+static void intel_pmu_enable_event_ext(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	union arch_pebs_index old, new;
+	struct arch_pebs_cap cap;
+	u64 ext = 0;
+
+	if (!x86_pmu.arch_pebs)
+		return;
+
+	cap = hybrid(cpuc->pmu, arch_pebs_cap);
+
+	if (event->attr.precise_ip) {
+		u64 pebs_data_cfg = intel_get_arch_pebs_data_config(event);
+
+		ext |= ARCH_PEBS_EN;
+		if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD)
+			ext |= (-hwc->sample_period) & ARCH_PEBS_RELOAD;
+
+		if (pebs_data_cfg && cap.caps) {
+			if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
+				ext |= ARCH_PEBS_AUX & cap.caps;
+
+			if (pebs_data_cfg & PEBS_DATACFG_GP)
+				ext |= ARCH_PEBS_GPR & cap.caps;
+
+			if (pebs_data_cfg & PEBS_DATACFG_XMMS)
+				ext |= ARCH_PEBS_VECR_XMM & cap.caps;
+
+			if (pebs_data_cfg & PEBS_DATACFG_LBRS)
+				ext |= ARCH_PEBS_LBR & cap.caps;
+		}
+
+		if (cpuc->n_pebs == cpuc->n_large_pebs)
+			new.thresh = ARCH_PEBS_THRESH_MULTI;
+		else
+			new.thresh = ARCH_PEBS_THRESH_SINGLE;
+
+		rdmsrq(MSR_IA32_PEBS_INDEX, old.whole);
+		if (new.thresh != old.thresh || !old.en) {
+			if (old.thresh == ARCH_PEBS_THRESH_MULTI && old.wr > 0) {
+				/*
+				 * Large PEBS was enabled.
+				 * Drain the PEBS buffer before switching to single PEBS.
+				 */
+				intel_pmu_drain_pebs_buffer();
+			} else {
+				new.wr = 0;
+				new.full = 0;
+				new.en = 1;
+				wrmsrq(MSR_IA32_PEBS_INDEX, new.whole);
+			}
+		}
+	}
+
+	if (cpuc->cfg_c_val[hwc->idx] != ext)
+		__intel_pmu_update_event_ext(hwc->idx, ext);
+}
+
 static void intel_pmu_enable_event(struct perf_event *event)
 {
 	u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE;
@@ -2955,10 +3057,12 @@ static void intel_pmu_enable_event(struct perf_event *event)
 			enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR;
 		intel_set_masks(event, idx);
 		static_call_cond(intel_pmu_enable_acr_event)(event);
+		intel_pmu_enable_event_ext(event);
 		__x86_pmu_enable_event(hwc, enable_mask);
 		break;
 	case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
 		static_call_cond(intel_pmu_enable_acr_event)(event);
+		intel_pmu_enable_event_ext(event);
 		fallthrough;
 	case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
 		intel_pmu_enable_fixed(event);
@@ -5301,6 +5405,30 @@ static inline bool intel_pmu_broken_perf_cap(void)
 	return false;
 }

+static inline void __intel_update_pmu_caps(struct pmu *pmu)
+{
+	struct pmu *dest_pmu = pmu ? pmu : x86_get_pmu(smp_processor_id());
+
+	if (hybrid(pmu, arch_pebs_cap).caps & ARCH_PEBS_VECR_XMM)
+		dest_pmu->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
+}
+
+static inline void __intel_update_large_pebs_flags(struct pmu *pmu)
+{
+	u64 caps = hybrid(pmu, arch_pebs_cap).caps;
+
+	x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
+	if (caps & ARCH_PEBS_LBR)
+		x86_pmu.large_pebs_flags |= PERF_SAMPLE_BRANCH_STACK;
+
+	if (!(caps & ARCH_PEBS_AUX))
+		x86_pmu.large_pebs_flags &= ~PERF_SAMPLE_DATA_SRC;
+	if (!(caps & ARCH_PEBS_GPR)) {
+		x86_pmu.large_pebs_flags &=
+			~(PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER);
+	}
+}
+
 #define counter_mask(_gp, _fixed) ((_gp) | ((u64)(_fixed) << INTEL_PMC_IDX_FIXED))

 static void update_pmu_cap(struct pmu *pmu)
@@ -5349,8 +5477,12 @@ static void update_pmu_cap(struct pmu *pmu)
 		hybrid(pmu, arch_pebs_cap).counters = pebs_mask;
 		hybrid(pmu, arch_pebs_cap).pdists = pdists_mask;

-		if (WARN_ON((pebs_mask | pdists_mask) & ~cntrs_mask))
+		if (WARN_ON((pebs_mask | pdists_mask) & ~cntrs_mask)) {
 			x86_pmu.arch_pebs = 0;
+		} else {
+			__intel_update_pmu_caps(pmu);
+			__intel_update_large_pebs_flags(pmu);
+		}
 	} else {
 		WARN_ON(x86_pmu.arch_pebs == 1);
 		x86_pmu.arch_pebs = 0;
@@ -5514,6 +5646,8 @@ static void intel_pmu_cpu_starting(int cpu)
 		}
 	}

+	__intel_update_pmu_caps(cpuc->pmu);
+
 	if (!cpuc->shared_regs)
 		return;

@@ -1528,6 +1528,18 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
 	}
 }

+u64 intel_get_arch_pebs_data_config(struct perf_event *event)
+{
+	u64 pebs_data_cfg = 0;
+
+	if (WARN_ON(event->hw.idx < 0 || event->hw.idx >= X86_PMC_IDX_MAX))
+		return 0;
+
+	pebs_data_cfg |= pebs_update_adaptive_cfg(event);
+
+	return pebs_data_cfg;
+}
+
 void intel_pmu_pebs_add(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -2947,6 +2959,11 @@ static void intel_pmu_drain_arch_pebs(struct pt_regs *iregs,

 	index.wr = 0;
 	index.full = 0;
+	index.en = 1;
+	if (cpuc->n_pebs == cpuc->n_large_pebs)
+		index.thresh = ARCH_PEBS_THRESH_MULTI;
+	else
+		index.thresh = ARCH_PEBS_THRESH_SINGLE;
 	wrmsrq(MSR_IA32_PEBS_INDEX, index.whole);

 	mask = hybrid(cpuc->pmu, arch_pebs_cap).counters & cpuc->pebs_enabled;
@@ -304,6 +304,8 @@ struct cpu_hw_events {
 	/* Intel ACR configuration */
 	u64	acr_cfg_b[X86_PMC_IDX_MAX];
 	u64	acr_cfg_c[X86_PMC_IDX_MAX];
+	/* Cached CFG_C values */
+	u64	cfg_c_val[X86_PMC_IDX_MAX];

 	/*
 	 * Intel LBR bits
@@ -1782,6 +1784,8 @@ void intel_pmu_pebs_data_source_cmt(void);

 void intel_pmu_pebs_data_source_lnl(void);

+u64 intel_get_arch_pebs_data_config(struct perf_event *event);
+
 int intel_pmu_setup_lbr_filter(struct perf_event *event);

 void intel_pt_interrupt(void);
@@ -7,6 +7,13 @@
 #define PEBS_BUFFER_SHIFT	4
 #define PEBS_BUFFER_SIZE	(PAGE_SIZE << PEBS_BUFFER_SHIFT)

+/*
+ * The largest PEBS record can consume a page; make sure at least
+ * one record can still be written after the PMI triggers.
+ */
+#define ARCH_PEBS_THRESH_MULTI	((PEBS_BUFFER_SIZE - PAGE_SIZE) >> PEBS_BUFFER_SHIFT)
+#define ARCH_PEBS_THRESH_SINGLE	1
+
 /* The maximal number of PEBS events: */
 #define MAX_PEBS_EVENTS_FMT4	8
 #define MAX_PEBS_EVENTS		32
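As a worked example of the threshold above (an illustration that assumes 4 KiB pages, i.e. PAGE_SIZE == 4096; the EXAMPLE_* names are placeholders, not kernel symbols):

/*
 * With 4 KiB pages the PEBS buffer is 4096 << 4 = 64 KiB, and the
 * multi-record threshold stops one page short of the buffer end, so a
 * maximally sized record still fits after the threshold PMI fires.
 */
#define EXAMPLE_PAGE_SIZE	4096u
#define EXAMPLE_BUFFER_SHIFT	4
#define EXAMPLE_BUFFER_SIZE	(EXAMPLE_PAGE_SIZE << EXAMPLE_BUFFER_SHIFT)

_Static_assert(EXAMPLE_BUFFER_SIZE == 65536, "64 KiB PEBS buffer");
_Static_assert(((EXAMPLE_BUFFER_SIZE - EXAMPLE_PAGE_SIZE) >> EXAMPLE_BUFFER_SHIFT) == 3840,
	       "multi-record threshold leaves one page of headroom");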
@@ -333,6 +333,14 @@
 #define ARCH_PEBS_OFFSET_MASK		0x7fffff
 #define ARCH_PEBS_INDEX_WR_SHIFT	4

+#define ARCH_PEBS_RELOAD		0xffffffff
+#define ARCH_PEBS_LBR_SHIFT		40
+#define ARCH_PEBS_LBR			(0x3ull << ARCH_PEBS_LBR_SHIFT)
+#define ARCH_PEBS_VECR_XMM		BIT_ULL(49)
+#define ARCH_PEBS_GPR			BIT_ULL(61)
+#define ARCH_PEBS_AUX			BIT_ULL(62)
+#define ARCH_PEBS_EN			BIT_ULL(63)
+
 #define MSR_IA32_RTIT_CTL		0x00000570
 #define RTIT_CTL_TRACEEN		BIT(0)
 #define RTIT_CTL_CYCLEACC		BIT(1)
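To tie the bit definitions together, here is a minimal sketch of how a CFG_C value could be composed for a precise event that samples GPRs and XMM registers with auto-reload. It simply mirrors the intel_pmu_enable_event_ext() hunk above and reuses the ARCH_PEBS_* definitions from the previous hunk; in the kernel each group bit is additionally masked against arch_pebs_cap.caps.

/* Sketch only: relies on the ARCH_PEBS_* definitions from the hunk above. */
static inline unsigned long long example_cfg_c_value(unsigned long long period)
{
	return ARCH_PEBS_EN |				/* enable PEBS on this counter */
	       ((0ULL - period) & ARCH_PEBS_RELOAD) |	/* auto-reload value (low 32 bits) */
	       ARCH_PEBS_GPR | ARCH_PEBS_VECR_XMM;	/* record GPRs and XMM registers */
}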