mirror of https://github.com/torvalds/linux.git
perf/x86/intel: Initialize architectural PEBS
arch-PEBS leverages the CPUID.23H.4/5 sub-leaves to enumerate the supported arch-PEBS capabilities and counter bitmaps. This patch parses these two sub-leaves and initializes the arch-PEBS capabilities and the corresponding structures.

Since the IA32_PEBS_ENABLE and MSR_PEBS_DATA_CFG MSRs no longer exist for arch-PEBS, arch-PEBS does not need to manipulate these MSRs. Thus add a simple pair of __intel_pmu_pebs_enable()/__intel_pmu_pebs_disable() callbacks for arch-PEBS.

Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20251029102136.61364-6-dapeng1.mi@linux.intel.com
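For context, here is a minimal userspace sketch (hypothetical, not part of the patch) of the same enumeration: it reads CPUID.23H sub-leaves 0, 4 and 5 with GCC's __cpuid_count() and folds the GP/fixed bitmaps together the way the patch's counter_mask() macro does, placing fixed counters at bit 32 (INTEL_PMC_IDX_FIXED in the kernel). The register layout mirrors the hunks below; run it only on hardware that actually enumerates these sub-leaves.

/* Hypothetical sketch, not kernel code: enumerate arch-PEBS via CPUID.23H. */
#include <cpuid.h>
#include <stdint.h>
#include <stdio.h>

#define ARCH_PERFMON_EXT_LEAF	0x23
#define INTEL_PMC_IDX_FIXED	32	/* fixed counters start at bit 32, as in the kernel */

static uint64_t counter_mask(uint32_t gp, uint32_t fixed)
{
	return (uint64_t)gp | ((uint64_t)fixed << INTEL_PMC_IDX_FIXED);
}

int main(void)
{
	uint32_t eax, ebx, ecx, edx;

	/* Sub-leaf 0: EAX bits 4/5 flag the two arch-PEBS sub-leaves (see cpuid35_eax below). */
	__cpuid_count(ARCH_PERFMON_EXT_LEAF, 0, eax, ebx, ecx, edx);
	if (!(eax & (3u << 4))) {
		puts("arch-PEBS sub-leaves not enumerated");
		return 0;
	}

	/* Sub-leaf 4: capability bits, kept in the high 32 bits as the patch does. */
	__cpuid_count(ARCH_PERFMON_EXT_LEAF, 4, eax, ebx, ecx, edx);
	printf("arch-PEBS caps: %#llx\n", (unsigned long long)ebx << 32);

	/* Sub-leaf 5: which GP/fixed counters support PEBS and precise distribution. */
	__cpuid_count(ARCH_PERFMON_EXT_LEAF, 5, eax, ebx, ecx, edx);
	printf("PEBS counters:  %#llx\n", (unsigned long long)counter_mask(eax, ecx));
	printf("pdist counters: %#llx\n", (unsigned long long)counter_mask(ebx, edx));
	return 0;
}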
parent 5e4e355ae7
commit d243d0bb64
arch/x86/events/core.c

@@ -554,14 +554,22 @@ static inline int precise_br_compat(struct perf_event *event)
 	return m == b;
 }
 
-int x86_pmu_max_precise(void)
+int x86_pmu_max_precise(struct pmu *pmu)
 {
 	int precise = 0;
 
-	/* Support for constant skid */
 	if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
+		/* arch PEBS */
+		if (x86_pmu.arch_pebs) {
+			precise = 2;
+			if (hybrid(pmu, arch_pebs_cap).pdists)
+				precise++;
+
+			return precise;
+		}
+
+		/* legacy PEBS - support for constant skid */
 		precise++;
 
 		/* Support for IP fixup */
 		if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2)
 			precise++;
@@ -569,13 +577,14 @@ int x86_pmu_max_precise(void)
 		if (x86_pmu.pebs_prec_dist)
 			precise++;
 	}
+
 	return precise;
 }
 
 int x86_pmu_hw_config(struct perf_event *event)
 {
 	if (event->attr.precise_ip) {
-		int precise = x86_pmu_max_precise();
+		int precise = x86_pmu_max_precise(event->pmu);
 
 		if (event->attr.precise_ip > precise)
 			return -EOPNOTSUPP;
 
@@ -2630,7 +2639,9 @@ static ssize_t max_precise_show(struct device *cdev,
 				struct device_attribute *attr,
 				char *buf)
 {
-	return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu_max_precise());
+	struct pmu *pmu = dev_get_drvdata(cdev);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu_max_precise(pmu));
 }
 
 static DEVICE_ATTR_RO(max_precise);
arch/x86/events/intel/core.c

@@ -5271,34 +5271,59 @@ static inline bool intel_pmu_broken_perf_cap(void)
 	return false;
 }
 
+#define counter_mask(_gp, _fixed) ((_gp) | ((u64)(_fixed) << INTEL_PMC_IDX_FIXED))
+
 static void update_pmu_cap(struct pmu *pmu)
 {
-	unsigned int cntr, fixed_cntr, ecx, edx;
-	union cpuid35_eax eax;
-	union cpuid35_ebx ebx;
+	unsigned int eax, ebx, ecx, edx;
+	union cpuid35_eax eax_0;
+	union cpuid35_ebx ebx_0;
+	u64 cntrs_mask = 0;
+	u64 pebs_mask = 0;
+	u64 pdists_mask = 0;
 
-	cpuid(ARCH_PERFMON_EXT_LEAF, &eax.full, &ebx.full, &ecx, &edx);
+	cpuid(ARCH_PERFMON_EXT_LEAF, &eax_0.full, &ebx_0.full, &ecx, &edx);
 
-	if (ebx.split.umask2)
+	if (ebx_0.split.umask2)
 		hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_UMASK2;
-	if (ebx.split.eq)
+	if (ebx_0.split.eq)
 		hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_EQ;
 
-	if (eax.split.cntr_subleaf) {
+	if (eax_0.split.cntr_subleaf) {
 		cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF,
-			    &cntr, &fixed_cntr, &ecx, &edx);
-		hybrid(pmu, cntr_mask64) = cntr;
-		hybrid(pmu, fixed_cntr_mask64) = fixed_cntr;
+			    &eax, &ebx, &ecx, &edx);
+		hybrid(pmu, cntr_mask64) = eax;
+		hybrid(pmu, fixed_cntr_mask64) = ebx;
+		cntrs_mask = counter_mask(eax, ebx);
 	}
 
-	if (eax.split.acr_subleaf) {
+	if (eax_0.split.acr_subleaf) {
 		cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_ACR_LEAF,
-			    &cntr, &fixed_cntr, &ecx, &edx);
+			    &eax, &ebx, &ecx, &edx);
 		/* The mask of the counters which can be reloaded */
-		hybrid(pmu, acr_cntr_mask64) = cntr | ((u64)fixed_cntr << INTEL_PMC_IDX_FIXED);
-
+		hybrid(pmu, acr_cntr_mask64) = counter_mask(eax, ebx);
 		/* The mask of the counters which can cause a reload of reloadable counters */
-		hybrid(pmu, acr_cause_mask64) = ecx | ((u64)edx << INTEL_PMC_IDX_FIXED);
+		hybrid(pmu, acr_cause_mask64) = counter_mask(ecx, edx);
 	}
 
+	/* Bits[5:4] should be set simultaneously if arch-PEBS is supported */
+	if (eax_0.split.pebs_caps_subleaf && eax_0.split.pebs_cnts_subleaf) {
+		cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_PEBS_CAP_LEAF,
+			    &eax, &ebx, &ecx, &edx);
+		hybrid(pmu, arch_pebs_cap).caps = (u64)ebx << 32;
+
+		cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_PEBS_COUNTER_LEAF,
+			    &eax, &ebx, &ecx, &edx);
+		pebs_mask = counter_mask(eax, ecx);
+		pdists_mask = counter_mask(ebx, edx);
+		hybrid(pmu, arch_pebs_cap).counters = pebs_mask;
+		hybrid(pmu, arch_pebs_cap).pdists = pdists_mask;
+
+		if (WARN_ON((pebs_mask | pdists_mask) & ~cntrs_mask))
+			x86_pmu.arch_pebs = 0;
+	} else {
+		WARN_ON(x86_pmu.arch_pebs == 1);
+		x86_pmu.arch_pebs = 0;
+	}
+
 	if (!intel_pmu_broken_perf_cap()) {
@@ -6252,7 +6277,7 @@ tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i)
 static umode_t
 pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
 {
-	return x86_pmu.ds_pebs ? attr->mode : 0;
+	return intel_pmu_has_pebs() ? attr->mode : 0;
 }
 
 static umode_t
@@ -7728,6 +7753,9 @@ __init int intel_pmu_init(void)
 	if (!is_hybrid() && boot_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT))
 		update_pmu_cap(NULL);
 
+	if (x86_pmu.arch_pebs)
+		pr_cont("Architectural PEBS, ");
+
 	intel_pmu_check_counters_mask(&x86_pmu.cntr_mask64,
 				      &x86_pmu.fixed_cntr_mask64,
 				      &x86_pmu.intel_ctrl);
arch/x86/events/intel/ds.c

@@ -1531,6 +1531,15 @@ static inline void intel_pmu_drain_large_pebs(struct cpu_hw_events *cpuc)
 		intel_pmu_drain_pebs_buffer();
 }
 
+static void __intel_pmu_pebs_enable(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+
+	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
+	cpuc->pebs_enabled |= 1ULL << hwc->idx;
+}
+
 void intel_pmu_pebs_enable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -1539,9 +1548,7 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 	struct debug_store *ds = cpuc->ds;
 	unsigned int idx = hwc->idx;
 
-	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
-
-	cpuc->pebs_enabled |= 1ULL << hwc->idx;
+	__intel_pmu_pebs_enable(event);
 
 	if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5))
 		cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
@@ -1603,14 +1610,22 @@ void intel_pmu_pebs_del(struct perf_event *event)
 	pebs_update_state(needed_cb, cpuc, event, false);
 }
 
-void intel_pmu_pebs_disable(struct perf_event *event)
+static void __intel_pmu_pebs_disable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 
 	intel_pmu_drain_large_pebs(cpuc);
 
 	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
+	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
+}
+
+void intel_pmu_pebs_disable(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+
+	__intel_pmu_pebs_disable(event);
 
 	if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) &&
 	    (x86_pmu.version < 5))
@@ -1622,8 +1637,6 @@ void intel_pmu_pebs_disable(struct perf_event *event)
 
 	if (cpuc->enabled)
 		wrmsrq(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
-
-	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
 }
 
 void intel_pmu_pebs_enable_all(void)
@@ -2669,11 +2682,26 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data)
 	}
 }
 
+static void __init intel_arch_pebs_init(void)
+{
+	/*
+	 * Current hybrid platforms always both support arch-PEBS or not
+	 * on all kinds of cores. So directly set x86_pmu.arch_pebs flag
+	 * if boot cpu supports arch-PEBS.
+	 */
+	x86_pmu.arch_pebs = 1;
+	x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
+	x86_pmu.pebs_capable = ~0ULL;
+
+	x86_pmu.pebs_enable = __intel_pmu_pebs_enable;
+	x86_pmu.pebs_disable = __intel_pmu_pebs_disable;
+}
+
 /*
  * PEBS probe and setup
  */
 
-void __init intel_pebs_init(void)
+static void __init intel_ds_pebs_init(void)
 {
 	/*
 	 * No support for 32bit formats
@@ -2788,6 +2816,14 @@ void __init intel_pebs_init(void)
 	}
 }
 
+void __init intel_pebs_init(void)
+{
+	if (x86_pmu.intel_cap.pebs_format == 0xf)
+		intel_arch_pebs_init();
+	else
+		intel_ds_pebs_init();
+}
+
 void perf_restore_debug_store(void)
 {
 	struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
arch/x86/events/perf_event.h

@@ -708,6 +708,12 @@ enum hybrid_pmu_type {
 	hybrid_big_small_tiny = hybrid_big | hybrid_small_tiny,
 };
 
+struct arch_pebs_cap {
+	u64 caps;
+	u64 counters;
+	u64 pdists;
+};
+
 struct x86_hybrid_pmu {
 	struct pmu			pmu;
 	const char			*name;
@@ -752,6 +758,8 @@ struct x86_hybrid_pmu {
 					mid_ack		:1,
 					enabled_ack	:1;
 
+	struct arch_pebs_cap		arch_pebs_cap;
+
 	u64				pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX];
 };
 
@@ -906,7 +914,7 @@ struct x86_pmu {
 	union perf_capabilities intel_cap;
 
 	/*
-	 * Intel DebugStore bits
+	 * Intel DebugStore and PEBS bits
 	 */
 	unsigned int	bts			:1,
 			bts_active		:1,
@@ -917,7 +925,8 @@ struct x86_pmu {
 			pebs_no_tlb		:1,
 			pebs_no_isolation	:1,
 			pebs_block		:1,
-			pebs_ept		:1;
+			pebs_ept		:1,
+			arch_pebs		:1;
 	int		pebs_record_size;
 	int		pebs_buffer_size;
 	u64		pebs_events_mask;
@@ -929,6 +938,11 @@ struct x86_pmu {
 	u64		rtm_abort_event;
 	u64		pebs_capable;
 
+	/*
+	 * Intel Architectural PEBS
+	 */
+	struct arch_pebs_cap	arch_pebs_cap;
+
 	/*
 	 * Intel LBR
 	 */
@@ -1216,7 +1230,7 @@ int x86_reserve_hardware(void);
 
 void x86_release_hardware(void);
 
-int x86_pmu_max_precise(void);
+int x86_pmu_max_precise(struct pmu *pmu);
 
 void hw_perf_lbr_event_destroy(struct perf_event *event);
 
@@ -1791,6 +1805,11 @@ static inline int intel_pmu_max_num_pebs(struct pmu *pmu)
 	return fls((u32)hybrid(pmu, pebs_events_mask));
 }
 
+static inline bool intel_pmu_has_pebs(void)
+{
+	return x86_pmu.ds_pebs || x86_pmu.arch_pebs;
+}
+
 #else /* CONFIG_CPU_SUP_INTEL */
 
 static inline void reserve_ds_buffers(void)
arch/x86/include/asm/perf_event.h

@@ -200,6 +200,8 @@ union cpuid10_edx {
 #define ARCH_PERFMON_EXT_LEAF			0x00000023
 #define ARCH_PERFMON_NUM_COUNTER_LEAF		0x1
 #define ARCH_PERFMON_ACR_LEAF			0x2
+#define ARCH_PERFMON_PEBS_CAP_LEAF		0x4
+#define ARCH_PERFMON_PEBS_COUNTER_LEAF		0x5
 
 union cpuid35_eax {
 	struct {
@@ -210,7 +212,10 @@ union cpuid35_eax {
 		unsigned int	acr_subleaf:1;
 		/* Events Sub-Leaf */
 		unsigned int	events_subleaf:1;
-		unsigned int	reserved:28;
+		/* arch-PEBS Sub-Leaves */
+		unsigned int	pebs_caps_subleaf:1;
+		unsigned int	pebs_cnts_subleaf:1;
+		unsigned int	reserved:26;
 	} split;
 	unsigned int	full;
 };