Merge tag 'perf-core-2025-12-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull performance events updates from Ingo Molnar:
"Callchain support:
- Add support for deferred user-space stack unwinding for perf,
enabled on x86. (Peter Zijlstra, Steven Rostedt)
- unwind_user/x86: Enable frame pointer unwinding on x86 (Josh
Poimboeuf)
x86 PMU support and infrastructure:
- x86/insn: Simplify for_each_insn_prefix() (Peter Zijlstra)
- x86/insn,uprobes,alternative: Unify insn_is_nop() (Peter Zijlstra)
Intel PMU driver:
- Large series to prepare for and implement architectural PEBS
support for Intel platforms such as Clearwater Forest (CWF) and
Panther Lake (PTL). (Dapeng Mi, Kan Liang)
- Check dynamic constraints (Kan Liang)
- Optimize PEBS extended config (Peter Zijlstra)
- cstates:
- Remove PC3 support from LunarLake (Zhang Rui)
- Add Pantherlake support (Zhang Rui)
- Clearwater Forest support (Zide Chen)
AMD PMU driver:
- x86/amd: Check event before enable to avoid GPF (George Kennedy)
Fixes and cleanups:
- task_work: Fix NMI race condition (Peter Zijlstra)
- perf/x86: Fix NULL event access and potential PEBS record loss
(Dapeng Mi)
- Misc other fixes and cleanups (Dapeng Mi, Ingo Molnar, Peter
Zijlstra)"
* tag 'perf-core-2025-12-01' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (38 commits)
perf/x86/intel: Fix and clean up intel_pmu_drain_arch_pebs() type use
perf/x86/intel: Optimize PEBS extended config
perf/x86/intel: Check PEBS dyn_constraints
perf/x86/intel: Add a check for dynamic constraints
perf/x86/intel: Add counter group support for arch-PEBS
perf/x86/intel: Setup PEBS data configuration and enable legacy groups
perf/x86/intel: Update dyn_constraint base on PEBS event precise level
perf/x86/intel: Allocate arch-PEBS buffer and initialize PEBS_BASE MSR
perf/x86/intel: Process arch-PEBS records or record fragments
perf/x86/intel/ds: Factor out PEBS group processing code to functions
perf/x86/intel/ds: Factor out PEBS record processing code to functions
perf/x86/intel: Initialize architectural PEBS
perf/x86/intel: Correct large PEBS flag check
perf/x86/intel: Replace x86_pmu.drain_pebs calling with static call
perf/x86: Fix NULL event access and potential PEBS record loss
perf/x86: Remove redundant is_x86_event() prototype
entry,unwind/deferred: Fix unwind_reset_info() placement
unwind_user/x86: Fix arch=um build
perf: Support deferred user unwind
unwind_user/x86: Teach FP unwind about start of function
...
commit 6c26fbe8c9
@ -298,6 +298,7 @@ config X86
|
||||||
select HAVE_SYSCALL_TRACEPOINTS
|
select HAVE_SYSCALL_TRACEPOINTS
|
||||||
select HAVE_UACCESS_VALIDATION if HAVE_OBJTOOL
|
select HAVE_UACCESS_VALIDATION if HAVE_OBJTOOL
|
||||||
select HAVE_UNSTABLE_SCHED_CLOCK
|
select HAVE_UNSTABLE_SCHED_CLOCK
|
||||||
|
select HAVE_UNWIND_USER_FP if X86_64
|
||||||
select HAVE_USER_RETURN_NOTIFIER
|
select HAVE_USER_RETURN_NOTIFIER
|
||||||
select HAVE_GENERIC_VDSO
|
select HAVE_GENERIC_VDSO
|
||||||
select VDSO_GETRANDOM if X86_64
|
select VDSO_GETRANDOM if X86_64
|
||||||
|
|
|
||||||
|
|
@ -29,11 +29,10 @@
|
||||||
bool insn_has_rep_prefix(struct insn *insn)
|
bool insn_has_rep_prefix(struct insn *insn)
|
||||||
{
|
{
|
||||||
insn_byte_t p;
|
insn_byte_t p;
|
||||||
int i;
|
|
||||||
|
|
||||||
insn_get_prefixes(insn);
|
insn_get_prefixes(insn);
|
||||||
|
|
||||||
for_each_insn_prefix(insn, i, p) {
|
for_each_insn_prefix(insn, p) {
|
||||||
if (p == 0xf2 || p == 0xf3)
|
if (p == 0xf2 || p == 0xf3)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -763,6 +763,11 @@ static void amd_pmu_enable_all(int added)
|
||||||
if (!test_bit(idx, cpuc->active_mask))
|
if (!test_bit(idx, cpuc->active_mask))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* FIXME: cpuc->events[idx] can become NULL in a subtle race
|
||||||
|
* condition with NMI->throttle->x86_pmu_stop().
|
||||||
|
*/
|
||||||
|
if (cpuc->events[idx])
|
||||||
amd_pmu_enable_event(cpuc->events[idx]);
|
amd_pmu_enable_event(cpuc->events[idx]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -554,14 +554,22 @@ static inline int precise_br_compat(struct perf_event *event)
|
||||||
return m == b;
|
return m == b;
|
||||||
}
|
}
|
||||||
|
|
||||||
int x86_pmu_max_precise(void)
|
int x86_pmu_max_precise(struct pmu *pmu)
|
||||||
{
|
{
|
||||||
int precise = 0;
|
int precise = 0;
|
||||||
|
|
||||||
/* Support for constant skid */
|
|
||||||
if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
|
if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
|
||||||
|
/* arch PEBS */
|
||||||
|
if (x86_pmu.arch_pebs) {
|
||||||
|
precise = 2;
|
||||||
|
if (hybrid(pmu, arch_pebs_cap).pdists)
|
||||||
precise++;
|
precise++;
|
||||||
|
|
||||||
|
return precise;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* legacy PEBS - support for constant skid */
|
||||||
|
precise++;
|
||||||
/* Support for IP fixup */
|
/* Support for IP fixup */
|
||||||
if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2)
|
if (x86_pmu.lbr_nr || x86_pmu.intel_cap.pebs_format >= 2)
|
||||||
precise++;
|
precise++;
|
||||||
|
|
@ -569,13 +577,14 @@ int x86_pmu_max_precise(void)
|
||||||
if (x86_pmu.pebs_prec_dist)
|
if (x86_pmu.pebs_prec_dist)
|
||||||
precise++;
|
precise++;
|
||||||
}
|
}
|
||||||
|
|
||||||
return precise;
|
return precise;
|
||||||
}
|
}
|
||||||
|
|
||||||
int x86_pmu_hw_config(struct perf_event *event)
|
int x86_pmu_hw_config(struct perf_event *event)
|
||||||
{
|
{
|
||||||
if (event->attr.precise_ip) {
|
if (event->attr.precise_ip) {
|
||||||
int precise = x86_pmu_max_precise();
|
int precise = x86_pmu_max_precise(event->pmu);
|
||||||
|
|
||||||
if (event->attr.precise_ip > precise)
|
if (event->attr.precise_ip > precise)
|
||||||
return -EOPNOTSUPP;
|
return -EOPNOTSUPP;
|
||||||
|
|
@ -1344,6 +1353,7 @@ static void x86_pmu_enable(struct pmu *pmu)
|
||||||
hwc->state |= PERF_HES_ARCH;
|
hwc->state |= PERF_HES_ARCH;
|
||||||
|
|
||||||
x86_pmu_stop(event, PERF_EF_UPDATE);
|
x86_pmu_stop(event, PERF_EF_UPDATE);
|
||||||
|
cpuc->events[hwc->idx] = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -1365,6 +1375,7 @@ static void x86_pmu_enable(struct pmu *pmu)
|
||||||
* if cpuc->enabled = 0, then no wrmsr as
|
* if cpuc->enabled = 0, then no wrmsr as
|
||||||
* per x86_pmu_enable_event()
|
* per x86_pmu_enable_event()
|
||||||
*/
|
*/
|
||||||
|
cpuc->events[hwc->idx] = event;
|
||||||
x86_pmu_start(event, PERF_EF_RELOAD);
|
x86_pmu_start(event, PERF_EF_RELOAD);
|
||||||
}
|
}
|
||||||
cpuc->n_added = 0;
|
cpuc->n_added = 0;
|
||||||
|
|
@ -1531,7 +1542,6 @@ static void x86_pmu_start(struct perf_event *event, int flags)
|
||||||
|
|
||||||
event->hw.state = 0;
|
event->hw.state = 0;
|
||||||
|
|
||||||
cpuc->events[idx] = event;
|
|
||||||
__set_bit(idx, cpuc->active_mask);
|
__set_bit(idx, cpuc->active_mask);
|
||||||
static_call(x86_pmu_enable)(event);
|
static_call(x86_pmu_enable)(event);
|
||||||
perf_event_update_userpage(event);
|
perf_event_update_userpage(event);
|
||||||
|
|
@ -1610,7 +1620,6 @@ void x86_pmu_stop(struct perf_event *event, int flags)
|
||||||
if (test_bit(hwc->idx, cpuc->active_mask)) {
|
if (test_bit(hwc->idx, cpuc->active_mask)) {
|
||||||
static_call(x86_pmu_disable)(event);
|
static_call(x86_pmu_disable)(event);
|
||||||
__clear_bit(hwc->idx, cpuc->active_mask);
|
__clear_bit(hwc->idx, cpuc->active_mask);
|
||||||
cpuc->events[hwc->idx] = NULL;
|
|
||||||
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
|
WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
|
||||||
hwc->state |= PERF_HES_STOPPED;
|
hwc->state |= PERF_HES_STOPPED;
|
||||||
}
|
}
|
||||||
|
|
@ -1648,6 +1657,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
|
||||||
* Not a TXN, therefore cleanup properly.
|
* Not a TXN, therefore cleanup properly.
|
||||||
*/
|
*/
|
||||||
x86_pmu_stop(event, PERF_EF_UPDATE);
|
x86_pmu_stop(event, PERF_EF_UPDATE);
|
||||||
|
cpuc->events[event->hw.idx] = NULL;
|
||||||
|
|
||||||
for (i = 0; i < cpuc->n_events; i++) {
|
for (i = 0; i < cpuc->n_events; i++) {
|
||||||
if (event == cpuc->event_list[i])
|
if (event == cpuc->event_list[i])
|
||||||
|
|
@ -2629,7 +2639,9 @@ static ssize_t max_precise_show(struct device *cdev,
|
||||||
struct device_attribute *attr,
|
struct device_attribute *attr,
|
||||||
char *buf)
|
char *buf)
|
||||||
{
|
{
|
||||||
return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu_max_precise());
|
struct pmu *pmu = dev_get_drvdata(cdev);
|
||||||
|
|
||||||
|
return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu_max_precise(pmu));
|
||||||
}
|
}
|
||||||
|
|
||||||
static DEVICE_ATTR_RO(max_precise);
|
static DEVICE_ATTR_RO(max_precise);
|
||||||
|
|
@ -2845,46 +2857,6 @@ static unsigned long get_segment_base(unsigned int segment)
|
||||||
return get_desc_base(desc);
|
return get_desc_base(desc);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_UPROBES
|
|
||||||
/*
|
|
||||||
* Heuristic-based check if uprobe is installed at the function entry.
|
|
||||||
*
|
|
||||||
* Under assumption of user code being compiled with frame pointers,
|
|
||||||
* `push %rbp/%ebp` is a good indicator that we indeed are.
|
|
||||||
*
|
|
||||||
* Similarly, `endbr64` (assuming 64-bit mode) is also a common pattern.
|
|
||||||
* If we get this wrong, captured stack trace might have one extra bogus
|
|
||||||
* entry, but the rest of stack trace will still be meaningful.
|
|
||||||
*/
|
|
||||||
static bool is_uprobe_at_func_entry(struct pt_regs *regs)
|
|
||||||
{
|
|
||||||
struct arch_uprobe *auprobe;
|
|
||||||
|
|
||||||
if (!current->utask)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
auprobe = current->utask->auprobe;
|
|
||||||
if (!auprobe)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
/* push %rbp/%ebp */
|
|
||||||
if (auprobe->insn[0] == 0x55)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
/* endbr64 (64-bit only) */
|
|
||||||
if (user_64bit_mode(regs) && is_endbr((u32 *)auprobe->insn))
|
|
||||||
return true;
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
static bool is_uprobe_at_func_entry(struct pt_regs *regs)
|
|
||||||
{
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
#endif /* CONFIG_UPROBES */
|
|
||||||
|
|
||||||
#ifdef CONFIG_IA32_EMULATION
|
#ifdef CONFIG_IA32_EMULATION
|
||||||
|
|
||||||
#include <linux/compat.h>
|
#include <linux/compat.h>
|
||||||
|
|
|
||||||
|
|
@ -2563,6 +2563,44 @@ static void intel_pmu_disable_fixed(struct perf_event *event)
|
||||||
cpuc->fixed_ctrl_val &= ~mask;
|
cpuc->fixed_ctrl_val &= ~mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void __intel_pmu_update_event_ext(int idx, u64 ext)
|
||||||
|
{
|
||||||
|
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||||
|
u32 msr;
|
||||||
|
|
||||||
|
if (idx < INTEL_PMC_IDX_FIXED) {
|
||||||
|
msr = MSR_IA32_PMC_V6_GP0_CFG_C +
|
||||||
|
x86_pmu.addr_offset(idx, false);
|
||||||
|
} else {
|
||||||
|
msr = MSR_IA32_PMC_V6_FX0_CFG_C +
|
||||||
|
x86_pmu.addr_offset(idx - INTEL_PMC_IDX_FIXED, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
cpuc->cfg_c_val[idx] = ext;
|
||||||
|
wrmsrq(msr, ext);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void intel_pmu_disable_event_ext(struct perf_event *event)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Only clear CFG_C MSR for PEBS counter group events,
|
||||||
|
* it avoids the HW counter's value to be added into
|
||||||
|
* other PEBS records incorrectly after PEBS counter
|
||||||
|
* group events are disabled.
|
||||||
|
*
|
||||||
|
* For other events, it's unnecessary to clear CFG_C MSRs
|
||||||
|
* since CFG_C doesn't take effect if counter is in
|
||||||
|
* disabled state. That helps to reduce the WRMSR overhead
|
||||||
|
* in context switches.
|
||||||
|
*/
|
||||||
|
if (!is_pebs_counter_event_group(event))
|
||||||
|
return;
|
||||||
|
|
||||||
|
__intel_pmu_update_event_ext(event->hw.idx, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
DEFINE_STATIC_CALL_NULL(intel_pmu_disable_event_ext, intel_pmu_disable_event_ext);
|
||||||
|
|
||||||
static void intel_pmu_disable_event(struct perf_event *event)
|
static void intel_pmu_disable_event(struct perf_event *event)
|
||||||
{
|
{
|
||||||
struct hw_perf_event *hwc = &event->hw;
|
struct hw_perf_event *hwc = &event->hw;
|
||||||
|
|
@ -2571,9 +2609,12 @@ static void intel_pmu_disable_event(struct perf_event *event)
|
||||||
switch (idx) {
|
switch (idx) {
|
||||||
case 0 ... INTEL_PMC_IDX_FIXED - 1:
|
case 0 ... INTEL_PMC_IDX_FIXED - 1:
|
||||||
intel_clear_masks(event, idx);
|
intel_clear_masks(event, idx);
|
||||||
|
static_call_cond(intel_pmu_disable_event_ext)(event);
|
||||||
x86_pmu_disable_event(event);
|
x86_pmu_disable_event(event);
|
||||||
break;
|
break;
|
||||||
case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
|
case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
|
||||||
|
static_call_cond(intel_pmu_disable_event_ext)(event);
|
||||||
|
fallthrough;
|
||||||
case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
|
case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
|
||||||
intel_pmu_disable_fixed(event);
|
intel_pmu_disable_fixed(event);
|
||||||
break;
|
break;
|
||||||
|
|
@ -2940,6 +2981,79 @@ static void intel_pmu_enable_acr(struct perf_event *event)
|
||||||
|
|
||||||
DEFINE_STATIC_CALL_NULL(intel_pmu_enable_acr_event, intel_pmu_enable_acr);
|
DEFINE_STATIC_CALL_NULL(intel_pmu_enable_acr_event, intel_pmu_enable_acr);
|
||||||
|
|
||||||
|
static void intel_pmu_enable_event_ext(struct perf_event *event)
|
||||||
|
{
|
||||||
|
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||||
|
struct hw_perf_event *hwc = &event->hw;
|
||||||
|
union arch_pebs_index old, new;
|
||||||
|
struct arch_pebs_cap cap;
|
||||||
|
u64 ext = 0;
|
||||||
|
|
||||||
|
cap = hybrid(cpuc->pmu, arch_pebs_cap);
|
||||||
|
|
||||||
|
if (event->attr.precise_ip) {
|
||||||
|
u64 pebs_data_cfg = intel_get_arch_pebs_data_config(event);
|
||||||
|
|
||||||
|
ext |= ARCH_PEBS_EN;
|
||||||
|
if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD)
|
||||||
|
ext |= (-hwc->sample_period) & ARCH_PEBS_RELOAD;
|
||||||
|
|
||||||
|
if (pebs_data_cfg && cap.caps) {
|
||||||
|
if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
|
||||||
|
ext |= ARCH_PEBS_AUX & cap.caps;
|
||||||
|
|
||||||
|
if (pebs_data_cfg & PEBS_DATACFG_GP)
|
||||||
|
ext |= ARCH_PEBS_GPR & cap.caps;
|
||||||
|
|
||||||
|
if (pebs_data_cfg & PEBS_DATACFG_XMMS)
|
||||||
|
ext |= ARCH_PEBS_VECR_XMM & cap.caps;
|
||||||
|
|
||||||
|
if (pebs_data_cfg & PEBS_DATACFG_LBRS)
|
||||||
|
ext |= ARCH_PEBS_LBR & cap.caps;
|
||||||
|
|
||||||
|
if (pebs_data_cfg &
|
||||||
|
(PEBS_DATACFG_CNTR_MASK << PEBS_DATACFG_CNTR_SHIFT))
|
||||||
|
ext |= ARCH_PEBS_CNTR_GP & cap.caps;
|
||||||
|
|
||||||
|
if (pebs_data_cfg &
|
||||||
|
(PEBS_DATACFG_FIX_MASK << PEBS_DATACFG_FIX_SHIFT))
|
||||||
|
ext |= ARCH_PEBS_CNTR_FIXED & cap.caps;
|
||||||
|
|
||||||
|
if (pebs_data_cfg & PEBS_DATACFG_METRICS)
|
||||||
|
ext |= ARCH_PEBS_CNTR_METRICS & cap.caps;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cpuc->n_pebs == cpuc->n_large_pebs)
|
||||||
|
new.thresh = ARCH_PEBS_THRESH_MULTI;
|
||||||
|
else
|
||||||
|
new.thresh = ARCH_PEBS_THRESH_SINGLE;
|
||||||
|
|
||||||
|
rdmsrq(MSR_IA32_PEBS_INDEX, old.whole);
|
||||||
|
if (new.thresh != old.thresh || !old.en) {
|
||||||
|
if (old.thresh == ARCH_PEBS_THRESH_MULTI && old.wr > 0) {
|
||||||
|
/*
|
||||||
|
* Large PEBS was enabled.
|
||||||
|
* Drain PEBS buffer before applying the single PEBS.
|
||||||
|
*/
|
||||||
|
intel_pmu_drain_pebs_buffer();
|
||||||
|
} else {
|
||||||
|
new.wr = 0;
|
||||||
|
new.full = 0;
|
||||||
|
new.en = 1;
|
||||||
|
wrmsrq(MSR_IA32_PEBS_INDEX, new.whole);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_pebs_counter_event_group(event))
|
||||||
|
ext |= ARCH_PEBS_CNTR_ALLOW;
|
||||||
|
|
||||||
|
if (cpuc->cfg_c_val[hwc->idx] != ext)
|
||||||
|
__intel_pmu_update_event_ext(hwc->idx, ext);
|
||||||
|
}
|
||||||
|
|
||||||
|
DEFINE_STATIC_CALL_NULL(intel_pmu_enable_event_ext, intel_pmu_enable_event_ext);
|
||||||
|
|
||||||
static void intel_pmu_enable_event(struct perf_event *event)
|
static void intel_pmu_enable_event(struct perf_event *event)
|
||||||
{
|
{
|
||||||
u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE;
|
u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE;
|
||||||
|
|
@ -2955,10 +3069,12 @@ static void intel_pmu_enable_event(struct perf_event *event)
|
||||||
enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR;
|
enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR;
|
||||||
intel_set_masks(event, idx);
|
intel_set_masks(event, idx);
|
||||||
static_call_cond(intel_pmu_enable_acr_event)(event);
|
static_call_cond(intel_pmu_enable_acr_event)(event);
|
||||||
|
static_call_cond(intel_pmu_enable_event_ext)(event);
|
||||||
__x86_pmu_enable_event(hwc, enable_mask);
|
__x86_pmu_enable_event(hwc, enable_mask);
|
||||||
break;
|
break;
|
||||||
case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
|
case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
|
||||||
static_call_cond(intel_pmu_enable_acr_event)(event);
|
static_call_cond(intel_pmu_enable_acr_event)(event);
|
||||||
|
static_call_cond(intel_pmu_enable_event_ext)(event);
|
||||||
fallthrough;
|
fallthrough;
|
||||||
case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
|
case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
|
||||||
intel_pmu_enable_fixed(event);
|
intel_pmu_enable_fixed(event);
|
||||||
|
|
@ -3215,6 +3331,19 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
|
||||||
status &= ~GLOBAL_STATUS_PERF_METRICS_OVF_BIT;
|
status &= ~GLOBAL_STATUS_PERF_METRICS_OVF_BIT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Arch PEBS sets bit 54 in the global status register
|
||||||
|
*/
|
||||||
|
if (__test_and_clear_bit(GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT,
|
||||||
|
(unsigned long *)&status)) {
|
||||||
|
handled++;
|
||||||
|
static_call(x86_pmu_drain_pebs)(regs, &data);
|
||||||
|
|
||||||
|
if (cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS] &&
|
||||||
|
is_pebs_counter_event_group(cpuc->events[INTEL_PMC_IDX_FIXED_SLOTS]))
|
||||||
|
status &= ~GLOBAL_STATUS_PERF_METRICS_OVF_BIT;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Intel PT
|
* Intel PT
|
||||||
*/
|
*/
|
||||||
|
|
@ -3269,7 +3398,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
|
||||||
* The PEBS buffer has to be drained before handling the A-PMI
|
* The PEBS buffer has to be drained before handling the A-PMI
|
||||||
*/
|
*/
|
||||||
if (is_pebs_counter_event_group(event))
|
if (is_pebs_counter_event_group(event))
|
||||||
x86_pmu.drain_pebs(regs, &data);
|
static_call(x86_pmu_drain_pebs)(regs, &data);
|
||||||
|
|
||||||
last_period = event->hw.last_period;
|
last_period = event->hw.last_period;
|
||||||
|
|
||||||
|
|
@ -4029,7 +4158,9 @@ static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event)
|
||||||
if (!event->attr.exclude_kernel)
|
if (!event->attr.exclude_kernel)
|
||||||
flags &= ~PERF_SAMPLE_REGS_USER;
|
flags &= ~PERF_SAMPLE_REGS_USER;
|
||||||
if (event->attr.sample_regs_user & ~PEBS_GP_REGS)
|
if (event->attr.sample_regs_user & ~PEBS_GP_REGS)
|
||||||
flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR);
|
flags &= ~PERF_SAMPLE_REGS_USER;
|
||||||
|
if (event->attr.sample_regs_intr & ~PEBS_GP_REGS)
|
||||||
|
flags &= ~PERF_SAMPLE_REGS_INTR;
|
||||||
return flags;
|
return flags;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -4204,6 +4335,20 @@ static bool intel_pmu_is_acr_group(struct perf_event *event)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool intel_pmu_has_pebs_counter_group(struct pmu *pmu)
|
||||||
|
{
|
||||||
|
u64 caps;
|
||||||
|
|
||||||
|
if (x86_pmu.intel_cap.pebs_format >= 6 && x86_pmu.intel_cap.pebs_baseline)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
caps = hybrid(pmu, arch_pebs_cap).caps;
|
||||||
|
if (x86_pmu.arch_pebs && (caps & ARCH_PEBS_CNTR_MASK))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
static inline void intel_pmu_set_acr_cntr_constr(struct perf_event *event,
|
static inline void intel_pmu_set_acr_cntr_constr(struct perf_event *event,
|
||||||
u64 *cause_mask, int *num)
|
u64 *cause_mask, int *num)
|
||||||
{
|
{
|
||||||
|
|
@ -4237,6 +4382,8 @@ static int intel_pmu_hw_config(struct perf_event *event)
|
||||||
}
|
}
|
||||||
|
|
||||||
if (event->attr.precise_ip) {
|
if (event->attr.precise_ip) {
|
||||||
|
struct arch_pebs_cap pebs_cap = hybrid(event->pmu, arch_pebs_cap);
|
||||||
|
|
||||||
if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT)
|
if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
|
|
@ -4250,6 +4397,15 @@ static int intel_pmu_hw_config(struct perf_event *event)
|
||||||
}
|
}
|
||||||
if (x86_pmu.pebs_aliases)
|
if (x86_pmu.pebs_aliases)
|
||||||
x86_pmu.pebs_aliases(event);
|
x86_pmu.pebs_aliases(event);
|
||||||
|
|
||||||
|
if (x86_pmu.arch_pebs) {
|
||||||
|
u64 cntr_mask = hybrid(event->pmu, intel_ctrl) &
|
||||||
|
~GLOBAL_CTRL_EN_PERF_METRICS;
|
||||||
|
u64 pebs_mask = event->attr.precise_ip >= 3 ?
|
||||||
|
pebs_cap.pdists : pebs_cap.counters;
|
||||||
|
if (cntr_mask != pebs_mask)
|
||||||
|
event->hw.dyn_constraint &= pebs_mask;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (needs_branch_stack(event)) {
|
if (needs_branch_stack(event)) {
|
||||||
|
|
@ -4341,8 +4497,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((event->attr.sample_type & PERF_SAMPLE_READ) &&
|
if ((event->attr.sample_type & PERF_SAMPLE_READ) &&
|
||||||
(x86_pmu.intel_cap.pebs_format >= 6) &&
|
intel_pmu_has_pebs_counter_group(event->pmu) &&
|
||||||
x86_pmu.intel_cap.pebs_baseline &&
|
|
||||||
is_sampling_event(event) &&
|
is_sampling_event(event) &&
|
||||||
event->attr.precise_ip)
|
event->attr.precise_ip)
|
||||||
event->group_leader->hw.flags |= PERF_X86_EVENT_PEBS_CNTR;
|
event->group_leader->hw.flags |= PERF_X86_EVENT_PEBS_CNTR;
|
||||||
|
|
@ -5212,7 +5367,13 @@ int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
|
||||||
|
|
||||||
static int intel_pmu_cpu_prepare(int cpu)
|
static int intel_pmu_cpu_prepare(int cpu)
|
||||||
{
|
{
|
||||||
return intel_cpuc_prepare(&per_cpu(cpu_hw_events, cpu), cpu);
|
int ret;
|
||||||
|
|
||||||
|
ret = intel_cpuc_prepare(&per_cpu(cpu_hw_events, cpu), cpu);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
return alloc_arch_pebs_buf_on_cpu(cpu);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void flip_smm_bit(void *data)
|
static void flip_smm_bit(void *data)
|
||||||
|
|
@ -5257,6 +5418,163 @@ static void intel_pmu_check_event_constraints(struct event_constraint *event_con
|
||||||
u64 fixed_cntr_mask,
|
u64 fixed_cntr_mask,
|
||||||
u64 intel_ctrl);
|
u64 intel_ctrl);
|
||||||
|
|
||||||
|
enum dyn_constr_type {
|
||||||
|
DYN_CONSTR_NONE,
|
||||||
|
DYN_CONSTR_BR_CNTR,
|
||||||
|
DYN_CONSTR_ACR_CNTR,
|
||||||
|
DYN_CONSTR_ACR_CAUSE,
|
||||||
|
DYN_CONSTR_PEBS,
|
||||||
|
DYN_CONSTR_PDIST,
|
||||||
|
|
||||||
|
DYN_CONSTR_MAX,
|
||||||
|
};
|
||||||
|
|
||||||
|
static const char * const dyn_constr_type_name[] = {
|
||||||
|
[DYN_CONSTR_NONE] = "a normal event",
|
||||||
|
[DYN_CONSTR_BR_CNTR] = "a branch counter logging event",
|
||||||
|
[DYN_CONSTR_ACR_CNTR] = "an auto-counter reload event",
|
||||||
|
[DYN_CONSTR_ACR_CAUSE] = "an auto-counter reload cause event",
|
||||||
|
[DYN_CONSTR_PEBS] = "a PEBS event",
|
||||||
|
[DYN_CONSTR_PDIST] = "a PEBS PDIST event",
|
||||||
|
};
|
||||||
|
|
||||||
|
static void __intel_pmu_check_dyn_constr(struct event_constraint *constr,
|
||||||
|
enum dyn_constr_type type, u64 mask)
|
||||||
|
{
|
||||||
|
struct event_constraint *c1, *c2;
|
||||||
|
int new_weight, check_weight;
|
||||||
|
u64 new_mask, check_mask;
|
||||||
|
|
||||||
|
for_each_event_constraint(c1, constr) {
|
||||||
|
new_mask = c1->idxmsk64 & mask;
|
||||||
|
new_weight = hweight64(new_mask);
|
||||||
|
|
||||||
|
/* ignore topdown perf metrics event */
|
||||||
|
if (c1->idxmsk64 & INTEL_PMC_MSK_TOPDOWN)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (!new_weight && fls64(c1->idxmsk64) < INTEL_PMC_IDX_FIXED) {
|
||||||
|
pr_info("The event 0x%llx is not supported as %s.\n",
|
||||||
|
c1->code, dyn_constr_type_name[type]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (new_weight <= 1)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
for_each_event_constraint(c2, c1 + 1) {
|
||||||
|
bool check_fail = false;
|
||||||
|
|
||||||
|
check_mask = c2->idxmsk64 & mask;
|
||||||
|
check_weight = hweight64(check_mask);
|
||||||
|
|
||||||
|
if (c2->idxmsk64 & INTEL_PMC_MSK_TOPDOWN ||
|
||||||
|
!check_weight)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* The same constraints or no overlap */
|
||||||
|
if (new_mask == check_mask ||
|
||||||
|
(new_mask ^ check_mask) == (new_mask | check_mask))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A scheduler issue may be triggered in the following cases.
|
||||||
|
* - Two overlap constraints have the same weight.
|
||||||
|
* E.g., A constraints: 0x3, B constraints: 0x6
|
||||||
|
* event counter failure case
|
||||||
|
* B PMC[2:1] 1
|
||||||
|
* A PMC[1:0] 0
|
||||||
|
* A PMC[1:0] FAIL
|
||||||
|
* - Two overlap constraints have different weight.
|
||||||
|
* The constraint has a low weight, but has high last bit.
|
||||||
|
* E.g., A constraints: 0x7, B constraints: 0xC
|
||||||
|
* event counter failure case
|
||||||
|
* B PMC[3:2] 2
|
||||||
|
* A PMC[2:0] 0
|
||||||
|
* A PMC[2:0] 1
|
||||||
|
* A PMC[2:0] FAIL
|
||||||
|
*/
|
||||||
|
if (new_weight == check_weight) {
|
||||||
|
check_fail = true;
|
||||||
|
} else if (new_weight < check_weight) {
|
||||||
|
if ((new_mask | check_mask) != check_mask &&
|
||||||
|
fls64(new_mask) > fls64(check_mask))
|
||||||
|
check_fail = true;
|
||||||
|
} else {
|
||||||
|
if ((new_mask | check_mask) != new_mask &&
|
||||||
|
fls64(new_mask) < fls64(check_mask))
|
||||||
|
check_fail = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (check_fail) {
|
||||||
|
pr_info("The two events 0x%llx and 0x%llx may not be "
|
||||||
|
"fully scheduled under some circumstances as "
|
||||||
|
"%s.\n",
|
||||||
|
c1->code, c2->code, dyn_constr_type_name[type]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void intel_pmu_check_dyn_constr(struct pmu *pmu,
|
||||||
|
struct event_constraint *constr,
|
||||||
|
u64 cntr_mask)
|
||||||
|
{
|
||||||
|
enum dyn_constr_type i;
|
||||||
|
u64 mask;
|
||||||
|
|
||||||
|
for (i = DYN_CONSTR_NONE; i < DYN_CONSTR_MAX; i++) {
|
||||||
|
mask = 0;
|
||||||
|
switch (i) {
|
||||||
|
case DYN_CONSTR_NONE:
|
||||||
|
mask = cntr_mask;
|
||||||
|
break;
|
||||||
|
case DYN_CONSTR_BR_CNTR:
|
||||||
|
if (x86_pmu.flags & PMU_FL_BR_CNTR)
|
||||||
|
mask = x86_pmu.lbr_counters;
|
||||||
|
break;
|
||||||
|
case DYN_CONSTR_ACR_CNTR:
|
||||||
|
mask = hybrid(pmu, acr_cntr_mask64) & GENMASK_ULL(INTEL_PMC_MAX_GENERIC - 1, 0);
|
||||||
|
break;
|
||||||
|
case DYN_CONSTR_ACR_CAUSE:
|
||||||
|
if (hybrid(pmu, acr_cntr_mask64) == hybrid(pmu, acr_cause_mask64))
|
||||||
|
continue;
|
||||||
|
mask = hybrid(pmu, acr_cause_mask64) & GENMASK_ULL(INTEL_PMC_MAX_GENERIC - 1, 0);
|
||||||
|
break;
|
||||||
|
case DYN_CONSTR_PEBS:
|
||||||
|
if (x86_pmu.arch_pebs)
|
||||||
|
mask = hybrid(pmu, arch_pebs_cap).counters;
|
||||||
|
break;
|
||||||
|
case DYN_CONSTR_PDIST:
|
||||||
|
if (x86_pmu.arch_pebs)
|
||||||
|
mask = hybrid(pmu, arch_pebs_cap).pdists;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
pr_warn("Unsupported dynamic constraint type %d\n", i);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mask)
|
||||||
|
__intel_pmu_check_dyn_constr(constr, i, mask);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void intel_pmu_check_event_constraints_all(struct pmu *pmu)
|
||||||
|
{
|
||||||
|
struct event_constraint *event_constraints = hybrid(pmu, event_constraints);
|
||||||
|
struct event_constraint *pebs_constraints = hybrid(pmu, pebs_constraints);
|
||||||
|
u64 cntr_mask = hybrid(pmu, cntr_mask64);
|
||||||
|
u64 fixed_cntr_mask = hybrid(pmu, fixed_cntr_mask64);
|
||||||
|
u64 intel_ctrl = hybrid(pmu, intel_ctrl);
|
||||||
|
|
||||||
|
intel_pmu_check_event_constraints(event_constraints, cntr_mask,
|
||||||
|
fixed_cntr_mask, intel_ctrl);
|
||||||
|
|
||||||
|
if (event_constraints)
|
||||||
|
intel_pmu_check_dyn_constr(pmu, event_constraints, cntr_mask);
|
||||||
|
|
||||||
|
if (pebs_constraints)
|
||||||
|
intel_pmu_check_dyn_constr(pmu, pebs_constraints, cntr_mask);
|
||||||
|
}
|
||||||
|
|
||||||
static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs);
|
static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs);
|
||||||
|
|
||||||
static inline bool intel_pmu_broken_perf_cap(void)
|
static inline bool intel_pmu_broken_perf_cap(void)
|
||||||
|
|
@ -5269,34 +5587,89 @@ static inline bool intel_pmu_broken_perf_cap(void)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void __intel_update_pmu_caps(struct pmu *pmu)
|
||||||
|
{
|
||||||
|
struct pmu *dest_pmu = pmu ? pmu : x86_get_pmu(smp_processor_id());
|
||||||
|
|
||||||
|
if (hybrid(pmu, arch_pebs_cap).caps & ARCH_PEBS_VECR_XMM)
|
||||||
|
dest_pmu->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void __intel_update_large_pebs_flags(struct pmu *pmu)
|
||||||
|
{
|
||||||
|
u64 caps = hybrid(pmu, arch_pebs_cap).caps;
|
||||||
|
|
||||||
|
x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
|
||||||
|
if (caps & ARCH_PEBS_LBR)
|
||||||
|
x86_pmu.large_pebs_flags |= PERF_SAMPLE_BRANCH_STACK;
|
||||||
|
if (caps & ARCH_PEBS_CNTR_MASK)
|
||||||
|
x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ;
|
||||||
|
|
||||||
|
if (!(caps & ARCH_PEBS_AUX))
|
||||||
|
x86_pmu.large_pebs_flags &= ~PERF_SAMPLE_DATA_SRC;
|
||||||
|
if (!(caps & ARCH_PEBS_GPR)) {
|
||||||
|
x86_pmu.large_pebs_flags &=
|
||||||
|
~(PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#define counter_mask(_gp, _fixed) ((_gp) | ((u64)(_fixed) << INTEL_PMC_IDX_FIXED))
|
||||||
|
|
||||||
static void update_pmu_cap(struct pmu *pmu)
|
static void update_pmu_cap(struct pmu *pmu)
|
||||||
{
|
{
|
||||||
unsigned int cntr, fixed_cntr, ecx, edx;
|
unsigned int eax, ebx, ecx, edx;
|
||||||
union cpuid35_eax eax;
|
union cpuid35_eax eax_0;
|
||||||
union cpuid35_ebx ebx;
|
union cpuid35_ebx ebx_0;
|
||||||
|
u64 cntrs_mask = 0;
|
||||||
|
u64 pebs_mask = 0;
|
||||||
|
u64 pdists_mask = 0;
|
||||||
|
|
||||||
cpuid(ARCH_PERFMON_EXT_LEAF, &eax.full, &ebx.full, &ecx, &edx);
|
cpuid(ARCH_PERFMON_EXT_LEAF, &eax_0.full, &ebx_0.full, &ecx, &edx);
|
||||||
|
|
||||||
if (ebx.split.umask2)
|
if (ebx_0.split.umask2)
|
||||||
hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_UMASK2;
|
hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_UMASK2;
|
||||||
if (ebx.split.eq)
|
if (ebx_0.split.eq)
|
||||||
hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_EQ;
|
hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_EQ;
|
||||||
|
|
||||||
if (eax.split.cntr_subleaf) {
|
if (eax_0.split.cntr_subleaf) {
|
||||||
cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF,
|
cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF,
|
||||||
&cntr, &fixed_cntr, &ecx, &edx);
|
&eax, &ebx, &ecx, &edx);
|
||||||
hybrid(pmu, cntr_mask64) = cntr;
|
hybrid(pmu, cntr_mask64) = eax;
|
||||||
hybrid(pmu, fixed_cntr_mask64) = fixed_cntr;
|
hybrid(pmu, fixed_cntr_mask64) = ebx;
|
||||||
|
cntrs_mask = counter_mask(eax, ebx);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (eax.split.acr_subleaf) {
|
if (eax_0.split.acr_subleaf) {
|
||||||
cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_ACR_LEAF,
|
cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_ACR_LEAF,
|
||||||
&cntr, &fixed_cntr, &ecx, &edx);
|
&eax, &ebx, &ecx, &edx);
|
||||||
/* The mask of the counters which can be reloaded */
|
/* The mask of the counters which can be reloaded */
|
||||||
hybrid(pmu, acr_cntr_mask64) = cntr | ((u64)fixed_cntr << INTEL_PMC_IDX_FIXED);
|
hybrid(pmu, acr_cntr_mask64) = counter_mask(eax, ebx);
|
||||||
|
|
||||||
/* The mask of the counters which can cause a reload of reloadable counters */
|
/* The mask of the counters which can cause a reload of reloadable counters */
|
||||||
hybrid(pmu, acr_cause_mask64) = ecx | ((u64)edx << INTEL_PMC_IDX_FIXED);
|
hybrid(pmu, acr_cause_mask64) = counter_mask(ecx, edx);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Bits[5:4] should be set simultaneously if arch-PEBS is supported */
|
||||||
|
if (eax_0.split.pebs_caps_subleaf && eax_0.split.pebs_cnts_subleaf) {
|
||||||
|
cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_PEBS_CAP_LEAF,
|
||||||
|
&eax, &ebx, &ecx, &edx);
|
||||||
|
hybrid(pmu, arch_pebs_cap).caps = (u64)ebx << 32;
|
||||||
|
|
||||||
|
cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_PEBS_COUNTER_LEAF,
|
||||||
|
&eax, &ebx, &ecx, &edx);
|
||||||
|
pebs_mask = counter_mask(eax, ecx);
|
||||||
|
pdists_mask = counter_mask(ebx, edx);
|
||||||
|
hybrid(pmu, arch_pebs_cap).counters = pebs_mask;
|
||||||
|
hybrid(pmu, arch_pebs_cap).pdists = pdists_mask;
|
||||||
|
|
||||||
|
if (WARN_ON((pebs_mask | pdists_mask) & ~cntrs_mask)) {
|
||||||
|
x86_pmu.arch_pebs = 0;
|
||||||
|
} else {
|
||||||
|
__intel_update_pmu_caps(pmu);
|
||||||
|
__intel_update_large_pebs_flags(pmu);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
WARN_ON(x86_pmu.arch_pebs == 1);
|
||||||
|
x86_pmu.arch_pebs = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!intel_pmu_broken_perf_cap()) {
|
if (!intel_pmu_broken_perf_cap()) {
|
||||||
|
|
@ -5319,10 +5692,7 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu)
|
||||||
else
|
else
|
||||||
pmu->intel_ctrl &= ~GLOBAL_CTRL_EN_PERF_METRICS;
|
pmu->intel_ctrl &= ~GLOBAL_CTRL_EN_PERF_METRICS;
|
||||||
|
|
||||||
intel_pmu_check_event_constraints(pmu->event_constraints,
|
intel_pmu_check_event_constraints_all(&pmu->pmu);
|
||||||
pmu->cntr_mask64,
|
|
||||||
pmu->fixed_cntr_mask64,
|
|
||||||
pmu->intel_ctrl);
|
|
||||||
|
|
||||||
intel_pmu_check_extra_regs(pmu->extra_regs);
|
intel_pmu_check_extra_regs(pmu->extra_regs);
|
||||||
}
|
}
|
||||||
|
|
@ -5418,6 +5788,7 @@ static void intel_pmu_cpu_starting(int cpu)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
init_debug_store_on_cpu(cpu);
|
init_debug_store_on_cpu(cpu);
|
||||||
|
init_arch_pebs_on_cpu(cpu);
|
||||||
/*
|
/*
|
||||||
* Deal with CPUs that don't clear their LBRs on power-up, and that may
|
* Deal with CPUs that don't clear their LBRs on power-up, and that may
|
||||||
* even boot with LBRs enabled.
|
* even boot with LBRs enabled.
|
||||||
|
|
@ -5456,6 +5827,8 @@ static void intel_pmu_cpu_starting(int cpu)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__intel_update_pmu_caps(cpuc->pmu);
|
||||||
|
|
||||||
if (!cpuc->shared_regs)
|
if (!cpuc->shared_regs)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
|
@ -5515,6 +5888,7 @@ static void free_excl_cntrs(struct cpu_hw_events *cpuc)
|
||||||
static void intel_pmu_cpu_dying(int cpu)
|
static void intel_pmu_cpu_dying(int cpu)
|
||||||
{
|
{
|
||||||
fini_debug_store_on_cpu(cpu);
|
fini_debug_store_on_cpu(cpu);
|
||||||
|
fini_arch_pebs_on_cpu(cpu);
|
||||||
}
|
}
|
||||||
|
|
||||||
void intel_cpuc_finish(struct cpu_hw_events *cpuc)
|
void intel_cpuc_finish(struct cpu_hw_events *cpuc)
|
||||||
|
|
@ -5535,6 +5909,7 @@ static void intel_pmu_cpu_dead(int cpu)
|
||||||
{
|
{
|
||||||
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
|
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
|
||||||
|
|
||||||
|
release_arch_pebs_buf_on_cpu(cpu);
|
||||||
intel_cpuc_finish(cpuc);
|
intel_cpuc_finish(cpuc);
|
||||||
|
|
||||||
if (is_hybrid() && cpuc->pmu)
|
if (is_hybrid() && cpuc->pmu)
|
||||||
|
|
@ -6250,7 +6625,7 @@ tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i)
|
||||||
static umode_t
|
static umode_t
|
||||||
pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
|
pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
|
||||||
{
|
{
|
||||||
return x86_pmu.ds_pebs ? attr->mode : 0;
|
return intel_pmu_has_pebs() ? attr->mode : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static umode_t
|
static umode_t
|
||||||
|
|
@ -6940,8 +7315,11 @@ __init int intel_pmu_init(void)
|
||||||
* Many features on and after V6 require dynamic constraint,
|
* Many features on and after V6 require dynamic constraint,
|
||||||
* e.g., Arch PEBS, ACR.
|
* e.g., Arch PEBS, ACR.
|
||||||
*/
|
*/
|
||||||
if (version >= 6)
|
if (version >= 6) {
|
||||||
x86_pmu.flags |= PMU_FL_DYN_CONSTRAINT;
|
x86_pmu.flags |= PMU_FL_DYN_CONSTRAINT;
|
||||||
|
x86_pmu.late_setup = intel_pmu_late_setup;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Install the hw-cache-events table:
|
* Install the hw-cache-events table:
|
||||||
*/
|
*/
|
||||||
|
|
@ -7727,6 +8105,14 @@ __init int intel_pmu_init(void)
|
||||||
if (!is_hybrid() && boot_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT))
|
if (!is_hybrid() && boot_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT))
|
||||||
update_pmu_cap(NULL);
|
update_pmu_cap(NULL);
|
||||||
|
|
||||||
|
if (x86_pmu.arch_pebs) {
|
||||||
|
static_call_update(intel_pmu_disable_event_ext,
|
||||||
|
intel_pmu_disable_event_ext);
|
||||||
|
static_call_update(intel_pmu_enable_event_ext,
|
||||||
|
intel_pmu_enable_event_ext);
|
||||||
|
pr_cont("Architectural PEBS, ");
|
||||||
|
}
|
||||||
|
|
||||||
intel_pmu_check_counters_mask(&x86_pmu.cntr_mask64,
|
intel_pmu_check_counters_mask(&x86_pmu.cntr_mask64,
|
||||||
&x86_pmu.fixed_cntr_mask64,
|
&x86_pmu.fixed_cntr_mask64,
|
||||||
&x86_pmu.intel_ctrl);
|
&x86_pmu.intel_ctrl);
|
||||||
|
|
@ -7735,10 +8121,8 @@ __init int intel_pmu_init(void)
|
||||||
if (x86_pmu.intel_cap.anythread_deprecated)
|
if (x86_pmu.intel_cap.anythread_deprecated)
|
||||||
x86_pmu.format_attrs = intel_arch_formats_attr;
|
x86_pmu.format_attrs = intel_arch_formats_attr;
|
||||||
|
|
||||||
intel_pmu_check_event_constraints(x86_pmu.event_constraints,
|
intel_pmu_check_event_constraints_all(NULL);
|
||||||
x86_pmu.cntr_mask64,
|
|
||||||
x86_pmu.fixed_cntr_mask64,
|
|
||||||
x86_pmu.intel_ctrl);
|
|
||||||
/*
|
/*
|
||||||
* Access LBR MSR may cause #GP under certain circumstances.
|
* Access LBR MSR may cause #GP under certain circumstances.
|
||||||
* Check all LBR MSR here.
|
* Check all LBR MSR here.
|
||||||
|
|
|
||||||
|
|
@ -41,7 +41,7 @@
|
||||||
* MSR_CORE_C1_RES: CORE C1 Residency Counter
|
* MSR_CORE_C1_RES: CORE C1 Residency Counter
|
||||||
* perf code: 0x00
|
* perf code: 0x00
|
||||||
* Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL
|
* Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL
|
||||||
* MTL,SRF,GRR,ARL,LNL
|
* MTL,SRF,GRR,ARL,LNL,PTL
|
||||||
* Scope: Core (each processor core has a MSR)
|
* Scope: Core (each processor core has a MSR)
|
||||||
* MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
|
* MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter
|
||||||
* perf code: 0x01
|
* perf code: 0x01
|
||||||
|
|
@ -53,31 +53,32 @@
|
||||||
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
|
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
|
||||||
* SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
|
* SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
|
||||||
* TGL,TNT,RKL,ADL,RPL,SPR,MTL,SRF,
|
* TGL,TNT,RKL,ADL,RPL,SPR,MTL,SRF,
|
||||||
* GRR,ARL,LNL
|
* GRR,ARL,LNL,PTL
|
||||||
* Scope: Core
|
* Scope: Core
|
||||||
* MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
|
* MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter
|
||||||
* perf code: 0x03
|
* perf code: 0x03
|
||||||
* Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML,
|
* Available model: SNB,IVB,HSW,BDW,SKL,CNL,KBL,CML,
|
||||||
* ICL,TGL,RKL,ADL,RPL,MTL,ARL,LNL
|
* ICL,TGL,RKL,ADL,RPL,MTL,ARL,LNL,
|
||||||
|
* PTL
|
||||||
* Scope: Core
|
* Scope: Core
|
||||||
* MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter.
|
* MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter.
|
||||||
* perf code: 0x00
|
* perf code: 0x00
|
||||||
* Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL,
|
* Available model: SNB,IVB,HSW,BDW,SKL,KNL,GLM,CNL,
|
||||||
* KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL,
|
* KBL,CML,ICL,ICX,TGL,TNT,RKL,ADL,
|
||||||
* RPL,SPR,MTL,ARL,LNL,SRF
|
* RPL,SPR,MTL,ARL,LNL,SRF,PTL
|
||||||
* Scope: Package (physical package)
|
* Scope: Package (physical package)
|
||||||
* MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter.
|
* MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter.
|
||||||
* perf code: 0x01
|
* perf code: 0x01
|
||||||
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL,
|
* Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL,
|
||||||
* GLM,CNL,KBL,CML,ICL,TGL,TNT,RKL,
|
* GLM,CNL,KBL,CML,ICL,TGL,TNT,RKL,
|
||||||
* ADL,RPL,MTL,ARL,LNL
|
* ADL,RPL,MTL,ARL
|
||||||
* Scope: Package (physical package)
|
* Scope: Package (physical package)
|
||||||
* MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter.
|
* MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter.
|
||||||
* perf code: 0x02
|
* perf code: 0x02
|
||||||
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
|
* Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,
|
||||||
* SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
|
* SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX,
|
||||||
* TGL,TNT,RKL,ADL,RPL,SPR,MTL,SRF,
|
* TGL,TNT,RKL,ADL,RPL,SPR,MTL,SRF,
|
||||||
* ARL,LNL
|
* ARL,LNL,PTL
|
||||||
* Scope: Package (physical package)
|
* Scope: Package (physical package)
|
||||||
* MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter.
|
* MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter.
|
||||||
* perf code: 0x03
|
* perf code: 0x03
|
||||||
|
|
@ -96,7 +97,7 @@
|
||||||
* MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
|
* MSR_PKG_C10_RESIDENCY: Package C10 Residency Counter.
|
||||||
* perf code: 0x06
|
* perf code: 0x06
|
||||||
* Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL,
|
* Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL,
|
||||||
* TNT,RKL,ADL,RPL,MTL,ARL,LNL
|
* TNT,RKL,ADL,RPL,MTL,ARL,LNL,PTL
|
||||||
* Scope: Package (physical package)
|
* Scope: Package (physical package)
|
||||||
* MSR_MODULE_C6_RES_MS: Module C6 Residency Counter.
|
* MSR_MODULE_C6_RES_MS: Module C6 Residency Counter.
|
||||||
* perf code: 0x00
|
* perf code: 0x00
|
||||||
|
|
@ -522,7 +523,6 @@ static const struct cstate_model lnl_cstates __initconst = {
|
||||||
BIT(PERF_CSTATE_CORE_C7_RES),
|
BIT(PERF_CSTATE_CORE_C7_RES),
|
||||||
|
|
||||||
.pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) |
|
.pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) |
|
||||||
BIT(PERF_CSTATE_PKG_C3_RES) |
|
|
||||||
BIT(PERF_CSTATE_PKG_C6_RES) |
|
BIT(PERF_CSTATE_PKG_C6_RES) |
|
||||||
BIT(PERF_CSTATE_PKG_C10_RES),
|
BIT(PERF_CSTATE_PKG_C10_RES),
|
||||||
};
|
};
|
||||||
|
|
@ -628,6 +628,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
|
||||||
X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &adl_cstates),
|
X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &adl_cstates),
|
||||||
X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, &srf_cstates),
|
X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, &srf_cstates),
|
||||||
X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, &grr_cstates),
|
X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, &grr_cstates),
|
||||||
|
X86_MATCH_VFM(INTEL_ATOM_DARKMONT_X, &srf_cstates),
|
||||||
|
|
||||||
X86_MATCH_VFM(INTEL_ICELAKE_L, &icl_cstates),
|
X86_MATCH_VFM(INTEL_ICELAKE_L, &icl_cstates),
|
||||||
X86_MATCH_VFM(INTEL_ICELAKE, &icl_cstates),
|
X86_MATCH_VFM(INTEL_ICELAKE, &icl_cstates),
|
||||||
|
|
@ -652,6 +653,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
|
||||||
X86_MATCH_VFM(INTEL_ARROWLAKE_H, &adl_cstates),
|
X86_MATCH_VFM(INTEL_ARROWLAKE_H, &adl_cstates),
|
||||||
X86_MATCH_VFM(INTEL_ARROWLAKE_U, &adl_cstates),
|
X86_MATCH_VFM(INTEL_ARROWLAKE_U, &adl_cstates),
|
||||||
X86_MATCH_VFM(INTEL_LUNARLAKE_M, &lnl_cstates),
|
X86_MATCH_VFM(INTEL_LUNARLAKE_M, &lnl_cstates),
|
||||||
|
X86_MATCH_VFM(INTEL_PANTHERLAKE_L, &lnl_cstates),
|
||||||
{ },
|
{ },
|
||||||
};
|
};
|
||||||
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
|
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
|
||||||
|
|
|
||||||
|
|
@ -626,13 +626,18 @@ static int alloc_pebs_buffer(int cpu)
|
||||||
int max, node = cpu_to_node(cpu);
|
int max, node = cpu_to_node(cpu);
|
||||||
void *buffer, *insn_buff, *cea;
|
void *buffer, *insn_buff, *cea;
|
||||||
|
|
||||||
if (!x86_pmu.ds_pebs)
|
if (!intel_pmu_has_pebs())
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
|
buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
|
||||||
if (unlikely(!buffer))
|
if (unlikely(!buffer))
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
|
if (x86_pmu.arch_pebs) {
|
||||||
|
hwev->pebs_vaddr = buffer;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* HSW+ already provides us the eventing ip; no need to allocate this
|
* HSW+ already provides us the eventing ip; no need to allocate this
|
||||||
* buffer then.
|
* buffer then.
|
||||||
|
|
@ -645,7 +650,7 @@ static int alloc_pebs_buffer(int cpu)
|
||||||
}
|
}
|
||||||
per_cpu(insn_buffer, cpu) = insn_buff;
|
per_cpu(insn_buffer, cpu) = insn_buff;
|
||||||
}
|
}
|
||||||
hwev->ds_pebs_vaddr = buffer;
|
hwev->pebs_vaddr = buffer;
|
||||||
/* Update the cpu entry area mapping */
|
/* Update the cpu entry area mapping */
|
||||||
cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
|
cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
|
||||||
ds->pebs_buffer_base = (unsigned long) cea;
|
ds->pebs_buffer_base = (unsigned long) cea;
|
||||||
|
|
@ -661,17 +666,20 @@ static void release_pebs_buffer(int cpu)
|
||||||
struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
|
struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
|
||||||
void *cea;
|
void *cea;
|
||||||
|
|
||||||
if (!x86_pmu.ds_pebs)
|
if (!intel_pmu_has_pebs())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
if (x86_pmu.ds_pebs) {
|
||||||
kfree(per_cpu(insn_buffer, cpu));
|
kfree(per_cpu(insn_buffer, cpu));
|
||||||
per_cpu(insn_buffer, cpu) = NULL;
|
per_cpu(insn_buffer, cpu) = NULL;
|
||||||
|
|
||||||
/* Clear the fixmap */
|
/* Clear the fixmap */
|
||||||
cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
|
cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
|
||||||
ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
|
ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
|
||||||
dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
|
}
|
||||||
hwev->ds_pebs_vaddr = NULL;
|
|
||||||
|
dsfree_pages(hwev->pebs_vaddr, x86_pmu.pebs_buffer_size);
|
||||||
|
hwev->pebs_vaddr = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int alloc_bts_buffer(int cpu)
|
static int alloc_bts_buffer(int cpu)
|
||||||
|
|
@ -824,6 +832,56 @@ void reserve_ds_buffers(void)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline int alloc_arch_pebs_buf_on_cpu(int cpu)
|
||||||
|
{
|
||||||
|
if (!x86_pmu.arch_pebs)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return alloc_pebs_buffer(cpu);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void release_arch_pebs_buf_on_cpu(int cpu)
|
||||||
|
{
|
||||||
|
if (!x86_pmu.arch_pebs)
|
||||||
|
return;
|
||||||
|
|
||||||
|
release_pebs_buffer(cpu);
|
||||||
|
}
|
||||||
|
|
||||||
|
void init_arch_pebs_on_cpu(int cpu)
|
||||||
|
{
|
||||||
|
struct cpu_hw_events *cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
|
||||||
|
u64 arch_pebs_base;
|
||||||
|
|
||||||
|
if (!x86_pmu.arch_pebs)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (!cpuc->pebs_vaddr) {
|
||||||
|
WARN(1, "Fail to allocate PEBS buffer on CPU %d\n", cpu);
|
||||||
|
x86_pmu.pebs_active = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* 4KB-aligned pointer of the output buffer
|
||||||
|
* (__alloc_pages_node() return page aligned address)
|
||||||
|
* Buffer Size = 4KB * 2^SIZE
|
||||||
|
* contiguous physical buffer (__alloc_pages_node() with order)
|
||||||
|
*/
|
||||||
|
arch_pebs_base = virt_to_phys(cpuc->pebs_vaddr) | PEBS_BUFFER_SHIFT;
|
||||||
|
wrmsr_on_cpu(cpu, MSR_IA32_PEBS_BASE, (u32)arch_pebs_base,
|
||||||
|
(u32)(arch_pebs_base >> 32));
|
||||||
|
x86_pmu.pebs_active = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void fini_arch_pebs_on_cpu(int cpu)
|
||||||
|
{
|
||||||
|
if (!x86_pmu.arch_pebs)
|
||||||
|
return;
|
||||||
|
|
||||||
|
wrmsr_on_cpu(cpu, MSR_IA32_PEBS_BASE, 0, 0);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* BTS
|
* BTS
|
||||||
*/
|
*/
|
||||||
|
|
@ -1471,6 +1529,25 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u64 intel_get_arch_pebs_data_config(struct perf_event *event)
|
||||||
|
{
|
||||||
|
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||||
|
u64 pebs_data_cfg = 0;
|
||||||
|
u64 cntr_mask;
|
||||||
|
|
||||||
|
if (WARN_ON(event->hw.idx < 0 || event->hw.idx >= X86_PMC_IDX_MAX))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
pebs_data_cfg |= pebs_update_adaptive_cfg(event);
|
||||||
|
|
||||||
|
cntr_mask = (PEBS_DATACFG_CNTR_MASK << PEBS_DATACFG_CNTR_SHIFT) |
|
||||||
|
(PEBS_DATACFG_FIX_MASK << PEBS_DATACFG_FIX_SHIFT) |
|
||||||
|
PEBS_DATACFG_CNTR | PEBS_DATACFG_METRICS;
|
||||||
|
pebs_data_cfg |= cpuc->pebs_data_cfg & cntr_mask;
|
||||||
|
|
||||||
|
return pebs_data_cfg;
|
||||||
|
}
|
||||||
|
|
||||||
void intel_pmu_pebs_add(struct perf_event *event)
|
void intel_pmu_pebs_add(struct perf_event *event)
|
||||||
{
|
{
|
||||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||||
|
|
@ -1532,6 +1609,15 @@ static inline void intel_pmu_drain_large_pebs(struct cpu_hw_events *cpuc)
|
||||||
intel_pmu_drain_pebs_buffer();
|
intel_pmu_drain_pebs_buffer();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void __intel_pmu_pebs_enable(struct perf_event *event)
|
||||||
|
{
|
||||||
|
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||||
|
struct hw_perf_event *hwc = &event->hw;
|
||||||
|
|
||||||
|
hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
|
||||||
|
cpuc->pebs_enabled |= 1ULL << hwc->idx;
|
||||||
|
}
|
||||||
|
|
||||||
void intel_pmu_pebs_enable(struct perf_event *event)
|
void intel_pmu_pebs_enable(struct perf_event *event)
|
||||||
{
|
{
|
||||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||||
|
|
@ -1540,9 +1626,7 @@ void intel_pmu_pebs_enable(struct perf_event *event)
|
||||||
struct debug_store *ds = cpuc->ds;
|
struct debug_store *ds = cpuc->ds;
|
||||||
unsigned int idx = hwc->idx;
|
unsigned int idx = hwc->idx;
|
||||||
|
|
||||||
hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
|
__intel_pmu_pebs_enable(event);
|
||||||
|
|
||||||
cpuc->pebs_enabled |= 1ULL << hwc->idx;
|
|
||||||
|
|
||||||
if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5))
|
if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5))
|
||||||
cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
|
cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
|
||||||
|
|
@ -1604,14 +1688,22 @@ void intel_pmu_pebs_del(struct perf_event *event)
|
||||||
pebs_update_state(needed_cb, cpuc, event, false);
|
pebs_update_state(needed_cb, cpuc, event, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
void intel_pmu_pebs_disable(struct perf_event *event)
|
static void __intel_pmu_pebs_disable(struct perf_event *event)
|
||||||
{
|
{
|
||||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||||
struct hw_perf_event *hwc = &event->hw;
|
struct hw_perf_event *hwc = &event->hw;
|
||||||
|
|
||||||
intel_pmu_drain_large_pebs(cpuc);
|
intel_pmu_drain_large_pebs(cpuc);
|
||||||
|
|
||||||
cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
|
cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
|
||||||
|
hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
|
||||||
|
}
|
||||||
|
|
||||||
|
void intel_pmu_pebs_disable(struct perf_event *event)
|
||||||
|
{
|
||||||
|
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||||
|
struct hw_perf_event *hwc = &event->hw;
|
||||||
|
|
||||||
|
__intel_pmu_pebs_disable(event);
|
||||||
|
|
||||||
if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) &&
|
if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) &&
|
||||||
(x86_pmu.version < 5))
|
(x86_pmu.version < 5))
|
||||||
|
|
@ -1623,8 +1715,6 @@ void intel_pmu_pebs_disable(struct perf_event *event)
|
||||||
|
|
||||||
if (cpuc->enabled)
|
if (cpuc->enabled)
|
||||||
wrmsrq(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
|
wrmsrq(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
|
||||||
|
|
||||||
hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void intel_pmu_pebs_enable_all(void)
|
void intel_pmu_pebs_enable_all(void)
|
||||||
|
|
@@ -2060,6 +2150,90 @@ static inline void __setup_pebs_counter_group(struct cpu_hw_events *cpuc,
 
 #define PEBS_LATENCY_MASK		0xffff
 
+static inline void __setup_perf_sample_data(struct perf_event *event,
+					    struct pt_regs *iregs,
+					    struct perf_sample_data *data)
+{
+	perf_sample_data_init(data, 0, event->hw.last_period);
+
+	/*
+	 * We must however always use iregs for the unwinder to stay sane; the
+	 * record BP,SP,IP can point into thin air when the record is from a
+	 * previous PMI context or an (I)RET happened between the record and
+	 * PMI.
+	 */
+	perf_sample_save_callchain(data, event, iregs);
+}
+
+static inline void __setup_pebs_basic_group(struct perf_event *event,
+					    struct pt_regs *regs,
+					    struct perf_sample_data *data,
+					    u64 sample_type, u64 ip,
+					    u64 tsc, u16 retire)
+{
+	/* The ip in basic is EventingIP */
+	set_linear_ip(regs, ip);
+	regs->flags = PERF_EFLAGS_EXACT;
+	setup_pebs_time(event, data, tsc);
+
+	if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT)
+		data->weight.var3_w = retire;
+}
+
+static inline void __setup_pebs_gpr_group(struct perf_event *event,
+					  struct pt_regs *regs,
+					  struct pebs_gprs *gprs,
+					  u64 sample_type)
+{
+	if (event->attr.precise_ip < 2) {
+		set_linear_ip(regs, gprs->ip);
+		regs->flags &= ~PERF_EFLAGS_EXACT;
+	}
+
+	if (sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER))
+		adaptive_pebs_save_regs(regs, gprs);
+}
+
+static inline void __setup_pebs_meminfo_group(struct perf_event *event,
+					      struct perf_sample_data *data,
+					      u64 sample_type, u64 latency,
+					      u16 instr_latency, u64 address,
+					      u64 aux, u64 tsx_tuning, u64 ax)
+{
+	if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
+		u64 tsx_latency = intel_get_tsx_weight(tsx_tuning);
+
+		data->weight.var2_w = instr_latency;
+
+		/*
+		 * Although meminfo::latency is defined as a u64,
+		 * only the lower 32 bits include the valid data
+		 * in practice on Ice Lake and earlier platforms.
+		 */
+		if (sample_type & PERF_SAMPLE_WEIGHT)
+			data->weight.full = latency ?: tsx_latency;
+		else
+			data->weight.var1_dw = (u32)latency ?: tsx_latency;
+
+		data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
+	}
+
+	if (sample_type & PERF_SAMPLE_DATA_SRC) {
+		data->data_src.val = get_data_src(event, aux);
+		data->sample_flags |= PERF_SAMPLE_DATA_SRC;
+	}
+
+	if (sample_type & PERF_SAMPLE_ADDR_TYPE) {
+		data->addr = address;
+		data->sample_flags |= PERF_SAMPLE_ADDR;
+	}
+
+	if (sample_type & PERF_SAMPLE_TRANSACTION) {
+		data->txn = intel_get_tsx_transaction(tsx_tuning, ax);
+		data->sample_flags |= PERF_SAMPLE_TRANSACTION;
+	}
+}
+
 /*
  * With adaptive PEBS the layout depends on what fields are configured.
  */
|
@ -2069,12 +2243,14 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
|
||||||
struct pt_regs *regs)
|
struct pt_regs *regs)
|
||||||
{
|
{
|
||||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||||
|
u64 sample_type = event->attr.sample_type;
|
||||||
struct pebs_basic *basic = __pebs;
|
struct pebs_basic *basic = __pebs;
|
||||||
void *next_record = basic + 1;
|
void *next_record = basic + 1;
|
||||||
u64 sample_type, format_group;
|
|
||||||
struct pebs_meminfo *meminfo = NULL;
|
struct pebs_meminfo *meminfo = NULL;
|
||||||
struct pebs_gprs *gprs = NULL;
|
struct pebs_gprs *gprs = NULL;
|
||||||
struct x86_perf_regs *perf_regs;
|
struct x86_perf_regs *perf_regs;
|
||||||
|
u64 format_group;
|
||||||
|
u16 retire;
|
||||||
|
|
||||||
if (basic == NULL)
|
if (basic == NULL)
|
||||||
return;
|
return;
|
||||||
|
|
@ -2082,31 +2258,17 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
|
||||||
perf_regs = container_of(regs, struct x86_perf_regs, regs);
|
perf_regs = container_of(regs, struct x86_perf_regs, regs);
|
||||||
perf_regs->xmm_regs = NULL;
|
perf_regs->xmm_regs = NULL;
|
||||||
|
|
||||||
sample_type = event->attr.sample_type;
|
|
||||||
format_group = basic->format_group;
|
format_group = basic->format_group;
|
||||||
perf_sample_data_init(data, 0, event->hw.last_period);
|
|
||||||
|
|
||||||
setup_pebs_time(event, data, basic->tsc);
|
__setup_perf_sample_data(event, iregs, data);
|
||||||
|
|
||||||
/*
|
|
||||||
* We must however always use iregs for the unwinder to stay sane; the
|
|
||||||
* record BP,SP,IP can point into thin air when the record is from a
|
|
||||||
* previous PMI context or an (I)RET happened between the record and
|
|
||||||
* PMI.
|
|
||||||
*/
|
|
||||||
perf_sample_save_callchain(data, event, iregs);
|
|
||||||
|
|
||||||
*regs = *iregs;
|
*regs = *iregs;
|
||||||
/* The ip in basic is EventingIP */
|
|
||||||
set_linear_ip(regs, basic->ip);
|
|
||||||
regs->flags = PERF_EFLAGS_EXACT;
|
|
||||||
|
|
||||||
if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT) {
|
/* basic group */
|
||||||
if (x86_pmu.flags & PMU_FL_RETIRE_LATENCY)
|
retire = x86_pmu.flags & PMU_FL_RETIRE_LATENCY ?
|
||||||
data->weight.var3_w = basic->retire_latency;
|
basic->retire_latency : 0;
|
||||||
else
|
__setup_pebs_basic_group(event, regs, data, sample_type,
|
||||||
data->weight.var3_w = 0;
|
basic->ip, basic->tsc, retire);
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The record for MEMINFO is in front of GP
|
* The record for MEMINFO is in front of GP
|
||||||
|
|
@ -2122,54 +2284,20 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
|
||||||
gprs = next_record;
|
gprs = next_record;
|
||||||
next_record = gprs + 1;
|
next_record = gprs + 1;
|
||||||
|
|
||||||
if (event->attr.precise_ip < 2) {
|
__setup_pebs_gpr_group(event, regs, gprs, sample_type);
|
||||||
set_linear_ip(regs, gprs->ip);
|
|
||||||
regs->flags &= ~PERF_EFLAGS_EXACT;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER))
|
|
||||||
adaptive_pebs_save_regs(regs, gprs);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (format_group & PEBS_DATACFG_MEMINFO) {
|
if (format_group & PEBS_DATACFG_MEMINFO) {
|
||||||
if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) {
|
|
||||||
u64 latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
|
u64 latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
|
||||||
meminfo->cache_latency : meminfo->mem_latency;
|
meminfo->cache_latency : meminfo->mem_latency;
|
||||||
|
u64 instr_latency = x86_pmu.flags & PMU_FL_INSTR_LATENCY ?
|
||||||
|
meminfo->instr_latency : 0;
|
||||||
|
u64 ax = gprs ? gprs->ax : 0;
|
||||||
|
|
||||||
if (x86_pmu.flags & PMU_FL_INSTR_LATENCY)
|
__setup_pebs_meminfo_group(event, data, sample_type, latency,
|
||||||
data->weight.var2_w = meminfo->instr_latency;
|
instr_latency, meminfo->address,
|
||||||
|
meminfo->aux, meminfo->tsx_tuning,
|
||||||
/*
|
ax);
|
||||||
* Although meminfo::latency is defined as a u64,
|
|
||||||
* only the lower 32 bits include the valid data
|
|
||||||
* in practice on Ice Lake and earlier platforms.
|
|
||||||
*/
|
|
||||||
if (sample_type & PERF_SAMPLE_WEIGHT) {
|
|
||||||
data->weight.full = latency ?:
|
|
||||||
intel_get_tsx_weight(meminfo->tsx_tuning);
|
|
||||||
} else {
|
|
||||||
data->weight.var1_dw = (u32)latency ?:
|
|
||||||
intel_get_tsx_weight(meminfo->tsx_tuning);
|
|
||||||
}
|
|
||||||
|
|
||||||
data->sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (sample_type & PERF_SAMPLE_DATA_SRC) {
|
|
||||||
data->data_src.val = get_data_src(event, meminfo->aux);
|
|
||||||
data->sample_flags |= PERF_SAMPLE_DATA_SRC;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (sample_type & PERF_SAMPLE_ADDR_TYPE) {
|
|
||||||
data->addr = meminfo->address;
|
|
||||||
data->sample_flags |= PERF_SAMPLE_ADDR;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (sample_type & PERF_SAMPLE_TRANSACTION) {
|
|
||||||
data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning,
|
|
||||||
gprs ? gprs->ax : 0);
|
|
||||||
data->sample_flags |= PERF_SAMPLE_TRANSACTION;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (format_group & PEBS_DATACFG_XMMS) {
|
if (format_group & PEBS_DATACFG_XMMS) {
|
||||||
|
|
@ -2220,6 +2348,135 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
|
||||||
format_group);
|
format_group);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool arch_pebs_record_continued(struct arch_pebs_header *header)
|
||||||
|
{
|
||||||
|
/* Continue bit or null PEBS record indicates fragment follows. */
|
||||||
|
return header->cont || !(header->format & GENMASK_ULL(63, 16));
|
||||||
|
}
|
||||||
|
|
||||||
|
static void setup_arch_pebs_sample_data(struct perf_event *event,
|
||||||
|
struct pt_regs *iregs,
|
||||||
|
void *__pebs,
|
||||||
|
struct perf_sample_data *data,
|
||||||
|
struct pt_regs *regs)
|
||||||
|
{
|
||||||
|
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||||
|
u64 sample_type = event->attr.sample_type;
|
||||||
|
struct arch_pebs_header *header = NULL;
|
||||||
|
struct arch_pebs_aux *meminfo = NULL;
|
||||||
|
struct arch_pebs_gprs *gprs = NULL;
|
||||||
|
struct x86_perf_regs *perf_regs;
|
||||||
|
void *next_record;
|
||||||
|
void *at = __pebs;
|
||||||
|
|
||||||
|
if (at == NULL)
|
||||||
|
return;
|
||||||
|
|
||||||
|
perf_regs = container_of(regs, struct x86_perf_regs, regs);
|
||||||
|
perf_regs->xmm_regs = NULL;
|
||||||
|
|
||||||
|
__setup_perf_sample_data(event, iregs, data);
|
||||||
|
|
||||||
|
*regs = *iregs;
|
||||||
|
|
||||||
|
again:
|
||||||
|
header = at;
|
||||||
|
next_record = at + sizeof(struct arch_pebs_header);
|
||||||
|
if (header->basic) {
|
||||||
|
struct arch_pebs_basic *basic = next_record;
|
||||||
|
u16 retire = 0;
|
||||||
|
|
||||||
|
next_record = basic + 1;
|
||||||
|
|
||||||
|
if (sample_type & PERF_SAMPLE_WEIGHT_STRUCT)
|
||||||
|
retire = basic->valid ? basic->retire : 0;
|
||||||
|
__setup_pebs_basic_group(event, regs, data, sample_type,
|
||||||
|
basic->ip, basic->tsc, retire);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The record for MEMINFO is in front of GP
|
||||||
|
* But PERF_SAMPLE_TRANSACTION needs gprs->ax.
|
||||||
|
* Save the pointer here but process later.
|
||||||
|
*/
|
||||||
|
if (header->aux) {
|
||||||
|
meminfo = next_record;
|
||||||
|
next_record = meminfo + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (header->gpr) {
|
||||||
|
gprs = next_record;
|
||||||
|
next_record = gprs + 1;
|
||||||
|
|
||||||
|
__setup_pebs_gpr_group(event, regs,
|
||||||
|
(struct pebs_gprs *)gprs,
|
||||||
|
sample_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (header->aux) {
|
||||||
|
u64 ax = gprs ? gprs->ax : 0;
|
||||||
|
|
||||||
|
__setup_pebs_meminfo_group(event, data, sample_type,
|
||||||
|
meminfo->cache_latency,
|
||||||
|
meminfo->instr_latency,
|
||||||
|
meminfo->address, meminfo->aux,
|
||||||
|
meminfo->tsx_tuning, ax);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (header->xmm) {
|
||||||
|
struct pebs_xmm *xmm;
|
||||||
|
|
||||||
|
next_record += sizeof(struct arch_pebs_xer_header);
|
||||||
|
|
||||||
|
xmm = next_record;
|
||||||
|
perf_regs->xmm_regs = xmm->xmm;
|
||||||
|
next_record = xmm + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (header->lbr) {
|
||||||
|
struct arch_pebs_lbr_header *lbr_header = next_record;
|
||||||
|
struct lbr_entry *lbr;
|
||||||
|
int num_lbr;
|
||||||
|
|
||||||
|
next_record = lbr_header + 1;
|
||||||
|
lbr = next_record;
|
||||||
|
|
||||||
|
num_lbr = header->lbr == ARCH_PEBS_LBR_NUM_VAR ?
|
||||||
|
lbr_header->depth :
|
||||||
|
header->lbr * ARCH_PEBS_BASE_LBR_ENTRIES;
|
||||||
|
next_record += num_lbr * sizeof(struct lbr_entry);
|
||||||
|
|
||||||
|
if (has_branch_stack(event)) {
|
||||||
|
intel_pmu_store_pebs_lbrs(lbr);
|
||||||
|
intel_pmu_lbr_save_brstack(data, cpuc, event);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (header->cntr) {
|
||||||
|
struct arch_pebs_cntr_header *cntr = next_record;
|
||||||
|
unsigned int nr;
|
||||||
|
|
||||||
|
next_record += sizeof(struct arch_pebs_cntr_header);
|
||||||
|
|
||||||
|
if (is_pebs_counter_event_group(event)) {
|
||||||
|
__setup_pebs_counter_group(cpuc, event,
|
||||||
|
(struct pebs_cntr_header *)cntr, next_record);
|
||||||
|
data->sample_flags |= PERF_SAMPLE_READ;
|
||||||
|
}
|
||||||
|
|
||||||
|
nr = hweight32(cntr->cntr) + hweight32(cntr->fixed);
|
||||||
|
if (cntr->metrics == INTEL_CNTR_METRICS)
|
||||||
|
nr += 2;
|
||||||
|
next_record += nr * sizeof(u64);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parse followed fragments if there are. */
|
||||||
|
if (arch_pebs_record_continued(header)) {
|
||||||
|
at = at + header->size;
|
||||||
|
goto again;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static inline void *
|
static inline void *
|
||||||
get_next_pebs_record_by_bit(void *base, void *top, int bit)
|
get_next_pebs_record_by_bit(void *base, void *top, int bit)
|
||||||
{
|
{
|
||||||
|
|
@@ -2602,6 +2859,57 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
 	}
 }
 
+static __always_inline void
+__intel_pmu_handle_pebs_record(struct pt_regs *iregs,
+			       struct pt_regs *regs,
+			       struct perf_sample_data *data,
+			       void *at, u64 pebs_status,
+			       short *counts, void **last,
+			       setup_fn setup_sample)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct perf_event *event;
+	int bit;
+
+	for_each_set_bit(bit, (unsigned long *)&pebs_status, X86_PMC_IDX_MAX) {
+		event = cpuc->events[bit];
+
+		if (WARN_ON_ONCE(!event) ||
+		    WARN_ON_ONCE(!event->attr.precise_ip))
+			continue;
+
+		if (counts[bit]++) {
+			__intel_pmu_pebs_event(event, iregs, regs, data,
+					       last[bit], setup_sample);
+		}
+
+		last[bit] = at;
+	}
+}
+
+static __always_inline void
+__intel_pmu_handle_last_pebs_record(struct pt_regs *iregs,
+				    struct pt_regs *regs,
+				    struct perf_sample_data *data,
+				    u64 mask, short *counts, void **last,
+				    setup_fn setup_sample)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	struct perf_event *event;
+	int bit;
+
+	for_each_set_bit(bit, (unsigned long *)&mask, X86_PMC_IDX_MAX) {
+		if (!counts[bit])
+			continue;
+
+		event = cpuc->events[bit];
+
+		__intel_pmu_pebs_last_event(event, iregs, regs, data, last[bit],
+					    counts[bit], setup_sample);
+	}
+}
+
 static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_data *data)
 {
 	short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
|
@ -2611,9 +2919,7 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
|
||||||
struct x86_perf_regs perf_regs;
|
struct x86_perf_regs perf_regs;
|
||||||
struct pt_regs *regs = &perf_regs.regs;
|
struct pt_regs *regs = &perf_regs.regs;
|
||||||
struct pebs_basic *basic;
|
struct pebs_basic *basic;
|
||||||
struct perf_event *event;
|
|
||||||
void *base, *at, *top;
|
void *base, *at, *top;
|
||||||
int bit;
|
|
||||||
u64 mask;
|
u64 mask;
|
||||||
|
|
||||||
if (!x86_pmu.pebs_active)
|
if (!x86_pmu.pebs_active)
|
||||||
|
|
@ -2626,6 +2932,7 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
|
||||||
|
|
||||||
mask = hybrid(cpuc->pmu, pebs_events_mask) |
|
mask = hybrid(cpuc->pmu, pebs_events_mask) |
|
||||||
(hybrid(cpuc->pmu, fixed_cntr_mask64) << INTEL_PMC_IDX_FIXED);
|
(hybrid(cpuc->pmu, fixed_cntr_mask64) << INTEL_PMC_IDX_FIXED);
|
||||||
|
mask &= cpuc->pebs_enabled;
|
||||||
|
|
||||||
if (unlikely(base >= top)) {
|
if (unlikely(base >= top)) {
|
||||||
intel_pmu_pebs_event_update_no_drain(cpuc, mask);
|
intel_pmu_pebs_event_update_no_drain(cpuc, mask);
|
||||||
|
|
@ -2643,38 +2950,114 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
|
||||||
if (basic->format_size != cpuc->pebs_record_size)
|
if (basic->format_size != cpuc->pebs_record_size)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
pebs_status = basic->applicable_counters & cpuc->pebs_enabled & mask;
|
pebs_status = mask & basic->applicable_counters;
|
||||||
for_each_set_bit(bit, (unsigned long *)&pebs_status, X86_PMC_IDX_MAX) {
|
__intel_pmu_handle_pebs_record(iregs, regs, data, at,
|
||||||
event = cpuc->events[bit];
|
pebs_status, counts, last,
|
||||||
|
|
||||||
if (WARN_ON_ONCE(!event) ||
|
|
||||||
WARN_ON_ONCE(!event->attr.precise_ip))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (counts[bit]++) {
|
|
||||||
__intel_pmu_pebs_event(event, iregs, regs, data, last[bit],
|
|
||||||
setup_pebs_adaptive_sample_data);
|
setup_pebs_adaptive_sample_data);
|
||||||
}
|
}
|
||||||
last[bit] = at;
|
|
||||||
}
|
__intel_pmu_handle_last_pebs_record(iregs, regs, data, mask, counts, last,
|
||||||
|
setup_pebs_adaptive_sample_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void intel_pmu_drain_arch_pebs(struct pt_regs *iregs,
|
||||||
|
struct perf_sample_data *data)
|
||||||
|
{
|
||||||
|
short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
|
||||||
|
void *last[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS];
|
||||||
|
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||||
|
union arch_pebs_index index;
|
||||||
|
struct x86_perf_regs perf_regs;
|
||||||
|
struct pt_regs *regs = &perf_regs.regs;
|
||||||
|
void *base, *at, *top;
|
||||||
|
u64 mask;
|
||||||
|
|
||||||
|
rdmsrq(MSR_IA32_PEBS_INDEX, index.whole);
|
||||||
|
|
||||||
|
if (unlikely(!index.wr)) {
|
||||||
|
intel_pmu_pebs_event_update_no_drain(cpuc, X86_PMC_IDX_MAX);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
for_each_set_bit(bit, (unsigned long *)&mask, X86_PMC_IDX_MAX) {
|
base = cpuc->pebs_vaddr;
|
||||||
if (!counts[bit])
|
top = cpuc->pebs_vaddr + (index.wr << ARCH_PEBS_INDEX_WR_SHIFT);
|
||||||
|
|
||||||
|
index.wr = 0;
|
||||||
|
index.full = 0;
|
||||||
|
index.en = 1;
|
||||||
|
if (cpuc->n_pebs == cpuc->n_large_pebs)
|
||||||
|
index.thresh = ARCH_PEBS_THRESH_MULTI;
|
||||||
|
else
|
||||||
|
index.thresh = ARCH_PEBS_THRESH_SINGLE;
|
||||||
|
wrmsrq(MSR_IA32_PEBS_INDEX, index.whole);
|
||||||
|
|
||||||
|
mask = hybrid(cpuc->pmu, arch_pebs_cap).counters & cpuc->pebs_enabled;
|
||||||
|
|
||||||
|
if (!iregs)
|
||||||
|
iregs = &dummy_iregs;
|
||||||
|
|
||||||
|
/* Process all but the last event for each counter. */
|
||||||
|
for (at = base; at < top;) {
|
||||||
|
struct arch_pebs_header *header;
|
||||||
|
struct arch_pebs_basic *basic;
|
||||||
|
u64 pebs_status;
|
||||||
|
|
||||||
|
header = at;
|
||||||
|
|
||||||
|
if (WARN_ON_ONCE(!header->size))
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* 1st fragment or single record must have basic group */
|
||||||
|
if (!header->basic) {
|
||||||
|
at += header->size;
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
event = cpuc->events[bit];
|
|
||||||
|
|
||||||
__intel_pmu_pebs_last_event(event, iregs, regs, data, last[bit],
|
|
||||||
counts[bit], setup_pebs_adaptive_sample_data);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
basic = at + sizeof(struct arch_pebs_header);
|
||||||
|
pebs_status = mask & basic->applicable_counters;
|
||||||
|
__intel_pmu_handle_pebs_record(iregs, regs, data, at,
|
||||||
|
pebs_status, counts, last,
|
||||||
|
setup_arch_pebs_sample_data);
|
||||||
|
|
||||||
|
/* Skip non-last fragments */
|
||||||
|
while (arch_pebs_record_continued(header)) {
|
||||||
|
if (!header->size)
|
||||||
|
break;
|
||||||
|
at += header->size;
|
||||||
|
header = at;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Skip last fragment or the single record */
|
||||||
|
at += header->size;
|
||||||
|
}
|
||||||
|
|
||||||
|
__intel_pmu_handle_last_pebs_record(iregs, regs, data, mask,
|
||||||
|
counts, last,
|
||||||
|
setup_arch_pebs_sample_data);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void __init intel_arch_pebs_init(void)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Current hybrid platforms always both support arch-PEBS or not
|
||||||
|
* on all kinds of cores. So directly set x86_pmu.arch_pebs flag
|
||||||
|
* if boot cpu supports arch-PEBS.
|
||||||
|
*/
|
||||||
|
x86_pmu.arch_pebs = 1;
|
||||||
|
x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
|
||||||
|
x86_pmu.drain_pebs = intel_pmu_drain_arch_pebs;
|
||||||
|
x86_pmu.pebs_capable = ~0ULL;
|
||||||
|
x86_pmu.flags |= PMU_FL_PEBS_ALL;
|
||||||
|
|
||||||
|
x86_pmu.pebs_enable = __intel_pmu_pebs_enable;
|
||||||
|
x86_pmu.pebs_disable = __intel_pmu_pebs_disable;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* PEBS probe and setup
|
* PEBS probe and setup
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void __init intel_pebs_init(void)
|
static void __init intel_ds_pebs_init(void)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* No support for 32bit formats
|
* No support for 32bit formats
|
||||||
|
|
@ -2736,10 +3119,8 @@ void __init intel_pebs_init(void)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 6:
|
case 6:
|
||||||
if (x86_pmu.intel_cap.pebs_baseline) {
|
if (x86_pmu.intel_cap.pebs_baseline)
|
||||||
x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ;
|
x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ;
|
||||||
x86_pmu.late_setup = intel_pmu_late_setup;
|
|
||||||
}
|
|
||||||
fallthrough;
|
fallthrough;
|
||||||
case 5:
|
case 5:
|
||||||
x86_pmu.pebs_ept = 1;
|
x86_pmu.pebs_ept = 1;
|
||||||
|
|
@ -2789,6 +3170,14 @@ void __init intel_pebs_init(void)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void __init intel_pebs_init(void)
|
||||||
|
{
|
||||||
|
if (x86_pmu.intel_cap.pebs_format == 0xf)
|
||||||
|
intel_arch_pebs_init();
|
||||||
|
else
|
||||||
|
intel_ds_pebs_init();
|
||||||
|
}
|
||||||
|
|
||||||
void perf_restore_debug_store(void)
|
void perf_restore_debug_store(void)
|
||||||
{
|
{
|
||||||
struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
|
struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
|
||||||
|
|
|
||||||
|
|
@ -283,8 +283,9 @@ struct cpu_hw_events {
|
||||||
* Intel DebugStore bits
|
* Intel DebugStore bits
|
||||||
*/
|
*/
|
||||||
struct debug_store *ds;
|
struct debug_store *ds;
|
||||||
void *ds_pebs_vaddr;
|
|
||||||
void *ds_bts_vaddr;
|
void *ds_bts_vaddr;
|
||||||
|
/* DS based PEBS or arch-PEBS buffer address */
|
||||||
|
void *pebs_vaddr;
|
||||||
u64 pebs_enabled;
|
u64 pebs_enabled;
|
||||||
int n_pebs;
|
int n_pebs;
|
||||||
int n_large_pebs;
|
int n_large_pebs;
|
||||||
|
|
@ -303,6 +304,8 @@ struct cpu_hw_events {
|
||||||
/* Intel ACR configuration */
|
/* Intel ACR configuration */
|
||||||
u64 acr_cfg_b[X86_PMC_IDX_MAX];
|
u64 acr_cfg_b[X86_PMC_IDX_MAX];
|
||||||
u64 acr_cfg_c[X86_PMC_IDX_MAX];
|
u64 acr_cfg_c[X86_PMC_IDX_MAX];
|
||||||
|
/* Cached CFG_C values */
|
||||||
|
u64 cfg_c_val[X86_PMC_IDX_MAX];
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Intel LBR bits
|
* Intel LBR bits
|
||||||
|
|
@ -708,6 +711,12 @@ enum hybrid_pmu_type {
|
||||||
hybrid_big_small_tiny = hybrid_big | hybrid_small_tiny,
|
hybrid_big_small_tiny = hybrid_big | hybrid_small_tiny,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct arch_pebs_cap {
|
||||||
|
u64 caps;
|
||||||
|
u64 counters;
|
||||||
|
u64 pdists;
|
||||||
|
};
|
||||||
|
|
||||||
struct x86_hybrid_pmu {
|
struct x86_hybrid_pmu {
|
||||||
struct pmu pmu;
|
struct pmu pmu;
|
||||||
const char *name;
|
const char *name;
|
||||||
|
|
@ -752,6 +761,8 @@ struct x86_hybrid_pmu {
|
||||||
mid_ack :1,
|
mid_ack :1,
|
||||||
enabled_ack :1;
|
enabled_ack :1;
|
||||||
|
|
||||||
|
struct arch_pebs_cap arch_pebs_cap;
|
||||||
|
|
||||||
u64 pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX];
|
u64 pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -906,7 +917,7 @@ struct x86_pmu {
|
||||||
union perf_capabilities intel_cap;
|
union perf_capabilities intel_cap;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Intel DebugStore bits
|
* Intel DebugStore and PEBS bits
|
||||||
*/
|
*/
|
||||||
unsigned int bts :1,
|
unsigned int bts :1,
|
||||||
bts_active :1,
|
bts_active :1,
|
||||||
|
|
@ -917,7 +928,8 @@ struct x86_pmu {
|
||||||
pebs_no_tlb :1,
|
pebs_no_tlb :1,
|
||||||
pebs_no_isolation :1,
|
pebs_no_isolation :1,
|
||||||
pebs_block :1,
|
pebs_block :1,
|
||||||
pebs_ept :1;
|
pebs_ept :1,
|
||||||
|
arch_pebs :1;
|
||||||
int pebs_record_size;
|
int pebs_record_size;
|
||||||
int pebs_buffer_size;
|
int pebs_buffer_size;
|
||||||
u64 pebs_events_mask;
|
u64 pebs_events_mask;
|
||||||
|
|
@ -929,6 +941,11 @@ struct x86_pmu {
|
||||||
u64 rtm_abort_event;
|
u64 rtm_abort_event;
|
||||||
u64 pebs_capable;
|
u64 pebs_capable;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Intel Architectural PEBS
|
||||||
|
*/
|
||||||
|
struct arch_pebs_cap arch_pebs_cap;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Intel LBR
|
* Intel LBR
|
||||||
*/
|
*/
|
||||||
|
|
@ -1124,7 +1141,6 @@ static struct perf_pmu_format_hybrid_attr format_attr_hybrid_##_name = {\
|
||||||
.pmu_type = _pmu, \
|
.pmu_type = _pmu, \
|
||||||
}
|
}
|
||||||
|
|
||||||
int is_x86_event(struct perf_event *event);
|
|
||||||
struct pmu *x86_get_pmu(unsigned int cpu);
|
struct pmu *x86_get_pmu(unsigned int cpu);
|
||||||
extern struct x86_pmu x86_pmu __read_mostly;
|
extern struct x86_pmu x86_pmu __read_mostly;
|
||||||
|
|
||||||
|
|
@ -1217,7 +1233,7 @@ int x86_reserve_hardware(void);
|
||||||
|
|
||||||
void x86_release_hardware(void);
|
void x86_release_hardware(void);
|
||||||
|
|
||||||
int x86_pmu_max_precise(void);
|
int x86_pmu_max_precise(struct pmu *pmu);
|
||||||
|
|
||||||
void hw_perf_lbr_event_destroy(struct perf_event *event);
|
void hw_perf_lbr_event_destroy(struct perf_event *event);
|
||||||
|
|
||||||
|
|
@ -1604,6 +1620,14 @@ extern void intel_cpuc_finish(struct cpu_hw_events *cpuc);
|
||||||
|
|
||||||
int intel_pmu_init(void);
|
int intel_pmu_init(void);
|
||||||
|
|
||||||
|
int alloc_arch_pebs_buf_on_cpu(int cpu);
|
||||||
|
|
||||||
|
void release_arch_pebs_buf_on_cpu(int cpu);
|
||||||
|
|
||||||
|
void init_arch_pebs_on_cpu(int cpu);
|
||||||
|
|
||||||
|
void fini_arch_pebs_on_cpu(int cpu);
|
||||||
|
|
||||||
void init_debug_store_on_cpu(int cpu);
|
void init_debug_store_on_cpu(int cpu);
|
||||||
|
|
||||||
void fini_debug_store_on_cpu(int cpu);
|
void fini_debug_store_on_cpu(int cpu);
|
||||||
|
|
@ -1760,6 +1784,8 @@ void intel_pmu_pebs_data_source_cmt(void);
|
||||||
|
|
||||||
void intel_pmu_pebs_data_source_lnl(void);
|
void intel_pmu_pebs_data_source_lnl(void);
|
||||||
|
|
||||||
|
u64 intel_get_arch_pebs_data_config(struct perf_event *event);
|
||||||
|
|
||||||
int intel_pmu_setup_lbr_filter(struct perf_event *event);
|
int intel_pmu_setup_lbr_filter(struct perf_event *event);
|
||||||
|
|
||||||
void intel_pt_interrupt(void);
|
void intel_pt_interrupt(void);
|
||||||
|
|
@ -1792,6 +1818,11 @@ static inline int intel_pmu_max_num_pebs(struct pmu *pmu)
|
||||||
return fls((u32)hybrid(pmu, pebs_events_mask));
|
return fls((u32)hybrid(pmu, pebs_events_mask));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool intel_pmu_has_pebs(void)
|
||||||
|
{
|
||||||
|
return x86_pmu.ds_pebs || x86_pmu.arch_pebs;
|
||||||
|
}
|
||||||
|
|
||||||
#else /* CONFIG_CPU_SUP_INTEL */
|
#else /* CONFIG_CPU_SUP_INTEL */
|
||||||
|
|
||||||
static inline void reserve_ds_buffers(void)
|
static inline void reserve_ds_buffers(void)
|
||||||
|
|
|
||||||
|
|
@@ -44,4 +44,6 @@ enum insn_mmio_type {
 
 enum insn_mmio_type insn_decode_mmio(struct insn *insn, int *bytes);
 
+bool insn_is_nop(struct insn *insn);
+
 #endif /* _ASM_X86_INSN_EVAL_H */
@@ -312,7 +312,6 @@ static inline int insn_offset_immediate(struct insn *insn)
 /**
  * for_each_insn_prefix() -- Iterate prefixes in the instruction
  * @insn: Pointer to struct insn.
- * @idx: Index storage.
  * @prefix: Prefix byte.
  *
  * Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix
@@ -321,8 +320,8 @@ static inline int insn_offset_immediate(struct insn *insn)
  * Since prefixes.nbytes can be bigger than 4 if some prefixes
  * are repeated, it cannot be used for looping over the prefixes.
  */
-#define for_each_insn_prefix(insn, idx, prefix)	\
-	for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++)
+#define for_each_insn_prefix(insn, prefix)	\
+	for (int idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++)
 
 #define POP_SS_OPCODE 0x1f
 #define MOV_SREG_OPCODE 0x8e
|
@@ -4,7 +4,15 @@
 #include <linux/percpu-defs.h>
 
 #define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
-#define PEBS_BUFFER_SIZE	(PAGE_SIZE << 4)
+#define PEBS_BUFFER_SHIFT	4
+#define PEBS_BUFFER_SIZE	(PAGE_SIZE << PEBS_BUFFER_SHIFT)
+
+/*
+ * The largest PEBS record could consume a page, ensure
+ * a record at least can be written after triggering PMI.
+ */
+#define ARCH_PEBS_THRESH_MULTI	((PEBS_BUFFER_SIZE - PAGE_SIZE) >> PEBS_BUFFER_SHIFT)
+#define ARCH_PEBS_THRESH_SINGLE	1
 
 /* The maximal number of PEBS events: */
 #define MAX_PEBS_EVENTS_FMT4	8
|
@@ -327,6 +327,26 @@
 				 PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE | \
 				 PERF_CAP_PEBS_TIMING_INFO)
 
+/* Arch PEBS */
+#define MSR_IA32_PEBS_BASE		0x000003f4
+#define MSR_IA32_PEBS_INDEX		0x000003f5
+#define ARCH_PEBS_OFFSET_MASK		0x7fffff
+#define ARCH_PEBS_INDEX_WR_SHIFT	4
+
+#define ARCH_PEBS_RELOAD		0xffffffff
+#define ARCH_PEBS_CNTR_ALLOW		BIT_ULL(35)
+#define ARCH_PEBS_CNTR_GP		BIT_ULL(36)
+#define ARCH_PEBS_CNTR_FIXED		BIT_ULL(37)
+#define ARCH_PEBS_CNTR_METRICS		BIT_ULL(38)
+#define ARCH_PEBS_LBR_SHIFT		40
+#define ARCH_PEBS_LBR			(0x3ull << ARCH_PEBS_LBR_SHIFT)
+#define ARCH_PEBS_VECR_XMM		BIT_ULL(49)
+#define ARCH_PEBS_GPR			BIT_ULL(61)
+#define ARCH_PEBS_AUX			BIT_ULL(62)
+#define ARCH_PEBS_EN			BIT_ULL(63)
+#define ARCH_PEBS_CNTR_MASK		(ARCH_PEBS_CNTR_GP | ARCH_PEBS_CNTR_FIXED | \
+					 ARCH_PEBS_CNTR_METRICS)
+
 #define MSR_IA32_RTIT_CTL		0x00000570
 #define RTIT_CTL_TRACEEN		BIT(0)
 #define RTIT_CTL_CYCLEACC		BIT(1)
|
@@ -144,13 +144,13 @@
 #define PEBS_DATACFG_GP		BIT_ULL(1)
 #define PEBS_DATACFG_XMMS	BIT_ULL(2)
 #define PEBS_DATACFG_LBRS	BIT_ULL(3)
-#define PEBS_DATACFG_LBR_SHIFT	24
 #define PEBS_DATACFG_CNTR	BIT_ULL(4)
+#define PEBS_DATACFG_METRICS	BIT_ULL(5)
+#define PEBS_DATACFG_LBR_SHIFT	24
 #define PEBS_DATACFG_CNTR_SHIFT	32
 #define PEBS_DATACFG_CNTR_MASK	GENMASK_ULL(15, 0)
 #define PEBS_DATACFG_FIX_SHIFT	48
 #define PEBS_DATACFG_FIX_MASK	GENMASK_ULL(7, 0)
-#define PEBS_DATACFG_METRICS	BIT_ULL(5)
 
 /* Steal the highest bit of pebs_data_cfg for SW usage */
 #define PEBS_UPDATE_DS_SW	BIT_ULL(63)
@@ -200,6 +200,8 @@ union cpuid10_edx {
 #define ARCH_PERFMON_EXT_LEAF			0x00000023
 #define ARCH_PERFMON_NUM_COUNTER_LEAF		0x1
 #define ARCH_PERFMON_ACR_LEAF			0x2
+#define ARCH_PERFMON_PEBS_CAP_LEAF		0x4
+#define ARCH_PERFMON_PEBS_COUNTER_LEAF		0x5
 
 union cpuid35_eax {
 	struct {
@@ -210,7 +212,10 @@ union cpuid35_eax {
 		unsigned int	acr_subleaf:1;
 		/* Events Sub-Leaf */
 		unsigned int	events_subleaf:1;
-		unsigned int	reserved:28;
+		/* arch-PEBS Sub-Leaves */
+		unsigned int	pebs_caps_subleaf:1;
+		unsigned int	pebs_cnts_subleaf:1;
+		unsigned int	reserved:26;
 	} split;
 	unsigned int	full;
 };
@@ -432,6 +437,8 @@ static inline bool is_topdown_idx(int idx)
 #define GLOBAL_STATUS_LBRS_FROZEN		BIT_ULL(GLOBAL_STATUS_LBRS_FROZEN_BIT)
 #define GLOBAL_STATUS_TRACE_TOPAPMI_BIT		55
 #define GLOBAL_STATUS_TRACE_TOPAPMI		BIT_ULL(GLOBAL_STATUS_TRACE_TOPAPMI_BIT)
+#define GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT	54
+#define GLOBAL_STATUS_ARCH_PEBS_THRESHOLD	BIT_ULL(GLOBAL_STATUS_ARCH_PEBS_THRESHOLD_BIT)
 #define GLOBAL_STATUS_PERF_METRICS_OVF_BIT	48
 
 #define GLOBAL_CTRL_EN_PERF_METRICS		BIT_ULL(48)
@@ -502,6 +509,107 @@ struct pebs_cntr_header {
 
 #define INTEL_CNTR_METRICS		0x3
 
+/*
+ * Arch PEBS
+ */
+union arch_pebs_index {
+	struct {
+		u64 rsvd:4,
+		    wr:23,
+		    rsvd2:4,
+		    full:1,
+		    en:1,
+		    rsvd3:3,
+		    thresh:23,
+		    rsvd4:5;
+	};
+	u64 whole;
+};
+
+struct arch_pebs_header {
+	union {
+		u64 format;
+		struct {
+			u64 size:16,	/* Record size */
+			    rsvd:14,
+			    mode:1,	/* 64BIT_MODE */
+			    cont:1,
+			    rsvd2:3,
+			    cntr:5,
+			    lbr:2,
+			    rsvd3:7,
+			    xmm:1,
+			    ymmh:1,
+			    rsvd4:2,
+			    opmask:1,
+			    zmmh:1,
+			    h16zmm:1,
+			    rsvd5:5,
+			    gpr:1,
+			    aux:1,
+			    basic:1;
+		};
+	};
+	u64 rsvd6;
+};
+
+struct arch_pebs_basic {
+	u64 ip;
+	u64 applicable_counters;
+	u64 tsc;
+	u64 retire	:16,	/* Retire Latency */
+	    valid	:1,
+	    rsvd	:47;
+	u64 rsvd2;
+	u64 rsvd3;
+};
+
+struct arch_pebs_aux {
+	u64 address;
+	u64 rsvd;
+	u64 rsvd2;
+	u64 rsvd3;
+	u64 rsvd4;
+	u64 aux;
+	u64 instr_latency	:16,
+	    pad2		:16,
+	    cache_latency	:16,
+	    pad3		:16;
+	u64 tsx_tuning;
+};
+
+struct arch_pebs_gprs {
+	u64 flags, ip, ax, cx, dx, bx, sp, bp, si, di;
+	u64 r8, r9, r10, r11, r12, r13, r14, r15, ssp;
+	u64 rsvd;
+};
+
+struct arch_pebs_xer_header {
+	u64 xstate;
+	u64 rsvd;
+};
+
+#define ARCH_PEBS_LBR_NAN		0x0
+#define ARCH_PEBS_LBR_NUM_8		0x1
+#define ARCH_PEBS_LBR_NUM_16		0x2
+#define ARCH_PEBS_LBR_NUM_VAR		0x3
+#define ARCH_PEBS_BASE_LBR_ENTRIES	8
+struct arch_pebs_lbr_header {
+	u64 rsvd;
+	u64 ctl;
+	u64 depth;
+	u64 ler_from;
+	u64 ler_to;
+	u64 ler_info;
+};
+
+struct arch_pebs_cntr_header {
+	u32 cntr;
+	u32 fixed;
+	u32 metrics;
+	u32 reserved;
+};
+
 /*
  * AMD Extended Performance Monitoring and Debug cpuid feature detection
  */
|
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_UNWIND_USER_H
+#define _ASM_X86_UNWIND_USER_H
+
+#ifdef CONFIG_HAVE_UNWIND_USER_FP
+
+#include <asm/ptrace.h>
+#include <asm/uprobes.h>
+
+#define ARCH_INIT_USER_FP_FRAME(ws)		\
+	.cfa_off	=  2*(ws),		\
+	.ra_off		= -1*(ws),		\
+	.fp_off		= -2*(ws),		\
+	.use_fp		= true,
+
+#define ARCH_INIT_USER_FP_ENTRY_FRAME(ws)	\
+	.cfa_off	=  1*(ws),		\
+	.ra_off		= -1*(ws),		\
+	.fp_off		= 0,			\
+	.use_fp		= false,
+
+static inline int unwind_user_word_size(struct pt_regs *regs)
+{
+	/* We can't unwind VM86 stacks */
+	if (regs->flags & X86_VM_MASK)
+		return 0;
+#ifdef CONFIG_X86_64
+	if (!user_64bit_mode(regs))
+		return sizeof(int);
+#endif
+	return sizeof(long);
+}
+
+static inline bool unwind_user_at_function_start(struct pt_regs *regs)
+{
+	return is_uprobe_at_func_entry(regs);
+}
+
+#endif /* CONFIG_HAVE_UNWIND_USER_FP */
+
+#endif /* _ASM_X86_UNWIND_USER_H */
@@ -62,4 +62,13 @@ struct arch_uprobe_task {
 	unsigned int			saved_tf;
 };
 
+#ifdef CONFIG_UPROBES
+extern bool is_uprobe_at_func_entry(struct pt_regs *regs);
+#else
+static bool is_uprobe_at_func_entry(struct pt_regs *regs)
+{
+	return false;
+}
+#endif /* CONFIG_UPROBES */
+
 #endif /* _ASM_UPROBES_H */
|
@ -9,6 +9,7 @@
|
||||||
|
|
||||||
#include <asm/text-patching.h>
|
#include <asm/text-patching.h>
|
||||||
#include <asm/insn.h>
|
#include <asm/insn.h>
|
||||||
|
#include <asm/insn-eval.h>
|
||||||
#include <asm/ibt.h>
|
#include <asm/ibt.h>
|
||||||
#include <asm/set_memory.h>
|
#include <asm/set_memory.h>
|
||||||
#include <asm/nmi.h>
|
#include <asm/nmi.h>
|
||||||
|
|
@ -345,25 +346,6 @@ static void add_nop(u8 *buf, unsigned int len)
|
||||||
*buf = INT3_INSN_OPCODE;
|
*buf = INT3_INSN_OPCODE;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Matches NOP and NOPL, not any of the other possible NOPs.
|
|
||||||
*/
|
|
||||||
static bool insn_is_nop(struct insn *insn)
|
|
||||||
{
|
|
||||||
/* Anything NOP, but no REP NOP */
|
|
||||||
if (insn->opcode.bytes[0] == 0x90 &&
|
|
||||||
(!insn->prefixes.nbytes || insn->prefixes.bytes[0] != 0xF3))
|
|
||||||
return true;
|
|
||||||
|
|
||||||
/* NOPL */
|
|
||||||
if (insn->opcode.bytes[0] == 0x0F && insn->opcode.bytes[1] == 0x1F)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
/* TODO: more nops */
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Find the offset of the first non-NOP instruction starting at @offset
|
* Find the offset of the first non-NOP instruction starting at @offset
|
||||||
* but no further than @len.
|
* but no further than @len.
|
||||||
|
|
|
||||||
|
|
@ -141,7 +141,6 @@ bool can_boost(struct insn *insn, void *addr)
|
||||||
{
|
{
|
||||||
kprobe_opcode_t opcode;
|
kprobe_opcode_t opcode;
|
||||||
insn_byte_t prefix;
|
insn_byte_t prefix;
|
||||||
int i;
|
|
||||||
|
|
||||||
if (search_exception_tables((unsigned long)addr))
|
if (search_exception_tables((unsigned long)addr))
|
||||||
return false; /* Page fault may occur on this address. */
|
return false; /* Page fault may occur on this address. */
|
||||||
|
|
@ -154,7 +153,7 @@ bool can_boost(struct insn *insn, void *addr)
|
||||||
if (insn->opcode.nbytes != 1)
|
if (insn->opcode.nbytes != 1)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
for_each_insn_prefix(insn, i, prefix) {
|
for_each_insn_prefix(insn, prefix) {
|
||||||
insn_attr_t attr;
|
insn_attr_t attr;
|
||||||
|
|
||||||
attr = inat_get_opcode_attribute(prefix);
|
attr = inat_get_opcode_attribute(prefix);
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,7 @@
|
||||||
#include <linux/kdebug.h>
|
#include <linux/kdebug.h>
|
||||||
#include <asm/processor.h>
|
#include <asm/processor.h>
|
||||||
#include <asm/insn.h>
|
#include <asm/insn.h>
|
||||||
|
#include <asm/insn-eval.h>
|
||||||
#include <asm/mmu_context.h>
|
#include <asm/mmu_context.h>
|
||||||
#include <asm/nops.h>
|
#include <asm/nops.h>
|
||||||
|
|
||||||
|
|
@ -258,9 +259,8 @@ static volatile u32 good_2byte_insns[256 / 32] = {
|
||||||
static bool is_prefix_bad(struct insn *insn)
|
static bool is_prefix_bad(struct insn *insn)
|
||||||
{
|
{
|
||||||
insn_byte_t p;
|
insn_byte_t p;
|
||||||
int i;
|
|
||||||
|
|
||||||
for_each_insn_prefix(insn, i, p) {
|
for_each_insn_prefix(insn, p) {
|
||||||
insn_attr_t attr;
|
insn_attr_t attr;
|
||||||
|
|
||||||
attr = inat_get_opcode_attribute(p);
|
attr = inat_get_opcode_attribute(p);
|
||||||
|
|
@ -1158,35 +1158,12 @@ void arch_uprobe_optimize(struct arch_uprobe *auprobe, unsigned long vaddr)
|
||||||
mmap_write_unlock(mm);
|
mmap_write_unlock(mm);
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool insn_is_nop(struct insn *insn)
|
|
||||||
{
|
|
||||||
return insn->opcode.nbytes == 1 && insn->opcode.bytes[0] == 0x90;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool insn_is_nopl(struct insn *insn)
|
|
||||||
{
|
|
||||||
if (insn->opcode.nbytes != 2)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (insn->opcode.bytes[0] != 0x0f || insn->opcode.bytes[1] != 0x1f)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (!insn->modrm.nbytes)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (X86_MODRM_REG(insn->modrm.bytes[0]) != 0)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
/* 0f 1f /0 - NOPL */
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool can_optimize(struct insn *insn, unsigned long vaddr)
|
static bool can_optimize(struct insn *insn, unsigned long vaddr)
|
||||||
{
|
{
|
||||||
if (!insn->x86_64 || insn->length != 5)
|
if (!insn->x86_64 || insn->length != 5)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
if (!insn_is_nop(insn) && !insn_is_nopl(insn))
|
if (!insn_is_nop(insn))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
/* We can't do cross page atomic writes yet. */
|
/* We can't do cross page atomic writes yet. */
|
||||||
|
|
@ -1426,19 +1403,14 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
|
||||||
{
|
{
|
||||||
u8 opc1 = OPCODE1(insn);
|
u8 opc1 = OPCODE1(insn);
|
||||||
insn_byte_t p;
|
insn_byte_t p;
|
||||||
int i;
|
|
||||||
|
|
||||||
/* x86_nops[insn->length]; same as jmp with .offs = 0 */
|
if (insn_is_nop(insn))
|
||||||
if (insn->length <= ASM_NOP_MAX &&
|
|
||||||
!memcmp(insn->kaddr, x86_nops[insn->length], insn->length))
|
|
||||||
goto setup;
|
goto setup;
|
||||||
|
|
||||||
switch (opc1) {
|
switch (opc1) {
|
||||||
case 0xeb: /* jmp 8 */
|
case 0xeb: /* jmp 8 */
|
||||||
case 0xe9: /* jmp 32 */
|
case 0xe9: /* jmp 32 */
|
||||||
break;
|
break;
|
||||||
case 0x90: /* prefix* + nop; same as jmp with .offs = 0 */
|
|
||||||
goto setup;
|
|
||||||
|
|
||||||
case 0xe8: /* call relative */
|
case 0xe8: /* call relative */
|
||||||
branch_clear_offset(auprobe, insn);
|
branch_clear_offset(auprobe, insn);
|
||||||
|
|
@ -1463,7 +1435,7 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
|
||||||
* Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix.
|
* Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix.
|
||||||
* No one uses these insns, reject any branch insns with such prefix.
|
* No one uses these insns, reject any branch insns with such prefix.
|
||||||
*/
|
*/
|
||||||
for_each_insn_prefix(insn, i, p) {
|
for_each_insn_prefix(insn, p) {
|
||||||
if (p == 0x66)
|
if (p == 0x66)
|
||||||
return -ENOTSUPP;
|
return -ENOTSUPP;
|
||||||
}
|
}
|
||||||
|
|
@ -1819,3 +1791,35 @@ bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx,
|
||||||
else
|
else
|
||||||
return regs->sp <= ret->stack;
|
return regs->sp <= ret->stack;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Heuristic-based check if uprobe is installed at the function entry.
|
||||||
|
*
|
||||||
|
* Under assumption of user code being compiled with frame pointers,
|
||||||
|
* `push %rbp/%ebp` is a good indicator that we indeed are.
|
||||||
|
*
|
||||||
|
* Similarly, `endbr64` (assuming 64-bit mode) is also a common pattern.
|
||||||
|
* If we get this wrong, captured stack trace might have one extra bogus
|
||||||
|
* entry, but the rest of stack trace will still be meaningful.
|
||||||
|
*/
|
||||||
|
bool is_uprobe_at_func_entry(struct pt_regs *regs)
|
||||||
|
{
|
||||||
|
struct arch_uprobe *auprobe;
|
||||||
|
|
||||||
|
if (!current->utask)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
auprobe = current->utask->auprobe;
|
||||||
|
if (!auprobe)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* push %rbp/%ebp */
|
||||||
|
if (auprobe->insn[0] == 0x55)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
/* endbr64 (64-bit only) */
|
||||||
|
if (user_64bit_mode(regs) && is_endbr((u32 *)auprobe->insn))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -63,11 +63,10 @@ static bool is_string_insn(struct insn *insn)
|
||||||
bool insn_has_rep_prefix(struct insn *insn)
|
bool insn_has_rep_prefix(struct insn *insn)
|
||||||
{
|
{
|
||||||
insn_byte_t p;
|
insn_byte_t p;
|
||||||
int i;
|
|
||||||
|
|
||||||
insn_get_prefixes(insn);
|
insn_get_prefixes(insn);
|
||||||
|
|
||||||
for_each_insn_prefix(insn, i, p) {
|
for_each_insn_prefix(insn, p) {
|
||||||
if (p == 0xf2 || p == 0xf3)
|
if (p == 0xf2 || p == 0xf3)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
@ -92,13 +91,13 @@ bool insn_has_rep_prefix(struct insn *insn)
|
||||||
static int get_seg_reg_override_idx(struct insn *insn)
|
static int get_seg_reg_override_idx(struct insn *insn)
|
||||||
{
|
{
|
||||||
int idx = INAT_SEG_REG_DEFAULT;
|
int idx = INAT_SEG_REG_DEFAULT;
|
||||||
int num_overrides = 0, i;
|
int num_overrides = 0;
|
||||||
insn_byte_t p;
|
insn_byte_t p;
|
||||||
|
|
||||||
insn_get_prefixes(insn);
|
insn_get_prefixes(insn);
|
||||||
|
|
||||||
/* Look for any segment override prefixes. */
|
/* Look for any segment override prefixes. */
|
||||||
for_each_insn_prefix(insn, i, p) {
|
for_each_insn_prefix(insn, p) {
|
||||||
insn_attr_t attr;
|
insn_attr_t attr;
|
||||||
|
|
||||||
attr = inat_get_opcode_attribute(p);
|
attr = inat_get_opcode_attribute(p);
|
||||||
|
|
@ -1676,3 +1675,147 @@ enum insn_mmio_type insn_decode_mmio(struct insn *insn, int *bytes)
|
||||||
|
|
||||||
return type;
|
return type;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Recognise typical NOP patterns for both 32bit and 64bit.
|
||||||
|
*
|
||||||
|
* Notably:
|
||||||
|
* - NOP, but not: REP NOP aka PAUSE
|
||||||
|
* - NOPL
|
||||||
|
* - MOV %reg, %reg
|
||||||
|
* - LEA 0(%reg),%reg
|
||||||
|
* - JMP +0
|
||||||
|
*
|
||||||
|
* Must not have false-positives; instructions identified as a NOP might be
|
||||||
|
* emulated as a NOP (uprobe) or Run Length Encoded in a larger NOP
|
||||||
|
* (alternatives).
|
||||||
|
*
|
||||||
|
* False-negatives are fine; need not be exhaustive.
|
||||||
|
*/
|
||||||
|
bool insn_is_nop(struct insn *insn)
|
||||||
|
{
|
||||||
|
u8 b3 = 0, x3 = 0, r3 = 0;
|
||||||
|
u8 b4 = 0, x4 = 0, r4 = 0, m = 0;
|
||||||
|
u8 modrm, modrm_mod, modrm_reg, modrm_rm;
|
||||||
|
u8 sib = 0, sib_scale, sib_index, sib_base;
|
||||||
|
u8 nrex, rex;
|
||||||
|
u8 p, rep = 0;
|
||||||
|
|
||||||
|
if ((nrex = insn->rex_prefix.nbytes)) {
|
||||||
|
rex = insn->rex_prefix.bytes[nrex-1];
|
||||||
|
|
||||||
|
r3 = !!X86_REX_R(rex);
|
||||||
|
x3 = !!X86_REX_X(rex);
|
||||||
|
b3 = !!X86_REX_B(rex);
|
||||||
|
if (nrex > 1) {
|
||||||
|
r4 = !!X86_REX2_R(rex);
|
||||||
|
x4 = !!X86_REX2_X(rex);
|
||||||
|
b4 = !!X86_REX2_B(rex);
|
||||||
|
m = !!X86_REX2_M(rex);
|
||||||
|
}
|
||||||
|
|
||||||
|
} else if (insn->vex_prefix.nbytes) {
|
||||||
|
/*
|
||||||
|
* Ignore VEX encoded NOPs
|
||||||
|
*/
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (insn->modrm.nbytes) {
|
||||||
|
modrm = insn->modrm.bytes[0];
|
||||||
|
modrm_mod = X86_MODRM_MOD(modrm);
|
||||||
|
modrm_reg = X86_MODRM_REG(modrm) + 8*r3 + 16*r4;
|
||||||
|
modrm_rm = X86_MODRM_RM(modrm) + 8*b3 + 16*b4;
|
||||||
|
modrm = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (insn->sib.nbytes) {
|
||||||
|
sib = insn->sib.bytes[0];
|
||||||
|
sib_scale = X86_SIB_SCALE(sib);
|
||||||
|
sib_index = X86_SIB_INDEX(sib) + 8*x3 + 16*x4;
|
||||||
|
sib_base = X86_SIB_BASE(sib) + 8*b3 + 16*b4;
|
||||||
|
sib = 1;
|
||||||
|
|
||||||
|
modrm_rm = sib_base;
|
||||||
|
}
|
||||||
|
|
||||||
|
for_each_insn_prefix(insn, p) {
|
||||||
|
if (p == 0xf3) /* REPE */
|
||||||
|
rep = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Opcode map munging:
|
||||||
|
*
|
||||||
|
* REX2: 0 - single byte opcode
|
||||||
|
* 1 - 0f second byte opcode
|
||||||
|
*/
|
||||||
|
switch (m) {
|
||||||
|
case 0: break;
|
||||||
|
case 1: insn->opcode.value <<= 8;
|
||||||
|
insn->opcode.value |= 0x0f;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (insn->opcode.bytes[0]) {
|
||||||
|
case 0x0f: /* 2nd byte */
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 0x89: /* MOV */
|
||||||
|
if (modrm_mod != 3) /* register-direct */
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* native size */
|
||||||
|
if (insn->opnd_bytes != 4 * (1 + insn->x86_64))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return modrm_reg == modrm_rm; /* MOV %reg, %reg */
|
||||||
|
|
||||||
|
case 0x8d: /* LEA */
|
||||||
|
if (modrm_mod == 0 || modrm_mod == 3) /* register-indirect with disp */
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* native size */
|
||||||
|
if (insn->opnd_bytes != 4 * (1 + insn->x86_64))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (insn->displacement.value != 0)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (sib && (sib_scale != 0 || sib_index != 4)) /* (%reg, %eiz, 1) */
|
||||||
|
return false;
|
||||||
|
|
||||||
|
for_each_insn_prefix(insn, p) {
|
||||||
|
if (p != 0x3e) /* DS */
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return modrm_reg == modrm_rm; /* LEA 0(%reg), %reg */
|
||||||
|
|
||||||
|
case 0x90: /* NOP */
|
||||||
|
if (b3 || b4) /* XCHG %r{8,16,24},%rax */
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (rep) /* REP NOP := PAUSE */
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
|
||||||
|
case 0xe9: /* JMP.d32 */
|
||||||
|
case 0xeb: /* JMP.d8 */
|
||||||
|
return insn->immediate.value == 0; /* JMP +0 */
|
||||||
|
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (insn->opcode.bytes[1]) {
|
||||||
|
case 0x1f:
|
||||||
|
return modrm_reg == 0; /* 0f 1f /0 -- NOPL */
|
||||||
|
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@@ -253,11 +253,11 @@ static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs)
 static __always_inline void exit_to_user_mode(void)
 {
 	instrumentation_begin();
+	unwind_reset_info();
 	trace_hardirqs_on_prepare();
 	lockdep_hardirqs_on_prepare();
 	instrumentation_end();
 
-	unwind_reset_info();
 	user_enter_irqoff();
 	arch_exit_to_user_mode();
 	lockdep_hardirqs_on(CALLER_ADDR0);
|
@@ -1720,7 +1720,7 @@ extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct p
 extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs);
 extern struct perf_callchain_entry *
 get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
-		   u32 max_stack, bool crosstask, bool add_mark);
+		   u32 max_stack, bool crosstask, bool add_mark, u64 defer_cookie);
 extern int get_callchain_buffers(int max_stack);
 extern void put_callchain_buffers(void);
 extern struct perf_callchain_entry *get_callchain_entry(int *rctx);
|
@ -6,16 +6,6 @@
|
||||||
#include <linux/unwind_user.h>
|
#include <linux/unwind_user.h>
|
||||||
#include <linux/unwind_deferred_types.h>
|
#include <linux/unwind_deferred_types.h>
|
||||||
|
|
||||||
struct unwind_work;
|
|
||||||
|
|
||||||
typedef void (*unwind_callback_t)(struct unwind_work *work, struct unwind_stacktrace *trace, u64 cookie);
|
|
||||||
|
|
||||||
struct unwind_work {
|
|
||||||
struct list_head list;
|
|
||||||
unwind_callback_t func;
|
|
||||||
int bit;
|
|
||||||
};
|
|
||||||
|
|
||||||
#ifdef CONFIG_UNWIND_USER
|
#ifdef CONFIG_UNWIND_USER
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
|
|
@ -44,23 +34,23 @@ void unwind_deferred_task_exit(struct task_struct *task);
|
||||||
static __always_inline void unwind_reset_info(void)
|
static __always_inline void unwind_reset_info(void)
|
||||||
{
|
{
|
||||||
struct unwind_task_info *info = ¤t->unwind_info;
|
struct unwind_task_info *info = ¤t->unwind_info;
|
||||||
unsigned long bits;
|
unsigned long bits = atomic_long_read(&info->unwind_mask);
|
||||||
|
|
||||||
/* Was there any unwinding? */
|
/* Was there any unwinding? */
|
||||||
if (unlikely(info->unwind_mask)) {
|
if (likely(!bits))
|
||||||
bits = info->unwind_mask;
|
return;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
/* Is a task_work going to run again before going back */
|
/* Is a task_work going to run again before going back */
|
||||||
if (bits & UNWIND_PENDING)
|
if (bits & UNWIND_PENDING)
|
||||||
return;
|
return;
|
||||||
} while (!try_cmpxchg(&info->unwind_mask, &bits, 0UL));
|
} while (!atomic_long_try_cmpxchg(&info->unwind_mask, &bits, 0UL));
|
||||||
current->unwind_info.id.id = 0;
|
current->unwind_info.id.id = 0;
|
||||||
|
|
||||||
if (unlikely(info->cache)) {
|
if (unlikely(info->cache)) {
|
||||||
info->cache->nr_entries = 0;
|
info->cache->nr_entries = 0;
|
||||||
info->cache->unwind_completed = 0;
|
info->cache->unwind_completed = 0;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#else /* !CONFIG_UNWIND_USER */
|
#else /* !CONFIG_UNWIND_USER */
|
||||||
|
|
@ -68,9 +58,17 @@ static __always_inline void unwind_reset_info(void)
|
||||||
static inline void unwind_task_init(struct task_struct *task) {}
|
static inline void unwind_task_init(struct task_struct *task) {}
|
||||||
static inline void unwind_task_free(struct task_struct *task) {}
|
static inline void unwind_task_free(struct task_struct *task) {}
|
||||||
|
|
||||||
static inline int unwind_user_faultable(struct unwind_stacktrace *trace) { return -ENOSYS; }
|
static inline int unwind_user_faultable(struct unwind_stacktrace *trace)
|
||||||
static inline int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func) { return -ENOSYS; }
|
{ return -ENOSYS; }
|
||||||
static inline int unwind_deferred_request(struct unwind_work *work, u64 *timestamp) { return -ENOSYS; }
|
|
||||||
|
static inline int
|
||||||
|
unwind_deferred_init(struct unwind_work *work, unwind_callback_t func)
|
||||||
|
{ return -ENOSYS; }
|
||||||
|
|
||||||
|
static inline int
|
||||||
|
unwind_deferred_request(struct unwind_work *work, u64 *timestamp)
|
||||||
|
{ return -ENOSYS; }
|
||||||
|
|
||||||
static inline void unwind_deferred_cancel(struct unwind_work *work) {}
|
static inline void unwind_deferred_cancel(struct unwind_work *work) {}
|
||||||
|
|
||||||
static inline void unwind_deferred_task_exit(struct task_struct *task) {}
|
static inline void unwind_deferred_task_exit(struct task_struct *task) {}
|
||||||
|
|
|
||||||
|
|
@@ -2,6 +2,9 @@
 #ifndef _LINUX_UNWIND_USER_DEFERRED_TYPES_H
 #define _LINUX_UNWIND_USER_DEFERRED_TYPES_H
 
+#include <linux/types.h>
+#include <linux/atomic.h>
+
 struct unwind_cache {
 	unsigned long		unwind_completed;
 	unsigned int		nr_entries;
@@ -30,10 +33,23 @@ union unwind_task_id {
 };
 
 struct unwind_task_info {
-	unsigned long		unwind_mask;
+	atomic_long_t		unwind_mask;
 	struct unwind_cache	*cache;
 	struct callback_head	work;
 	union unwind_task_id	id;
 };
 
+struct unwind_work;
+struct unwind_stacktrace;
+
+typedef void (*unwind_callback_t)(struct unwind_work *work,
+				  struct unwind_stacktrace *trace,
+				  u64 cookie);
+
+struct unwind_work {
+	struct list_head	list;
+	unwind_callback_t	func;
+	int			bit;
+};
+
 #endif /* _LINUX_UNWIND_USER_DEFERRED_TYPES_H */
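For reference, the deferred-unwind API described by these headers can be driven by any in-kernel subscriber. Below is a minimal sketch of such a consumer, using only the calls visible in the hunks above; the "my_unwind" names and the pr_info() reporting are hypothetical and not part of this series, and field types are assumed from the diff context.

    /* Hypothetical in-kernel consumer of the deferred user-space unwinder. */
    #include <linux/unwind_deferred.h>
    #include <linux/printk.h>
    #include <linux/init.h>

    static struct unwind_work my_unwind;

    /* Runs in task context, shortly before the task returns to user space. */
    static void my_unwind_cb(struct unwind_work *work,
                             struct unwind_stacktrace *trace, u64 cookie)
    {
            unsigned int i;

            for (i = 0; i < trace->nr; i++)
                    pr_info("deferred frame %u: %lx (cookie %llu)\n",
                            i, trace->entries[i], cookie);
    }

    static int __init my_unwind_setup(void)
    {
            /* Claims one bit in every task's unwind_mask for this subscriber. */
            return unwind_deferred_init(&my_unwind, my_unwind_cb);
    }

    /*
     * Later, from event (even NMI) context on the task of interest:
     *
     *      u64 cookie;
     *
     *      if (unwind_deferred_request(&my_unwind, &cookie) >= 0)
     *              ;       // a callback carrying this cookie will follow
     */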
@@ -36,8 +36,10 @@ struct unwind_user_state {
 	unsigned long				ip;
 	unsigned long				sp;
 	unsigned long				fp;
+	unsigned int				ws;
 	enum unwind_user_type			current_type;
 	unsigned int				available_types;
+	bool					topmost;
 	bool					done;
 };
@@ -463,7 +463,9 @@ struct perf_event_attr {
 				inherit_thread :  1, /* children only inherit if cloned with CLONE_THREAD */
 				remove_on_exec :  1, /* event is removed from task on exec */
 				sigtrap        :  1, /* send synchronous SIGTRAP on event */
-				__reserved_1   : 26;
+				defer_callchain:  1, /* request PERF_RECORD_CALLCHAIN_DEFERRED records */
+				defer_output   :  1, /* output PERF_RECORD_CALLCHAIN_DEFERRED records */
+				__reserved_1   : 24;
 
 	union {
 		__u32		wakeup_events;	  /* wake up every n events */
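On the user-space side, the two new attribute bits are all that is needed to opt in. A sketch of opening such a sampling event follows; it assumes a uapi perf_event.h that already carries the defer_callchain/defer_output bits, and error handling is omitted.

    /* User-space sketch: request deferred user callchains for a cycles event. */
    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <string.h>
    #include <unistd.h>

    static int open_deferred_cycles_event(void)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = PERF_TYPE_HARDWARE;
            attr.config = PERF_COUNT_HW_CPU_CYCLES;
            attr.sample_period = 100000;
            attr.sample_type = PERF_SAMPLE_CALLCHAIN;
            attr.sample_id_all = 1;
            attr.defer_callchain = 1;       /* kernel part now, user part later */
            attr.defer_output = 1;          /* emit PERF_RECORD_CALLCHAIN_DEFERRED */

            /* measure the calling thread, any CPU */
            return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    }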
@@ -1239,6 +1241,22 @@ enum perf_event_type {
 	 */
 	PERF_RECORD_AUX_OUTPUT_HW_ID		= 21,
 
+	/*
+	 * This user callchain capture was deferred until shortly before
+	 * returning to user space.  Previous samples would have kernel
+	 * callchains only and they need to be stitched with this to make full
+	 * callchains.
+	 *
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u64				cookie;
+	 *	u64				nr;
+	 *	u64				ips[nr];
+	 *	struct sample_id		sample_id;
+	 * };
+	 */
+	PERF_RECORD_CALLCHAIN_DEFERRED		= 22,
+
 	PERF_RECORD_MAX,			/* non-ABI */
 };
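For ring-buffer consumers, the record documented above maps onto a plain C structure. A sketch is shown below; the fixed part mirrors the uapi comment, while the trailing sample_id block is only present when sample_id_all is set, as for other side-band records.

    /* User-space view of PERF_RECORD_CALLCHAIN_DEFERRED (sketch). */
    #include <linux/perf_event.h>
    #include <linux/types.h>

    struct callchain_deferred_record {
            struct perf_event_header header; /* .type == PERF_RECORD_CALLCHAIN_DEFERRED */
            __u64 cookie;                    /* matches the cookie of earlier samples */
            __u64 nr;                        /* number of user-space ips that follow */
            __u64 ips[];                     /* ips[nr] */
            /* struct sample_id follows when sample_id_all is set */
    };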
@@ -1269,6 +1287,7 @@ enum perf_callchain_context {
 	PERF_CONTEXT_HV				= (__u64)-32,
 	PERF_CONTEXT_KERNEL			= (__u64)-128,
 	PERF_CONTEXT_USER			= (__u64)-512,
+	PERF_CONTEXT_USER_DEFERRED		= (__u64)-640,
 
 	PERF_CONTEXT_GUEST			= (__u64)-2048,
 	PERF_CONTEXT_GUEST_KERNEL		= (__u64)-2176,
@@ -315,7 +315,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
 		max_depth = sysctl_perf_event_max_stack;
 
 	trace = get_perf_callchain(regs, kernel, user, max_depth,
-				   false, false);
+				   false, false, 0);
 
 	if (unlikely(!trace))
 		/* couldn't fetch the stack trace */
@@ -452,7 +452,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
 		trace = get_callchain_entry_for_task(task, max_depth);
 	else
 		trace = get_perf_callchain(regs, kernel, user, max_depth,
-					   crosstask, false);
+					   crosstask, false, 0);
 
 	if (unlikely(!trace) || trace->nr < skip) {
 		if (may_fault)
@@ -218,7 +218,7 @@ static void fixup_uretprobe_trampoline_entries(struct perf_callchain_entry *entr
 
 struct perf_callchain_entry *
 get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
-		   u32 max_stack, bool crosstask, bool add_mark)
+		   u32 max_stack, bool crosstask, bool add_mark, u64 defer_cookie)
 {
 	struct perf_callchain_entry *entry;
 	struct perf_callchain_entry_ctx ctx;
@@ -251,6 +251,18 @@ get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
 			regs = task_pt_regs(current);
 		}
 
+		if (defer_cookie) {
+			/*
+			 * Foretell the coming of PERF_RECORD_CALLCHAIN_DEFERRED
+			 * which can be stitched to this one, and add
+			 * the cookie after it (it will be cut off when the
+			 * user stack is copied to the callchain).
+			 */
+			perf_callchain_store_context(&ctx, PERF_CONTEXT_USER_DEFERRED);
+			perf_callchain_store_context(&ctx, defer_cookie);
+			goto exit_put;
+		}
+
 		if (add_mark)
 			perf_callchain_store_context(&ctx, PERF_CONTEXT_USER);
@@ -56,6 +56,7 @@
 #include <linux/buildid.h>
 #include <linux/task_work.h>
 #include <linux/percpu-rwsem.h>
+#include <linux/unwind_deferred.h>
 
 #include "internal.h"
 
@@ -8200,6 +8201,8 @@ static u64 perf_get_page_size(unsigned long addr)
 
 static struct perf_callchain_entry __empty_callchain = { .nr = 0, };
 
+static struct unwind_work perf_unwind_work;
+
 struct perf_callchain_entry *
 perf_callchain(struct perf_event *event, struct pt_regs *regs)
 {
@@ -8208,8 +8211,11 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs)
 			!(current->flags & (PF_KTHREAD | PF_USER_WORKER));
 	/* Disallow cross-task user callchains. */
 	bool crosstask = event->ctx->task && event->ctx->task != current;
+	bool defer_user = IS_ENABLED(CONFIG_UNWIND_USER) && user &&
+			  event->attr.defer_callchain;
 	const u32 max_stack = event->attr.sample_max_stack;
 	struct perf_callchain_entry *callchain;
+	u64 defer_cookie;
 
 	if (!current->mm)
 		user = false;
@@ -8217,8 +8223,13 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs)
 	if (!kernel && !user)
 		return &__empty_callchain;
 
-	callchain = get_perf_callchain(regs, kernel, user,
-				       max_stack, crosstask, true);
+	if (!(user && defer_user && !crosstask &&
+	      unwind_deferred_request(&perf_unwind_work, &defer_cookie) >= 0))
+		defer_cookie = 0;
+
+	callchain = get_perf_callchain(regs, kernel, user, max_stack,
+				       crosstask, true, defer_cookie);
 
 	return callchain ?: &__empty_callchain;
 }
@@ -10003,6 +10014,66 @@ void perf_event_bpf_event(struct bpf_prog *prog,
 	perf_iterate_sb(perf_event_bpf_output, &bpf_event, NULL);
 }
 
+struct perf_callchain_deferred_event {
+	struct unwind_stacktrace	*trace;
+	struct {
+		struct perf_event_header	header;
+		u64				cookie;
+		u64				nr;
+		u64				ips[];
+	} event;
+};
+
+static void perf_callchain_deferred_output(struct perf_event *event, void *data)
+{
+	struct perf_callchain_deferred_event *deferred_event = data;
+	struct perf_output_handle handle;
+	struct perf_sample_data sample;
+	int ret, size = deferred_event->event.header.size;
+
+	if (!event->attr.defer_output)
+		return;
+
+	/* XXX do we really need sample_id_all for this ??? */
+	perf_event_header__init_id(&deferred_event->event.header, &sample, event);
+
+	ret = perf_output_begin(&handle, &sample, event,
+				deferred_event->event.header.size);
+	if (ret)
+		goto out;
+
+	perf_output_put(&handle, deferred_event->event);
+	for (int i = 0; i < deferred_event->trace->nr; i++) {
+		u64 entry = deferred_event->trace->entries[i];
+		perf_output_put(&handle, entry);
+	}
+	perf_event__output_id_sample(event, &handle, &sample);
+
+	perf_output_end(&handle);
+out:
+	deferred_event->event.header.size = size;
+}
+
+static void perf_unwind_deferred_callback(struct unwind_work *work,
+					  struct unwind_stacktrace *trace, u64 cookie)
+{
+	struct perf_callchain_deferred_event deferred_event = {
+		.trace = trace,
+		.event = {
+			.header = {
+				.type = PERF_RECORD_CALLCHAIN_DEFERRED,
+				.misc = PERF_RECORD_MISC_USER,
+				.size = sizeof(deferred_event.event) +
+					(trace->nr * sizeof(u64)),
+			},
+			.cookie = cookie,
+			.nr = trace->nr,
+		},
+	};
+
+	perf_iterate_sb(perf_callchain_deferred_output, &deferred_event, NULL);
+}
+
 struct perf_text_poke_event {
 	const void *old_bytes;
 	const void *new_bytes;
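A tool that enables defer_callchain has to buffer samples whose callchain ends in the PERF_CONTEXT_USER_DEFERRED marker and later splice in the user frames delivered by PERF_RECORD_CALLCHAIN_DEFERRED with the matching cookie. The sketch below shows only the splice step; the pending-sample bookkeeping, array sizes and names are invented for illustration.

    /* User-space sketch: stitch deferred user frames onto a buffered sample. */
    #include <linux/perf_event.h>
    #include <stdint.h>

    /* Kernel-side ips of one buffered sample, ending at the deferred marker. */
    struct pending_sample {
            uint64_t cookie;
            uint64_t nr;
            uint64_t ips[512];
    };

    static uint64_t stitch(const struct pending_sample *ps,
                           const uint64_t *user_ips, uint64_t user_nr,
                           uint64_t *out, uint64_t out_max)
    {
            uint64_t n = 0;

            for (uint64_t i = 0; i < ps->nr && n < out_max; i++) {
                    if (ps->ips[i] == PERF_CONTEXT_USER_DEFERRED)
                            break;                  /* replace marker with real frames */
                    out[n++] = ps->ips[i];
            }
            if (n < out_max)
                    out[n++] = PERF_CONTEXT_USER;   /* keep the usual context marker */
            for (uint64_t i = 0; i < user_nr && n < out_max; i++)
                    out[n++] = user_ips[i];
            return n;
    }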
@@ -14809,6 +14880,9 @@ void __init perf_event_init(void)
 
 	idr_init(&pmu_idr);
 
+	unwind_deferred_init(&perf_unwind_work,
+			     perf_unwind_deferred_callback);
+
 	perf_event_init_all_cpus();
 	init_srcu_struct(&pmus_srcu);
 	perf_pmu_register(&perf_swevent, "software", PERF_TYPE_SOFTWARE);
@@ -940,7 +940,6 @@ void __noreturn do_exit(long code)
 
 	tsk->exit_code = code;
 	taskstats_exit(tsk, group_dead);
-	unwind_deferred_task_exit(tsk);
 	trace_sched_process_exit(tsk, group_dead);
 
 	/*
@@ -951,6 +950,12 @@ void __noreturn do_exit(long code)
 	 * gets woken up by child-exit notifications.
 	 */
 	perf_event_exit_task(tsk);
+	/*
+	 * PF_EXITING (above) ensures unwind_deferred_request() will no
+	 * longer add new unwinds. While exit_mm() (below) will destroy the
+	 * abaility to do unwinds. So flush any pending unwinds here.
+	 */
+	unwind_deferred_task_exit(tsk);
 
 	exit_mm();
@@ -9,7 +9,12 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */
 #ifdef CONFIG_IRQ_WORK
 static void task_work_set_notify_irq(struct irq_work *entry)
 {
-	test_and_set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
+	/*
+	 * no-op IPI
+	 *
+	 * TWA_NMI_CURRENT will already have set the TIF flag, all
+	 * this interrupt does it tickle the return-to-user path.
+	 */
 }
 static DEFINE_PER_CPU(struct irq_work, irq_work_NMI_resume) =
 	IRQ_WORK_INIT_HARD(task_work_set_notify_irq);
@@ -86,6 +91,7 @@ int task_work_add(struct task_struct *task, struct callback_head *work,
 		break;
 #ifdef CONFIG_IRQ_WORK
 	case TWA_NMI_CURRENT:
+		set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
 		irq_work_queue(this_cpu_ptr(&irq_work_NMI_resume));
 		break;
 #endif
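The two hunks above split the NMI case: task_work_add(..., TWA_NMI_CURRENT) now sets TIF_NOTIFY_RESUME itself, and the self-IPI is only a kick of the return-to-user path. A sketch of a hypothetical caller follows; the "my_" names are illustrative, and TWA_NMI_CURRENT is only valid for the current task from NMI context.

    /* Hypothetical NMI-context user of TWA_NMI_CURRENT. */
    #include <linux/task_work.h>
    #include <linux/sched.h>
    #include <linux/printk.h>

    static struct callback_head my_nmi_work;

    static void my_work_fn(struct callback_head *head)
    {
            /* Runs in task context on the way back to user space. */
    }

    static void my_nmi_handler(void)
    {
            init_task_work(&my_nmi_work, my_work_fn);

            if (task_work_add(current, &my_nmi_work, TWA_NMI_CURRENT))
                    pr_warn_once("task_work_add() from NMI failed\n");
    }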
@@ -53,7 +53,7 @@ DEFINE_STATIC_SRCU(unwind_srcu);
 
 static inline bool unwind_pending(struct unwind_task_info *info)
 {
-	return test_bit(UNWIND_PENDING_BIT, &info->unwind_mask);
+	return atomic_long_read(&info->unwind_mask) & UNWIND_PENDING;
 }
 
 /*
@@ -79,6 +79,8 @@ static u64 get_cookie(struct unwind_task_info *info)
 {
 	u32 cnt = 1;
 
+	lockdep_assert_irqs_disabled();
+
 	if (info->id.cpu)
 		return info->id.id;
 
@@ -126,23 +128,20 @@ int unwind_user_faultable(struct unwind_stacktrace *trace)
 
 	cache = info->cache;
 	trace->entries = cache->entries;
+	trace->nr = cache->nr_entries;
 
-	if (cache->nr_entries) {
-		/*
-		 * The user stack has already been previously unwound in this
-		 * entry context.  Skip the unwind and use the cache.
-		 */
-		trace->nr = cache->nr_entries;
-		return 0;
-	}
-
-	trace->nr = 0;
+	/*
+	 * The user stack has already been previously unwound in this
+	 * entry context.  Skip the unwind and use the cache.
+	 */
+	if (trace->nr)
+		return 0;
+
 	unwind_user(trace, UNWIND_MAX_ENTRIES);
 
 	cache->nr_entries = trace->nr;
 
 	/* Clear nr_entries on way back to user space */
-	set_bit(UNWIND_USED_BIT, &info->unwind_mask);
+	atomic_long_or(UNWIND_USED, &info->unwind_mask);
 
 	return 0;
 }
@@ -160,7 +159,7 @@ static void process_unwind_deferred(struct task_struct *task)
 
 	/* Clear pending bit but make sure to have the current bits */
 	bits = atomic_long_fetch_andnot(UNWIND_PENDING,
-					(atomic_long_t *)&info->unwind_mask);
+					&info->unwind_mask);
 	/*
 	 * From here on out, the callback must always be called, even if it's
 	 * just an empty trace.
@@ -231,6 +230,7 @@ void unwind_deferred_task_exit(struct task_struct *task)
 int unwind_deferred_request(struct unwind_work *work, u64 *cookie)
 {
 	struct unwind_task_info *info = &current->unwind_info;
+	int twa_mode = TWA_RESUME;
 	unsigned long old, bits;
 	unsigned long bit;
 	int ret;
@@ -246,8 +246,11 @@ int unwind_deferred_request(struct unwind_work *work, u64 *cookie)
 	 * Trigger a warning to make it obvious that an architecture
 	 * is using this in NMI when it should not be.
 	 */
-	if (WARN_ON_ONCE(!CAN_USE_IN_NMI && in_nmi()))
-		return -EINVAL;
+	if (in_nmi()) {
+		if (WARN_ON_ONCE(!CAN_USE_IN_NMI))
+			return -EINVAL;
+		twa_mode = TWA_NMI_CURRENT;
+	}
 
 	/* Do not allow cancelled works to request again */
 	bit = READ_ONCE(work->bit);
@@ -261,7 +264,7 @@ int unwind_deferred_request(struct unwind_work *work, u64 *cookie)
 
 	*cookie = get_cookie(info);
 
-	old = READ_ONCE(info->unwind_mask);
+	old = atomic_long_read(&info->unwind_mask);
 
 	/* Is this already queued or executed */
 	if (old & bit)
@@ -274,7 +277,7 @@ int unwind_deferred_request(struct unwind_work *work, u64 *cookie)
 	 * to have a callback.
 	 */
 	bits = UNWIND_PENDING | bit;
-	old = atomic_long_fetch_or(bits, (atomic_long_t *)&info->unwind_mask);
+	old = atomic_long_fetch_or(bits, &info->unwind_mask);
 	if (old & bits) {
 		/*
 		 * If the work's bit was set, whatever set it had better
@@ -285,10 +288,10 @@ int unwind_deferred_request(struct unwind_work *work, u64 *cookie)
 	}
 
 	/* The work has been claimed, now schedule it. */
-	ret = task_work_add(current, &info->work, TWA_RESUME);
+	ret = task_work_add(current, &info->work, twa_mode);
 
 	if (WARN_ON_ONCE(ret))
-		WRITE_ONCE(info->unwind_mask, 0);
+		atomic_long_set(&info->unwind_mask, 0);
 
 	return ret;
 }
@@ -320,7 +323,8 @@ void unwind_deferred_cancel(struct unwind_work *work)
 	guard(rcu)();
 	/* Clear this bit from all threads */
 	for_each_process_thread(g, t) {
-		clear_bit(bit, &t->unwind_info.unwind_mask);
+		atomic_long_andnot(BIT(bit),
+				   &t->unwind_info.unwind_mask);
 		if (t->unwind_info.cache)
 			clear_bit(bit, &t->unwind_info.cache->unwind_completed);
 	}
@@ -350,7 +354,7 @@ void unwind_task_init(struct task_struct *task)
 
 	memset(info, 0, sizeof(*info));
 	init_task_work(&info->work, unwind_deferred_task_work);
-	info->unwind_mask = 0;
+	atomic_long_set(&info->unwind_mask, 0);
 }
 
 void unwind_task_free(struct task_struct *task)
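The conversion above keeps the existing claim protocol on unwind_mask and only switches it to atomic_long_t so that no mixed plain/atomic accesses remain: every requester ORs in its own bit plus PENDING, but only the caller that actually installed PENDING goes on to schedule the task work. Stripped of kernel details, the protocol looks like the stand-alone C11 sketch below; names and bit values are invented for the illustration.

    /* Illustrative user-space model of the unwind_mask claim protocol. */
    #include <stdatomic.h>
    #include <stdbool.h>

    #define UNWIND_PENDING  (1UL << 0)

    static _Atomic unsigned long unwind_mask;

    /* Returns true if this caller won the right to schedule the task work. */
    static bool claim_unwind(unsigned long work_bit)
    {
            unsigned long bits = UNWIND_PENDING | work_bit;
            unsigned long old = atomic_fetch_or(&unwind_mask, bits);

            if (old & work_bit)
                    return false;           /* this work was already queued or ran */

            return !(old & UNWIND_PENDING); /* first claimer schedules the work */
    }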
@@ -8,18 +8,28 @@
 #include <linux/unwind_user.h>
 #include <linux/uaccess.h>
 
-static const struct unwind_user_frame fp_frame = {
-	ARCH_INIT_USER_FP_FRAME
-};
-
 #define for_each_user_frame(state) \
 	for (unwind_user_start(state); !(state)->done; unwind_user_next(state))
 
-static int unwind_user_next_fp(struct unwind_user_state *state)
+static inline int
+get_user_word(unsigned long *word, unsigned long base, int off, unsigned int ws)
+{
+	unsigned long __user *addr = (void __user *)base + off;
+#ifdef CONFIG_COMPAT
+	if (ws == sizeof(int)) {
+		unsigned int data;
+		int ret = get_user(data, (unsigned int __user *)addr);
+		*word = data;
+		return ret;
+	}
+#endif
+	return get_user(*word, addr);
+}
+
+static int unwind_user_next_common(struct unwind_user_state *state,
+				   const struct unwind_user_frame *frame)
 {
-	const struct unwind_user_frame *frame = &fp_frame;
 	unsigned long cfa, fp, ra;
-	unsigned int shift;
 
 	if (frame->use_fp) {
 		if (state->fp < state->sp)
@@ -37,24 +47,45 @@ static int unwind_user_next_fp(struct unwind_user_state *state)
 		return -EINVAL;
 
 	/* Make sure that the address is word aligned */
-	shift = sizeof(long) == 4 ? 2 : 3;
-	if (cfa & ((1 << shift) - 1))
+	if (cfa & (state->ws - 1))
 		return -EINVAL;
 
 	/* Find the Return Address (RA) */
-	if (get_user(ra, (unsigned long *)(cfa + frame->ra_off)))
+	if (get_user_word(&ra, cfa, frame->ra_off, state->ws))
 		return -EINVAL;
 
-	if (frame->fp_off && get_user(fp, (unsigned long __user *)(cfa + frame->fp_off)))
+	if (frame->fp_off && get_user_word(&fp, cfa, frame->fp_off, state->ws))
 		return -EINVAL;
 
 	state->ip = ra;
 	state->sp = cfa;
 	if (frame->fp_off)
 		state->fp = fp;
+	state->topmost = false;
 	return 0;
 }
 
+static int unwind_user_next_fp(struct unwind_user_state *state)
+{
+#ifdef CONFIG_HAVE_UNWIND_USER_FP
+	struct pt_regs *regs = task_pt_regs(current);
+
+	if (state->topmost && unwind_user_at_function_start(regs)) {
+		const struct unwind_user_frame fp_entry_frame = {
+			ARCH_INIT_USER_FP_ENTRY_FRAME(state->ws)
+		};
+		return unwind_user_next_common(state, &fp_entry_frame);
+	}
+
+	const struct unwind_user_frame fp_frame = {
+		ARCH_INIT_USER_FP_FRAME(state->ws)
+	};
+	return unwind_user_next_common(state, &fp_frame);
+#else
+	return -EINVAL;
+#endif
+}
+
 static int unwind_user_next(struct unwind_user_state *state)
 {
 	unsigned long iter_mask = state->available_types;
@@ -102,6 +133,12 @@ static int unwind_user_start(struct unwind_user_state *state)
 	state->ip = instruction_pointer(regs);
 	state->sp = user_stack_pointer(regs);
 	state->fp = frame_pointer(regs);
+	state->ws = unwind_user_word_size(regs);
+	if (!state->ws) {
+		state->done = true;
+		return -EINVAL;
+	}
+	state->topmost = true;
 
 	return 0;
 }
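unwind_user_next_common()/unwind_user_next_fp() above walk the classic frame-pointer chain, now with a per-task word size so a 64-bit kernel can also step through compat frames. The same walk, reduced to the conventional x86-64 frame layout (saved frame pointer, then return address), can be sketched in user space; the layout and the -fno-omit-frame-pointer build requirement are assumptions of this sketch, not something quoted from the patch, which instead takes the layout from ARCH_INIT_USER_FP_FRAME() and validates every access.

    /* User-space illustration of a frame-pointer walk over the caller's own stack. */
    #include <stdint.h>
    #include <stdio.h>

    static void walk_own_stack(void)
    {
            uintptr_t fp = (uintptr_t)__builtin_frame_address(0);

            while (fp) {
                    uintptr_t *frame = (uintptr_t *)fp;
                    uintptr_t next_fp = frame[0];   /* saved frame pointer */
                    uintptr_t ra = frame[1];        /* return address */

                    if (!ra)
                            break;
                    printf("ip=%#lx\n", (unsigned long)ra);

                    if (next_fp <= fp)              /* must move toward older frames */
                            break;
                    fp = next_fp;
            }
    }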
@@ -312,7 +312,6 @@ static inline int insn_offset_immediate(struct insn *insn)
 /**
  * for_each_insn_prefix() -- Iterate prefixes in the instruction
  * @insn: Pointer to struct insn.
- * @idx:  Index storage.
  * @prefix: Prefix byte.
  *
  * Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix
@@ -321,8 +320,8 @@ static inline int insn_offset_immediate(struct insn *insn)
  * Since prefixes.nbytes can be bigger than 4 if some prefixes
  * are repeated, it cannot be used for looping over the prefixes.
  */
-#define for_each_insn_prefix(insn, idx, prefix)	\
-	for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++)
+#define for_each_insn_prefix(insn, prefix)	\
+	for (int idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++)
 
 #define POP_SS_OPCODE 0x1f
 #define MOV_SREG_OPCODE 0x8e
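With the index variable now declared inside the macro, callers of for_each_insn_prefix() shrink to two arguments and no longer need their own loop counter. A usage sketch follows; the surrounding helper is illustrative, not taken from the tree.

    /* Illustrative caller of the simplified for_each_insn_prefix() macro. */
    #include <linux/types.h>
    #include <asm/insn.h>

    static bool insn_has_rep_prefix_example(struct insn *insn)
    {
            insn_byte_t p;

            for_each_insn_prefix(insn, p) {
                    if (p == 0xf2 || p == 0xf3)     /* REPNE / REP */
                            return true;
            }
            return false;
    }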