mirror of https://github.com/torvalds/linux.git
x86/mce: Separate global and per-CPU quirks
Many quirks are global configuration settings and a handful apply to each CPU.

Move the per-CPU quirks to vendor init to execute them on each online CPU. Set the global quirks during BSP-only init so they're only executed once and early.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Nikolay Borisov <nik.borisov@suse.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/20250908-wip-mca-updates-v6-0-eef5d6c74b9c@amd.com
This commit is contained in:
parent
a46b2bbe1e
commit
7eee1e9268
|
|
@ -646,6 +646,28 @@ static void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
|
||||||
wrmsrq(MSR_K7_HWCR, hwcr);
|
wrmsrq(MSR_K7_HWCR, hwcr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void amd_apply_cpu_quirks(struct cpuinfo_x86 *c)
|
||||||
|
{
|
||||||
|
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
|
||||||
|
|
||||||
|
/* This should be disabled by the BIOS, but isn't always */
|
||||||
|
if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
|
||||||
|
/*
|
||||||
|
* disable GART TBL walk error reporting, which
|
||||||
|
* trips off incorrectly with the IOMMU & 3ware
|
||||||
|
* & Cerberus:
|
||||||
|
*/
|
||||||
|
clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Various K7s with broken bank 0 around. Always disable
|
||||||
|
* by default.
|
||||||
|
*/
|
||||||
|
if (c->x86 == 6 && this_cpu_read(mce_num_banks))
|
||||||
|
mce_banks[0].ctl = 0;
|
||||||
|
}
|
||||||
|
|
||||||
/* cpu init entry point, called from mce.c with preempt off */
|
/* cpu init entry point, called from mce.c with preempt off */
|
||||||
void mce_amd_feature_init(struct cpuinfo_x86 *c)
|
void mce_amd_feature_init(struct cpuinfo_x86 *c)
|
||||||
{
|
{
|
||||||
|
|
@ -653,6 +675,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
|
||||||
u32 low = 0, high = 0, address = 0;
|
u32 low = 0, high = 0, address = 0;
|
||||||
int offset = -1;
|
int offset = -1;
|
||||||
|
|
||||||
|
amd_apply_cpu_quirks(c);
|
||||||
|
|
||||||
mce_flags.amd_threshold = 1;
|
mce_flags.amd_threshold = 1;
|
||||||
|
|
||||||
for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
|
for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
|
||||||
|
|
|
||||||
|
|
@ -1807,8 +1807,9 @@ static void __mcheck_cpu_mce_banks_init(void)
|
||||||
struct mce_bank *b = &mce_banks[i];
|
struct mce_bank *b = &mce_banks[i];
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Init them all, __mcheck_cpu_apply_quirks() is going to apply
|
* Init them all by default.
|
||||||
* the required vendor quirks before
|
*
|
||||||
|
* The required vendor quirks will be applied before
|
||||||
* __mcheck_cpu_init_prepare_banks() does the final bank setup.
|
* __mcheck_cpu_init_prepare_banks() does the final bank setup.
|
||||||
*/
|
*/
|
||||||
b->ctl = -1ULL;
|
b->ctl = -1ULL;
|
||||||
|
|
@ -1880,20 +1881,8 @@ static void __mcheck_cpu_init_prepare_banks(void)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void apply_quirks_amd(struct cpuinfo_x86 *c)
|
static void amd_apply_global_quirks(struct cpuinfo_x86 *c)
|
||||||
{
|
{
|
||||||
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
|
|
||||||
|
|
||||||
/* This should be disabled by the BIOS, but isn't always */
|
|
||||||
if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
|
|
||||||
/*
|
|
||||||
* disable GART TBL walk error reporting, which
|
|
||||||
* trips off incorrectly with the IOMMU & 3ware
|
|
||||||
* & Cerberus:
|
|
||||||
*/
|
|
||||||
clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (c->x86 < 0x11 && mca_cfg.bootlog < 0) {
|
if (c->x86 < 0x11 && mca_cfg.bootlog < 0) {
|
||||||
/*
|
/*
|
||||||
* Lots of broken BIOS around that don't clear them
|
* Lots of broken BIOS around that don't clear them
|
||||||
|
|
@ -1902,13 +1891,6 @@ static void apply_quirks_amd(struct cpuinfo_x86 *c)
|
||||||
mca_cfg.bootlog = 0;
|
mca_cfg.bootlog = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Various K7s with broken bank 0 around. Always disable
|
|
||||||
* by default.
|
|
||||||
*/
|
|
||||||
if (c->x86 == 6 && this_cpu_read(mce_num_banks))
|
|
||||||
mce_banks[0].ctl = 0;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* overflow_recov is supported for F15h Models 00h-0fh
|
* overflow_recov is supported for F15h Models 00h-0fh
|
||||||
* even though we don't have a CPUID bit for it.
|
* even though we don't have a CPUID bit for it.
|
||||||
|
|
@ -1920,25 +1902,12 @@ static void apply_quirks_amd(struct cpuinfo_x86 *c)
|
||||||
mce_flags.zen_ifu_quirk = 1;
|
mce_flags.zen_ifu_quirk = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void apply_quirks_intel(struct cpuinfo_x86 *c)
|
static void intel_apply_global_quirks(struct cpuinfo_x86 *c)
|
||||||
{
|
{
|
||||||
struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
|
|
||||||
|
|
||||||
/* Older CPUs (prior to family 6) don't need quirks. */
|
/* Older CPUs (prior to family 6) don't need quirks. */
|
||||||
if (c->x86_vfm < INTEL_PENTIUM_PRO)
|
if (c->x86_vfm < INTEL_PENTIUM_PRO)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/*
|
|
||||||
* SDM documents that on family 6 bank 0 should not be written
|
|
||||||
* because it aliases to another special BIOS controlled
|
|
||||||
* register.
|
|
||||||
* But it's not aliased anymore on model 0x1a+
|
|
||||||
* Don't ignore bank 0 completely because there could be a
|
|
||||||
* valid event later, merely don't write CTL0.
|
|
||||||
*/
|
|
||||||
if (c->x86_vfm < INTEL_NEHALEM_EP && this_cpu_read(mce_num_banks))
|
|
||||||
mce_banks[0].init = false;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* All newer Intel systems support MCE broadcasting. Enable
|
* All newer Intel systems support MCE broadcasting. Enable
|
||||||
* synchronization with a one second timeout.
|
* synchronization with a one second timeout.
|
||||||
|
|
@ -1964,7 +1933,7 @@ static void apply_quirks_intel(struct cpuinfo_x86 *c)
|
||||||
mce_flags.skx_repmov_quirk = 1;
|
mce_flags.skx_repmov_quirk = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void apply_quirks_zhaoxin(struct cpuinfo_x86 *c)
|
static void zhaoxin_apply_global_quirks(struct cpuinfo_x86 *c)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* All newer Zhaoxin CPUs support MCE broadcasting. Enable
|
* All newer Zhaoxin CPUs support MCE broadcasting. Enable
|
||||||
|
|
@ -1976,29 +1945,6 @@ static void apply_quirks_zhaoxin(struct cpuinfo_x86 *c)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Add per CPU specific workarounds here */
|
|
||||||
static void __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
|
|
||||||
{
|
|
||||||
struct mca_config *cfg = &mca_cfg;
|
|
||||||
|
|
||||||
switch (c->x86_vendor) {
|
|
||||||
case X86_VENDOR_AMD:
|
|
||||||
apply_quirks_amd(c);
|
|
||||||
break;
|
|
||||||
case X86_VENDOR_INTEL:
|
|
||||||
apply_quirks_intel(c);
|
|
||||||
break;
|
|
||||||
case X86_VENDOR_ZHAOXIN:
|
|
||||||
apply_quirks_zhaoxin(c);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cfg->monarch_timeout < 0)
|
|
||||||
cfg->monarch_timeout = 0;
|
|
||||||
if (cfg->bootlog != 0)
|
|
||||||
cfg->panic_timeout = 30;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
|
static bool __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
|
||||||
{
|
{
|
||||||
if (c->x86 != 5)
|
if (c->x86 != 5)
|
||||||
|
|
@ -2256,6 +2202,23 @@ void mca_bsp_init(struct cpuinfo_x86 *c)
|
||||||
|
|
||||||
if (cap & MCG_SER_P)
|
if (cap & MCG_SER_P)
|
||||||
mca_cfg.ser = 1;
|
mca_cfg.ser = 1;
|
||||||
|
|
||||||
|
switch (c->x86_vendor) {
|
||||||
|
case X86_VENDOR_AMD:
|
||||||
|
amd_apply_global_quirks(c);
|
||||||
|
break;
|
||||||
|
case X86_VENDOR_INTEL:
|
||||||
|
intel_apply_global_quirks(c);
|
||||||
|
break;
|
||||||
|
case X86_VENDOR_ZHAOXIN:
|
||||||
|
zhaoxin_apply_global_quirks(c);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mca_cfg.monarch_timeout < 0)
|
||||||
|
mca_cfg.monarch_timeout = 0;
|
||||||
|
if (mca_cfg.bootlog != 0)
|
||||||
|
mca_cfg.panic_timeout = 30;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
@ -2275,8 +2238,6 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
|
||||||
|
|
||||||
__mcheck_cpu_cap_init();
|
__mcheck_cpu_cap_init();
|
||||||
|
|
||||||
__mcheck_cpu_apply_quirks(c);
|
|
||||||
|
|
||||||
if (!mce_gen_pool_init()) {
|
if (!mce_gen_pool_init()) {
|
||||||
mca_cfg.disabled = 1;
|
mca_cfg.disabled = 1;
|
||||||
pr_emerg("Couldn't allocate MCE records pool!\n");
|
pr_emerg("Couldn't allocate MCE records pool!\n");
|
||||||
|
|
|
||||||
|
|
@ -468,8 +468,26 @@ static void intel_imc_init(struct cpuinfo_x86 *c)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void intel_apply_cpu_quirks(struct cpuinfo_x86 *c)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* SDM documents that on family 6 bank 0 should not be written
|
||||||
|
* because it aliases to another special BIOS controlled
|
||||||
|
* register.
|
||||||
|
* But it's not aliased anymore on model 0x1a+
|
||||||
|
* Don't ignore bank 0 completely because there could be a
|
||||||
|
* valid event later, merely don't write CTL0.
|
||||||
|
*
|
||||||
|
* Older CPUs (prior to family 6) can't reach this point and already
|
||||||
|
* return early due to the check of __mcheck_cpu_ancient_init().
|
||||||
|
*/
|
||||||
|
if (c->x86_vfm < INTEL_NEHALEM_EP && this_cpu_read(mce_num_banks))
|
||||||
|
this_cpu_ptr(mce_banks_array)[0].init = false;
|
||||||
|
}
|
||||||
|
|
||||||
void mce_intel_feature_init(struct cpuinfo_x86 *c)
|
void mce_intel_feature_init(struct cpuinfo_x86 *c)
|
||||||
{
|
{
|
||||||
|
intel_apply_cpu_quirks(c);
|
||||||
intel_init_cmci();
|
intel_init_cmci();
|
||||||
intel_init_lmce();
|
intel_init_lmce();
|
||||||
intel_imc_init(c);
|
intel_imc_init(c);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue