mirror of https://github.com/torvalds/linux.git
- Add support for new AMD family 0x1a models to amd64_edac
- Add an EDAC driver for the AMD VersalNET memory controller which
reports hw errors from different IP blocks in the fabric using an
IPC-type transport
- Drop the silly static number of memory controllers in the Intel EDAC
drivers (skx, i10nm) in favor of a flexible array so that the former
doesn't need to be increased with every new generation which adds more
memory controllers; along with a proper refactoring
- Add support for two Alder Lake-S SOCs to ie31200_edac
- Add an EDAC driver for ARM Cortex A72 cores, and specifically for
reporting L1 and L2 cache errors
- Last but not least, the usual fixes, cleanups and improvements all
over the subsystem
-----BEGIN PGP SIGNATURE-----
iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAmjWYXEACgkQEsHwGGHe
VUqN0g/+KaDOP5caif7/5IJ2fL+9Qv3VvbxucVMS4UgMYBY21V4msfPkuCg8iGes
zEpFUuEFc2NE6XV9i4JNgYNAR+uffOY4rZb67VSr2rQSVeRvBFHb9aMsXBYssV/r
XtCfTdJL/bJ7SLk10aWvBM4quLF9BchdoPctNMt5PuN3dtb1dVFi1TkylXKaocRX
sfu/hOQ0FUbOlYnGTpW+t4TufNcWzC8q9hL4mrbSVHS3XTKk/zQ9PJ8I8f44XqYo
Bn1JXfAErkgo9rqlmjxU90Lg2G+EV+qwDWs61Ox8q3lzbC+9FOd4WIbD3c9TiTT/
Io6tx8PvgFUz43lD+XGoCfd87ZI9CbGoVAEEiFWr+HaqL/XVF5NS5GiBNTyxGGaP
nDzxm1OYQbDEnBfmaWZCMbbd5yCOZ1EZHTgp4VxqJfooU1Ucbct4oPDnERMTNDlv
UUGDh19BAXwcZ9xpy36AIprppZKOBu0WPjXee9sby5cF+KB57Tbrzd+nm/uZRhHj
bTkQTfCcs+EPAksG0snGufy4BlfS6UGqx4HkSZ3ITVJQX4x27razsxTDbKDk50jq
S1cyZlZ5n+mpR0MtC/zNDMB6cxutgAKoqwssVBUiEh9bCaA/tOPqJmoD9Lx2ESDt
0/QcF1ilBRctunguhDbY8EZKye9gM4WWHW5kxE29PtAzepSdTDo=
=4oCR
-----END PGP SIGNATURE-----
Merge tag 'edac_updates_for_v6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras
Pull EDAC updates from Borislav Petkov:
- Add support for new AMD family 0x1a models to amd64_edac
- Add an EDAC driver for the AMD VersalNET memory controller which
reports hw errors from different IP blocks in the fabric using an
IPC-type transport
- Drop the silly static number of memory controllers in the Intel EDAC
drivers (skx, i10nm) in favor of a flexible array so that the former
doesn't need to be increased with every new generation which adds
more memory controllers; along with a proper refactoring
- Add support for two Alder Lake-S SOCs to ie31200_edac
- Add an EDAC driver for ARM Cortex A72 cores, and specifically for
reporting L1 and L2 cache errors
- Last but not least, the usual fixes, cleanups and improvements all
over the subsystem
* tag 'edac_updates_for_v6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras: (23 commits)
EDAC/versalnet: Return the correct error in mc_probe()
EDAC/mc_sysfs: Increase legacy channel support to 16
EDAC/amd64: Add support for AMD family 1Ah-based newer models
EDAC: Add a driver for the AMD Versal NET DDR controller
dt-bindings: memory-controllers: Add support for Versal NET EDAC
RAS: Export log_non_standard_event() to drivers
cdx: Export Symbols for MCDI RPC and Initialization
cdx: Split mcdi.h and reorganize headers
EDAC/skx_common: Use topology_physical_package_id() instead of open coding
EDAC: Fix wrong executable file modes for C source files
EDAC/altera: Use dev_fwnode()
EDAC/skx_common: Remove unused *NUM*_IMC macros
EDAC/i10nm: Reallocate skx_dev list if preconfigured cnt != runtime cnt
EDAC/skx_common: Remove redundant upper bound check for res->imc
EDAC/skx_common: Make skx_dev->imc[] a flexible array
EDAC/skx_common: Swap memory controller index mapping
EDAC/skx_common: Move mc_mapping to be a field inside struct skx_imc
EDAC/{skx_common,skx}: Use configuration data, not global macros
EDAC/i10nm: Skip DIMM enumeration on a disabled memory controller
EDAC/ie31200: Add two more Intel Alder Lake-S SoCs for EDAC support
...
This commit is contained in:
commit
03f76ddff5
|
|
@ -353,6 +353,12 @@ properties:
|
|||
$ref: /schemas/types.yaml#/definitions/phandle
|
||||
description: Link to Mediatek Cache Coherent Interconnect
|
||||
|
||||
edac-enabled:
|
||||
$ref: /schemas/types.yaml#/definitions/flag
|
||||
description:
|
||||
A72 CPUs support Error Detection And Correction (EDAC) on their L1 and
|
||||
L2 caches. This flag marks this function as usable.
|
||||
|
||||
qcom,saw:
|
||||
$ref: /schemas/types.yaml#/definitions/phandle
|
||||
description:
|
||||
|
|
@ -399,6 +405,17 @@ properties:
|
|||
allOf:
|
||||
- $ref: /schemas/cpu.yaml#
|
||||
- $ref: /schemas/opp/opp-v1.yaml#
|
||||
- if:
|
||||
not:
|
||||
properties:
|
||||
compatible:
|
||||
contains:
|
||||
const: arm,cortex-a72
|
||||
then:
|
||||
# Allow edac-enabled only for Cortex A72
|
||||
properties:
|
||||
edac-enabled: false
|
||||
|
||||
- if:
|
||||
# If the enable-method property contains one of those values
|
||||
properties:
|
||||
|
|
|
|||
|
|
@ -0,0 +1,41 @@
|
|||
# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/memory-controllers/xlnx,versal-net-ddrmc5.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Xilinx Versal NET Memory Controller
|
||||
|
||||
maintainers:
|
||||
- Shubhrajyoti Datta <shubhrajyoti.datta@amd.com>
|
||||
|
||||
description:
|
||||
The integrated DDR Memory Controllers (DDRMCs) support both DDR5 and LPDDR5
|
||||
compact and extended memory interfaces. Versal NET DDR memory controller
|
||||
has an optional ECC support which correct single bit ECC errors and detect
|
||||
double bit ECC errors. It also has support for reporting other errors like
|
||||
MMCM (Mixed-Mode Clock Manager) errors and General software errors.
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
const: xlnx,versal-net-ddrmc5
|
||||
|
||||
amd,rproc:
|
||||
$ref: /schemas/types.yaml#/definitions/phandle
|
||||
description:
|
||||
phandle to the remoteproc_r5 rproc node using which APU interacts
|
||||
with remote processor. APU primarily communicates with the RPU for
|
||||
accessing the DDRMC address space and getting error notification.
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- amd,rproc
|
||||
|
||||
additionalProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
memory-controller {
|
||||
compatible = "xlnx,versal-net-ddrmc5";
|
||||
amd,rproc = <&remoteproc_r5>;
|
||||
};
|
||||
17
MAINTAINERS
17
MAINTAINERS
|
|
@ -8745,9 +8745,6 @@ F: drivers/edac/thunderx_edac*
|
|||
EDAC-CORE
|
||||
M: Borislav Petkov <bp@alien8.de>
|
||||
M: Tony Luck <tony.luck@intel.com>
|
||||
R: James Morse <james.morse@arm.com>
|
||||
R: Mauro Carvalho Chehab <mchehab@kernel.org>
|
||||
R: Robert Richter <rric@kernel.org>
|
||||
L: linux-edac@vger.kernel.org
|
||||
S: Supported
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras.git edac-for-next
|
||||
|
|
@ -8755,6 +8752,13 @@ F: Documentation/driver-api/edac.rst
|
|||
F: drivers/edac/
|
||||
F: include/linux/edac.h
|
||||
|
||||
EDAC-A72
|
||||
M: Vijay Balakrishna <vijayb@linux.microsoft.com>
|
||||
M: Tyler Hicks <code@tyhicks.com>
|
||||
L: linux-edac@vger.kernel.org
|
||||
S: Supported
|
||||
F: drivers/edac/a72_edac.c
|
||||
|
||||
EDAC-DMC520
|
||||
M: Lei Wang <lewan@microsoft.com>
|
||||
L: linux-edac@vger.kernel.org
|
||||
|
|
@ -27675,6 +27679,13 @@ S: Maintained
|
|||
F: Documentation/devicetree/bindings/memory-controllers/xlnx,versal-ddrmc-edac.yaml
|
||||
F: drivers/edac/versal_edac.c
|
||||
|
||||
XILINX VERSALNET EDAC DRIVER
|
||||
M: Shubhrajyoti Datta <shubhrajyoti.datta@amd.com>
|
||||
S: Maintained
|
||||
F: Documentation/devicetree/bindings/memory-controllers/xlnx,versal-net-ddrmc5.yaml
|
||||
F: drivers/edac/versalnet_edac.c
|
||||
F: include/linux/cdx/edac_cdx_pcol.h
|
||||
|
||||
XILINX WATCHDOG DRIVER
|
||||
M: Srinivas Neeli <srinivas.neeli@amd.com>
|
||||
R: Shubhrajyoti Datta <shubhrajyoti.datta@amd.com>
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@
|
|||
#include "cdx_controller.h"
|
||||
#include "../cdx.h"
|
||||
#include "mcdi_functions.h"
|
||||
#include "mcdi.h"
|
||||
#include "mcdid.h"
|
||||
|
||||
static unsigned int cdx_mcdi_rpc_timeout(struct cdx_mcdi *cdx, unsigned int cmd)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@
|
|||
#include "../cdx.h"
|
||||
#include "cdx_controller.h"
|
||||
#include "mcdi_functions.h"
|
||||
#include "mcdi.h"
|
||||
#include "mcdid.h"
|
||||
|
||||
static struct rpmsg_device_id cdx_rpmsg_id_table[] = {
|
||||
{ .name = "mcdi_ipc" },
|
||||
|
|
|
|||
|
|
@ -23,9 +23,10 @@
|
|||
#include <linux/log2.h>
|
||||
#include <linux/net_tstamp.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/cdx/bitfield.h>
|
||||
|
||||
#include "bitfield.h"
|
||||
#include "mcdi.h"
|
||||
#include <linux/cdx/mcdi.h>
|
||||
#include "mcdid.h"
|
||||
|
||||
static void cdx_mcdi_cancel_cmd(struct cdx_mcdi *cdx, struct cdx_mcdi_cmd *cmd);
|
||||
static void cdx_mcdi_wait_for_cleanup(struct cdx_mcdi *cdx);
|
||||
|
|
@ -99,6 +100,19 @@ static unsigned long cdx_mcdi_rpc_timeout(struct cdx_mcdi *cdx, unsigned int cmd
|
|||
return cdx->mcdi_ops->mcdi_rpc_timeout(cdx, cmd);
|
||||
}
|
||||
|
||||
/**
|
||||
* cdx_mcdi_init - Initialize MCDI (Management Controller Driver Interface) state
|
||||
* @cdx: Handle to the CDX MCDI structure
|
||||
*
|
||||
* This function allocates and initializes internal MCDI structures and resources
|
||||
* for the CDX device, including the workqueue, locking primitives, and command
|
||||
* tracking mechanisms. It sets the initial operating mode and prepares the device
|
||||
* for MCDI operations.
|
||||
*
|
||||
* Return:
|
||||
* * 0 - on success
|
||||
* * -ENOMEM - if memory allocation or workqueue creation fails
|
||||
*/
|
||||
int cdx_mcdi_init(struct cdx_mcdi *cdx)
|
||||
{
|
||||
struct cdx_mcdi_iface *mcdi;
|
||||
|
|
@ -128,7 +142,16 @@ int cdx_mcdi_init(struct cdx_mcdi *cdx)
|
|||
fail:
|
||||
return rc;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cdx_mcdi_init);
|
||||
|
||||
/**
|
||||
* cdx_mcdi_finish - Cleanup MCDI (Management Controller Driver Interface) state
|
||||
* @cdx: Handle to the CDX MCDI structure
|
||||
*
|
||||
* This function is responsible for cleaning up the MCDI (Management Controller Driver Interface)
|
||||
* resources associated with a cdx_mcdi structure. Also destroys the mcdi workqueue.
|
||||
*
|
||||
*/
|
||||
void cdx_mcdi_finish(struct cdx_mcdi *cdx)
|
||||
{
|
||||
struct cdx_mcdi_iface *mcdi;
|
||||
|
|
@ -143,6 +166,7 @@ void cdx_mcdi_finish(struct cdx_mcdi *cdx)
|
|||
kfree(cdx->mcdi);
|
||||
cdx->mcdi = NULL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cdx_mcdi_finish);
|
||||
|
||||
static bool cdx_mcdi_flushed(struct cdx_mcdi_iface *mcdi, bool ignore_cleanups)
|
||||
{
|
||||
|
|
@ -553,6 +577,19 @@ static void cdx_mcdi_start_or_queue(struct cdx_mcdi_iface *mcdi,
|
|||
cdx_mcdi_cmd_start_or_queue(mcdi, cmd);
|
||||
}
|
||||
|
||||
/**
|
||||
* cdx_mcdi_process_cmd - Process an incoming MCDI response
|
||||
* @cdx: Handle to the CDX MCDI structure
|
||||
* @outbuf: Pointer to the response buffer received from the management controller
|
||||
* @len: Length of the response buffer in bytes
|
||||
*
|
||||
* This function handles a response from the management controller. It locates the
|
||||
* corresponding command using the sequence number embedded in the header,
|
||||
* completes the command if it is still pending, and initiates any necessary cleanup.
|
||||
*
|
||||
* The function assumes that the response buffer is well-formed and at least one
|
||||
* dword in size.
|
||||
*/
|
||||
void cdx_mcdi_process_cmd(struct cdx_mcdi *cdx, struct cdx_dword *outbuf, int len)
|
||||
{
|
||||
struct cdx_mcdi_iface *mcdi;
|
||||
|
|
@ -590,6 +627,7 @@ void cdx_mcdi_process_cmd(struct cdx_mcdi *cdx, struct cdx_dword *outbuf, int le
|
|||
|
||||
cdx_mcdi_process_cleanup_list(mcdi->cdx, &cleanup_list);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cdx_mcdi_process_cmd);
|
||||
|
||||
static void cdx_mcdi_cmd_work(struct work_struct *context)
|
||||
{
|
||||
|
|
@ -757,6 +795,7 @@ int cdx_mcdi_rpc(struct cdx_mcdi *cdx, unsigned int cmd,
|
|||
return cdx_mcdi_rpc_sync(cdx, cmd, inbuf, inlen, outbuf, outlen,
|
||||
outlen_actual, false);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(cdx_mcdi_rpc);
|
||||
|
||||
/**
|
||||
* cdx_mcdi_rpc_async - Schedule an MCDI command to run asynchronously
|
||||
|
|
|
|||
|
|
@ -5,7 +5,6 @@
|
|||
|
||||
#include <linux/module.h>
|
||||
|
||||
#include "mcdi.h"
|
||||
#include "mcdi_functions.h"
|
||||
|
||||
int cdx_mcdi_get_num_buses(struct cdx_mcdi *cdx)
|
||||
|
|
|
|||
|
|
@ -8,7 +8,8 @@
|
|||
#ifndef CDX_MCDI_FUNCTIONS_H
|
||||
#define CDX_MCDI_FUNCTIONS_H
|
||||
|
||||
#include "mcdi.h"
|
||||
#include <linux/cdx/mcdi.h>
|
||||
#include "mcdid.h"
|
||||
#include "../cdx.h"
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -0,0 +1,63 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
* Copyright 2008-2013 Solarflare Communications Inc.
|
||||
* Copyright (C) 2022-2025, Advanced Micro Devices, Inc.
|
||||
*/
|
||||
|
||||
#ifndef CDX_MCDID_H
|
||||
#define CDX_MCDID_H
|
||||
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/kref.h>
|
||||
#include <linux/rpmsg.h>
|
||||
|
||||
#include "mc_cdx_pcol.h"
|
||||
|
||||
#ifdef DEBUG
|
||||
#define CDX_WARN_ON_ONCE_PARANOID(x) WARN_ON_ONCE(x)
|
||||
#define CDX_WARN_ON_PARANOID(x) WARN_ON(x)
|
||||
#else
|
||||
#define CDX_WARN_ON_ONCE_PARANOID(x) do {} while (0)
|
||||
#define CDX_WARN_ON_PARANOID(x) do {} while (0)
|
||||
#endif
|
||||
|
||||
#define MCDI_BUF_LEN (8 + MCDI_CTL_SDU_LEN_MAX)
|
||||
|
||||
static inline struct cdx_mcdi_iface *cdx_mcdi_if(struct cdx_mcdi *cdx)
|
||||
{
|
||||
return cdx->mcdi ? &cdx->mcdi->iface : NULL;
|
||||
}
|
||||
|
||||
int cdx_mcdi_rpc_async(struct cdx_mcdi *cdx, unsigned int cmd,
|
||||
const struct cdx_dword *inbuf, size_t inlen,
|
||||
cdx_mcdi_async_completer *complete,
|
||||
unsigned long cookie);
|
||||
int cdx_mcdi_wait_for_quiescence(struct cdx_mcdi *cdx,
|
||||
unsigned int timeout_jiffies);
|
||||
|
||||
/*
|
||||
* We expect that 16- and 32-bit fields in MCDI requests and responses
|
||||
* are appropriately aligned, but 64-bit fields are only
|
||||
* 32-bit-aligned.
|
||||
*/
|
||||
#define MCDI_BYTE(_buf, _field) \
|
||||
((void)BUILD_BUG_ON_ZERO(MC_CMD_ ## _field ## _LEN != 1), \
|
||||
*MCDI_PTR(_buf, _field))
|
||||
#define MCDI_WORD(_buf, _field) \
|
||||
((void)BUILD_BUG_ON_ZERO(MC_CMD_ ## _field ## _LEN != 2), \
|
||||
le16_to_cpu(*(__force const __le16 *)MCDI_PTR(_buf, _field)))
|
||||
#define MCDI_POPULATE_DWORD_1(_buf, _field, _name1, _value1) \
|
||||
CDX_POPULATE_DWORD_1(*_MCDI_DWORD(_buf, _field), \
|
||||
MC_CMD_ ## _name1, _value1)
|
||||
#define MCDI_SET_QWORD(_buf, _field, _value) \
|
||||
do { \
|
||||
CDX_POPULATE_DWORD_1(_MCDI_DWORD(_buf, _field)[0], \
|
||||
CDX_DWORD, (u32)(_value)); \
|
||||
CDX_POPULATE_DWORD_1(_MCDI_DWORD(_buf, _field)[1], \
|
||||
CDX_DWORD, (u64)(_value) >> 32); \
|
||||
} while (0)
|
||||
#define MCDI_QWORD(_buf, _field) \
|
||||
(CDX_DWORD_FIELD(_MCDI_DWORD(_buf, _field)[0], CDX_DWORD) | \
|
||||
(u64)CDX_DWORD_FIELD(_MCDI_DWORD(_buf, _field)[1], CDX_DWORD) << 32)
|
||||
|
||||
#endif /* CDX_MCDID_H */
|
||||
|
|
@ -576,4 +576,20 @@ config EDAC_LOONGSON
|
|||
errors (CE) only. Loongson-3A5000/3C5000/3D5000/3A6000/3C6000
|
||||
are compatible.
|
||||
|
||||
config EDAC_CORTEX_A72
|
||||
tristate "ARM Cortex A72"
|
||||
depends on ARM64
|
||||
help
|
||||
Support for L1/L2 cache error detection for ARM Cortex A72 processor.
|
||||
The detected and reported errors are from reading CPU/L2 memory error
|
||||
syndrome registers.
|
||||
|
||||
config EDAC_VERSALNET
|
||||
tristate "AMD VersalNET DDR Controller"
|
||||
depends on CDX_CONTROLLER && ARCH_ZYNQMP
|
||||
help
|
||||
Support for single bit error correction, double bit error detection
|
||||
and other system errors from various IP subsystems like RPU, NOCs,
|
||||
HNICX, PL on the AMD Versal NET DDR memory controller.
|
||||
|
||||
endif # EDAC
|
||||
|
|
|
|||
|
|
@ -88,3 +88,5 @@ obj-$(CONFIG_EDAC_NPCM) += npcm_edac.o
|
|||
obj-$(CONFIG_EDAC_ZYNQMP) += zynqmp_edac.o
|
||||
obj-$(CONFIG_EDAC_VERSAL) += versal_edac.o
|
||||
obj-$(CONFIG_EDAC_LOONGSON) += loongson_edac.o
|
||||
obj-$(CONFIG_EDAC_VERSALNET) += versalnet_edac.o
|
||||
obj-$(CONFIG_EDAC_CORTEX_A72) += a72_edac.o
|
||||
|
|
|
|||
|
|
@ -0,0 +1,225 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Cortex A72 EDAC L1 and L2 cache error detection
|
||||
*
|
||||
* Copyright (c) 2020 Pengutronix, Sascha Hauer <s.hauer@pengutronix.de>
|
||||
* Copyright (c) 2025 Microsoft Corporation, <vijayb@linux.microsoft.com>
|
||||
*
|
||||
* Based on Code from:
|
||||
* Copyright (c) 2018, NXP Semiconductor
|
||||
* Author: York Sun <york.sun@nxp.com>
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/of.h>
|
||||
#include <linux/bitfield.h>
|
||||
#include <asm/smp_plat.h>
|
||||
|
||||
#include "edac_module.h"
|
||||
|
||||
#define DRVNAME "a72-edac"
|
||||
|
||||
#define SYS_CPUMERRSR_EL1 sys_reg(3, 1, 15, 2, 2)
|
||||
#define SYS_L2MERRSR_EL1 sys_reg(3, 1, 15, 2, 3)
|
||||
|
||||
#define CPUMERRSR_EL1_RAMID GENMASK(30, 24)
|
||||
#define L2MERRSR_EL1_CPUID_WAY GENMASK(21, 18)
|
||||
|
||||
#define CPUMERRSR_EL1_VALID BIT(31)
|
||||
#define CPUMERRSR_EL1_FATAL BIT(63)
|
||||
#define L2MERRSR_EL1_VALID BIT(31)
|
||||
#define L2MERRSR_EL1_FATAL BIT(63)
|
||||
|
||||
#define L1_I_TAG_RAM 0x00
|
||||
#define L1_I_DATA_RAM 0x01
|
||||
#define L1_D_TAG_RAM 0x08
|
||||
#define L1_D_DATA_RAM 0x09
|
||||
#define TLB_RAM 0x18
|
||||
|
||||
#define MESSAGE_SIZE 64
|
||||
|
||||
struct mem_err_synd_reg {
|
||||
u64 cpu_mesr;
|
||||
u64 l2_mesr;
|
||||
};
|
||||
|
||||
static struct cpumask compat_mask;
|
||||
|
||||
static void report_errors(struct edac_device_ctl_info *edac_ctl, int cpu,
|
||||
struct mem_err_synd_reg *mesr)
|
||||
{
|
||||
u64 cpu_mesr = mesr->cpu_mesr;
|
||||
u64 l2_mesr = mesr->l2_mesr;
|
||||
char msg[MESSAGE_SIZE];
|
||||
|
||||
if (cpu_mesr & CPUMERRSR_EL1_VALID) {
|
||||
const char *str;
|
||||
bool fatal = cpu_mesr & CPUMERRSR_EL1_FATAL;
|
||||
|
||||
switch (FIELD_GET(CPUMERRSR_EL1_RAMID, cpu_mesr)) {
|
||||
case L1_I_TAG_RAM:
|
||||
str = "L1-I Tag RAM";
|
||||
break;
|
||||
case L1_I_DATA_RAM:
|
||||
str = "L1-I Data RAM";
|
||||
break;
|
||||
case L1_D_TAG_RAM:
|
||||
str = "L1-D Tag RAM";
|
||||
break;
|
||||
case L1_D_DATA_RAM:
|
||||
str = "L1-D Data RAM";
|
||||
break;
|
||||
case TLB_RAM:
|
||||
str = "TLB RAM";
|
||||
break;
|
||||
default:
|
||||
str = "Unspecified";
|
||||
break;
|
||||
}
|
||||
|
||||
snprintf(msg, MESSAGE_SIZE, "%s %s error(s) on CPU %d",
|
||||
str, fatal ? "fatal" : "correctable", cpu);
|
||||
|
||||
if (fatal)
|
||||
edac_device_handle_ue(edac_ctl, cpu, 0, msg);
|
||||
else
|
||||
edac_device_handle_ce(edac_ctl, cpu, 0, msg);
|
||||
}
|
||||
|
||||
if (l2_mesr & L2MERRSR_EL1_VALID) {
|
||||
bool fatal = l2_mesr & L2MERRSR_EL1_FATAL;
|
||||
|
||||
snprintf(msg, MESSAGE_SIZE, "L2 %s error(s) on CPU %d CPUID/WAY 0x%lx",
|
||||
fatal ? "fatal" : "correctable", cpu,
|
||||
FIELD_GET(L2MERRSR_EL1_CPUID_WAY, l2_mesr));
|
||||
if (fatal)
|
||||
edac_device_handle_ue(edac_ctl, cpu, 1, msg);
|
||||
else
|
||||
edac_device_handle_ce(edac_ctl, cpu, 1, msg);
|
||||
}
|
||||
}
|
||||
|
||||
static void read_errors(void *data)
|
||||
{
|
||||
struct mem_err_synd_reg *mesr = data;
|
||||
|
||||
mesr->cpu_mesr = read_sysreg_s(SYS_CPUMERRSR_EL1);
|
||||
if (mesr->cpu_mesr & CPUMERRSR_EL1_VALID) {
|
||||
write_sysreg_s(0, SYS_CPUMERRSR_EL1);
|
||||
isb();
|
||||
}
|
||||
mesr->l2_mesr = read_sysreg_s(SYS_L2MERRSR_EL1);
|
||||
if (mesr->l2_mesr & L2MERRSR_EL1_VALID) {
|
||||
write_sysreg_s(0, SYS_L2MERRSR_EL1);
|
||||
isb();
|
||||
}
|
||||
}
|
||||
|
||||
static void a72_edac_check(struct edac_device_ctl_info *edac_ctl)
|
||||
{
|
||||
struct mem_err_synd_reg mesr;
|
||||
int cpu;
|
||||
|
||||
cpus_read_lock();
|
||||
for_each_cpu_and(cpu, cpu_online_mask, &compat_mask) {
|
||||
smp_call_function_single(cpu, read_errors, &mesr, true);
|
||||
report_errors(edac_ctl, cpu, &mesr);
|
||||
}
|
||||
cpus_read_unlock();
|
||||
}
|
||||
|
||||
static int a72_edac_probe(struct platform_device *pdev)
|
||||
{
|
||||
struct edac_device_ctl_info *edac_ctl;
|
||||
struct device *dev = &pdev->dev;
|
||||
int rc;
|
||||
|
||||
edac_ctl = edac_device_alloc_ctl_info(0, "cpu",
|
||||
num_possible_cpus(), "L", 2, 1,
|
||||
edac_device_alloc_index());
|
||||
if (!edac_ctl)
|
||||
return -ENOMEM;
|
||||
|
||||
edac_ctl->edac_check = a72_edac_check;
|
||||
edac_ctl->dev = dev;
|
||||
edac_ctl->mod_name = dev_name(dev);
|
||||
edac_ctl->dev_name = dev_name(dev);
|
||||
edac_ctl->ctl_name = DRVNAME;
|
||||
dev_set_drvdata(dev, edac_ctl);
|
||||
|
||||
rc = edac_device_add_device(edac_ctl);
|
||||
if (rc)
|
||||
goto out_dev;
|
||||
|
||||
return 0;
|
||||
|
||||
out_dev:
|
||||
edac_device_free_ctl_info(edac_ctl);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void a72_edac_remove(struct platform_device *pdev)
|
||||
{
|
||||
struct edac_device_ctl_info *edac_ctl = dev_get_drvdata(&pdev->dev);
|
||||
|
||||
edac_device_del_device(edac_ctl->dev);
|
||||
edac_device_free_ctl_info(edac_ctl);
|
||||
}
|
||||
|
||||
static const struct of_device_id cortex_arm64_edac_of_match[] = {
|
||||
{ .compatible = "arm,cortex-a72" },
|
||||
{}
|
||||
};
|
||||
MODULE_DEVICE_TABLE(of, cortex_arm64_edac_of_match);
|
||||
|
||||
static struct platform_driver a72_edac_driver = {
|
||||
.probe = a72_edac_probe,
|
||||
.remove = a72_edac_remove,
|
||||
.driver = {
|
||||
.name = DRVNAME,
|
||||
},
|
||||
};
|
||||
|
||||
static struct platform_device *a72_pdev;
|
||||
|
||||
static int __init a72_edac_driver_init(void)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct device_node *np __free(device_node) = of_cpu_device_node_get(cpu);
|
||||
if (np) {
|
||||
if (of_match_node(cortex_arm64_edac_of_match, np) &&
|
||||
of_property_read_bool(np, "edac-enabled")) {
|
||||
cpumask_set_cpu(cpu, &compat_mask);
|
||||
}
|
||||
} else {
|
||||
pr_warn("failed to find device node for CPU %d\n", cpu);
|
||||
}
|
||||
}
|
||||
|
||||
if (cpumask_empty(&compat_mask))
|
||||
return 0;
|
||||
|
||||
a72_pdev = platform_device_register_simple(DRVNAME, -1, NULL, 0);
|
||||
if (IS_ERR(a72_pdev)) {
|
||||
pr_err("failed to register A72 EDAC device\n");
|
||||
return PTR_ERR(a72_pdev);
|
||||
}
|
||||
|
||||
return platform_driver_register(&a72_edac_driver);
|
||||
}
|
||||
|
||||
static void __exit a72_edac_driver_exit(void)
|
||||
{
|
||||
platform_device_unregister(a72_pdev);
|
||||
platform_driver_unregister(&a72_edac_driver);
|
||||
}
|
||||
|
||||
module_init(a72_edac_driver_init);
|
||||
module_exit(a72_edac_driver_exit);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de>");
|
||||
MODULE_DESCRIPTION("Cortex A72 L1 and L2 cache EDAC driver");
|
||||
|
|
@ -2130,8 +2130,8 @@ static int altr_edac_a10_probe(struct platform_device *pdev)
|
|||
edac->irq_chip.name = pdev->dev.of_node->name;
|
||||
edac->irq_chip.irq_mask = a10_eccmgr_irq_mask;
|
||||
edac->irq_chip.irq_unmask = a10_eccmgr_irq_unmask;
|
||||
edac->domain = irq_domain_create_linear(of_fwnode_handle(pdev->dev.of_node),
|
||||
64, &a10_eccmgr_ic_ops, edac);
|
||||
edac->domain = irq_domain_create_linear(dev_fwnode(&pdev->dev), 64, &a10_eccmgr_ic_ops,
|
||||
edac);
|
||||
if (!edac->domain) {
|
||||
dev_err(&pdev->dev, "Error adding IRQ domain\n");
|
||||
return -ENOMEM;
|
||||
|
|
|
|||
|
|
@ -3923,6 +3923,26 @@ static int per_family_init(struct amd64_pvt *pvt)
|
|||
pvt->ctl_name = "F1Ah_M40h";
|
||||
pvt->flags.zn_regs_v2 = 1;
|
||||
break;
|
||||
case 0x50 ... 0x57:
|
||||
pvt->ctl_name = "F1Ah_M50h";
|
||||
pvt->max_mcs = 16;
|
||||
pvt->flags.zn_regs_v2 = 1;
|
||||
break;
|
||||
case 0x90 ... 0x9f:
|
||||
pvt->ctl_name = "F1Ah_M90h";
|
||||
pvt->max_mcs = 8;
|
||||
pvt->flags.zn_regs_v2 = 1;
|
||||
break;
|
||||
case 0xa0 ... 0xaf:
|
||||
pvt->ctl_name = "F1Ah_MA0h";
|
||||
pvt->max_mcs = 8;
|
||||
pvt->flags.zn_regs_v2 = 1;
|
||||
break;
|
||||
case 0xc0 ... 0xc7:
|
||||
pvt->ctl_name = "F1Ah_MC0h";
|
||||
pvt->max_mcs = 16;
|
||||
pvt->flags.zn_regs_v2 = 1;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
|
|
|
|||
|
|
@ -96,7 +96,7 @@
|
|||
/* Hardware limit on ChipSelect rows per MC and processors per system */
|
||||
#define NUM_CHIPSELECTS 8
|
||||
#define DRAM_RANGES 8
|
||||
#define NUM_CONTROLLERS 12
|
||||
#define NUM_CONTROLLERS 16
|
||||
|
||||
#define ON true
|
||||
#define OFF false
|
||||
|
|
|
|||
|
|
@ -305,6 +305,14 @@ DEVICE_CHANNEL(ch10_dimm_label, S_IRUGO | S_IWUSR,
|
|||
channel_dimm_label_show, channel_dimm_label_store, 10);
|
||||
DEVICE_CHANNEL(ch11_dimm_label, S_IRUGO | S_IWUSR,
|
||||
channel_dimm_label_show, channel_dimm_label_store, 11);
|
||||
DEVICE_CHANNEL(ch12_dimm_label, S_IRUGO | S_IWUSR,
|
||||
channel_dimm_label_show, channel_dimm_label_store, 12);
|
||||
DEVICE_CHANNEL(ch13_dimm_label, S_IRUGO | S_IWUSR,
|
||||
channel_dimm_label_show, channel_dimm_label_store, 13);
|
||||
DEVICE_CHANNEL(ch14_dimm_label, S_IRUGO | S_IWUSR,
|
||||
channel_dimm_label_show, channel_dimm_label_store, 14);
|
||||
DEVICE_CHANNEL(ch15_dimm_label, S_IRUGO | S_IWUSR,
|
||||
channel_dimm_label_show, channel_dimm_label_store, 15);
|
||||
|
||||
/* Total possible dynamic DIMM Label attribute file table */
|
||||
static struct attribute *dynamic_csrow_dimm_attr[] = {
|
||||
|
|
@ -320,6 +328,10 @@ static struct attribute *dynamic_csrow_dimm_attr[] = {
|
|||
&dev_attr_legacy_ch9_dimm_label.attr.attr,
|
||||
&dev_attr_legacy_ch10_dimm_label.attr.attr,
|
||||
&dev_attr_legacy_ch11_dimm_label.attr.attr,
|
||||
&dev_attr_legacy_ch12_dimm_label.attr.attr,
|
||||
&dev_attr_legacy_ch13_dimm_label.attr.attr,
|
||||
&dev_attr_legacy_ch14_dimm_label.attr.attr,
|
||||
&dev_attr_legacy_ch15_dimm_label.attr.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
|
|
@ -348,6 +360,14 @@ DEVICE_CHANNEL(ch10_ce_count, S_IRUGO,
|
|||
channel_ce_count_show, NULL, 10);
|
||||
DEVICE_CHANNEL(ch11_ce_count, S_IRUGO,
|
||||
channel_ce_count_show, NULL, 11);
|
||||
DEVICE_CHANNEL(ch12_ce_count, S_IRUGO,
|
||||
channel_ce_count_show, NULL, 12);
|
||||
DEVICE_CHANNEL(ch13_ce_count, S_IRUGO,
|
||||
channel_ce_count_show, NULL, 13);
|
||||
DEVICE_CHANNEL(ch14_ce_count, S_IRUGO,
|
||||
channel_ce_count_show, NULL, 14);
|
||||
DEVICE_CHANNEL(ch15_ce_count, S_IRUGO,
|
||||
channel_ce_count_show, NULL, 15);
|
||||
|
||||
/* Total possible dynamic ce_count attribute file table */
|
||||
static struct attribute *dynamic_csrow_ce_count_attr[] = {
|
||||
|
|
@ -363,6 +383,10 @@ static struct attribute *dynamic_csrow_ce_count_attr[] = {
|
|||
&dev_attr_legacy_ch9_ce_count.attr.attr,
|
||||
&dev_attr_legacy_ch10_ce_count.attr.attr,
|
||||
&dev_attr_legacy_ch11_ce_count.attr.attr,
|
||||
&dev_attr_legacy_ch12_ce_count.attr.attr,
|
||||
&dev_attr_legacy_ch13_ce_count.attr.attr,
|
||||
&dev_attr_legacy_ch14_ce_count.attr.attr,
|
||||
&dev_attr_legacy_ch15_ce_count.attr.attr,
|
||||
NULL
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -468,17 +468,18 @@ static int i10nm_get_imc_num(struct res_config *cfg)
|
|||
return -ENODEV;
|
||||
}
|
||||
|
||||
if (imc_num > I10NM_NUM_DDR_IMC) {
|
||||
i10nm_printk(KERN_ERR, "Need to make I10NM_NUM_DDR_IMC >= %d\n", imc_num);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (cfg->ddr_imc_num != imc_num) {
|
||||
/*
|
||||
* Store the number of present DDR memory controllers.
|
||||
* Update the configuration data to reflect the number of
|
||||
* present DDR memory controllers.
|
||||
*/
|
||||
cfg->ddr_imc_num = imc_num;
|
||||
edac_dbg(2, "Set DDR MC number: %d", imc_num);
|
||||
|
||||
/* Release and reallocate skx_dev list with the updated number. */
|
||||
skx_remove();
|
||||
if (skx_get_all_bus_mappings(cfg, &i10nm_edac_list) <= 0)
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
@ -1057,6 +1058,15 @@ static bool i10nm_check_ecc(struct skx_imc *imc, int chan)
|
|||
return !!GET_BITFIELD(mcmtr, 2, 2);
|
||||
}
|
||||
|
||||
static bool i10nm_channel_disabled(struct skx_imc *imc, int chan)
|
||||
{
|
||||
u32 mcmtr = I10NM_GET_MCMTR(imc, chan);
|
||||
|
||||
edac_dbg(1, "mc%d ch%d mcmtr reg %x\n", imc->mc, chan, mcmtr);
|
||||
|
||||
return (mcmtr == ~0 || GET_BITFIELD(mcmtr, 18, 18));
|
||||
}
|
||||
|
||||
static int i10nm_get_dimm_config(struct mem_ctl_info *mci,
|
||||
struct res_config *cfg)
|
||||
{
|
||||
|
|
@ -1070,6 +1080,11 @@ static int i10nm_get_dimm_config(struct mem_ctl_info *mci,
|
|||
if (!imc->mbase)
|
||||
continue;
|
||||
|
||||
if (i10nm_channel_disabled(imc, i)) {
|
||||
edac_dbg(1, "mc%d ch%d is disabled.\n", imc->mc, i);
|
||||
continue;
|
||||
}
|
||||
|
||||
ndimms = 0;
|
||||
|
||||
if (res_cfg->type != GNR)
|
||||
|
|
|
|||
|
|
@ -99,6 +99,8 @@
|
|||
|
||||
/* Alder Lake-S */
|
||||
#define PCI_DEVICE_ID_INTEL_IE31200_ADL_S_1 0x4660
|
||||
#define PCI_DEVICE_ID_INTEL_IE31200_ADL_S_2 0x4668 /* 8P+4E, e.g. i7-12700K */
|
||||
#define PCI_DEVICE_ID_INTEL_IE31200_ADL_S_3 0x4648 /* 6P+4E, e.g. i5-12600K */
|
||||
|
||||
/* Bartlett Lake-S */
|
||||
#define PCI_DEVICE_ID_INTEL_IE31200_BTL_S_1 0x4639
|
||||
|
|
@ -761,6 +763,8 @@ static const struct pci_device_id ie31200_pci_tbl[] = {
|
|||
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_6), (kernel_ulong_t)&rpl_s_cfg},
|
||||
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_HX_1), (kernel_ulong_t)&rpl_s_cfg},
|
||||
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_ADL_S_1), (kernel_ulong_t)&rpl_s_cfg},
|
||||
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_ADL_S_2), (kernel_ulong_t)&rpl_s_cfg},
|
||||
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_ADL_S_3), (kernel_ulong_t)&rpl_s_cfg},
|
||||
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_1), (kernel_ulong_t)&rpl_s_cfg},
|
||||
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_2), (kernel_ulong_t)&rpl_s_cfg},
|
||||
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_BTL_S_3), (kernel_ulong_t)&rpl_s_cfg},
|
||||
|
|
|
|||
|
|
@ -33,6 +33,15 @@ static unsigned int nvdimm_count;
|
|||
#define MASK26 0x3FFFFFF /* Mask for 2^26 */
|
||||
#define MASK29 0x1FFFFFFF /* Mask for 2^29 */
|
||||
|
||||
static struct res_config skx_cfg = {
|
||||
.type = SKX,
|
||||
.decs_did = 0x2016,
|
||||
.busno_cfg_offset = 0xcc,
|
||||
.ddr_imc_num = 2,
|
||||
.ddr_chan_num = 3,
|
||||
.ddr_dimm_num = 2,
|
||||
};
|
||||
|
||||
static struct skx_dev *get_skx_dev(struct pci_bus *bus, u8 idx)
|
||||
{
|
||||
struct skx_dev *d;
|
||||
|
|
@ -52,7 +61,7 @@ enum munittype {
|
|||
|
||||
struct munit {
|
||||
u16 did;
|
||||
u16 devfn[SKX_NUM_IMC];
|
||||
u16 devfn[2];
|
||||
u8 busidx;
|
||||
u8 per_socket;
|
||||
enum munittype mtype;
|
||||
|
|
@ -89,11 +98,11 @@ static int get_all_munits(const struct munit *m)
|
|||
if (!pdev)
|
||||
break;
|
||||
ndev++;
|
||||
if (m->per_socket == SKX_NUM_IMC) {
|
||||
for (i = 0; i < SKX_NUM_IMC; i++)
|
||||
if (m->per_socket == skx_cfg.ddr_imc_num) {
|
||||
for (i = 0; i < skx_cfg.ddr_imc_num; i++)
|
||||
if (m->devfn[i] == pdev->devfn)
|
||||
break;
|
||||
if (i == SKX_NUM_IMC)
|
||||
if (i == skx_cfg.ddr_imc_num)
|
||||
goto fail;
|
||||
}
|
||||
d = get_skx_dev(pdev->bus, m->busidx);
|
||||
|
|
@ -157,12 +166,6 @@ static int get_all_munits(const struct munit *m)
|
|||
return -ENODEV;
|
||||
}
|
||||
|
||||
static struct res_config skx_cfg = {
|
||||
.type = SKX,
|
||||
.decs_did = 0x2016,
|
||||
.busno_cfg_offset = 0xcc,
|
||||
};
|
||||
|
||||
static const struct x86_cpu_id skx_cpuids[] = {
|
||||
X86_MATCH_VFM(INTEL_SKYLAKE_X, &skx_cfg),
|
||||
{ }
|
||||
|
|
@ -186,11 +189,11 @@ static int skx_get_dimm_config(struct mem_ctl_info *mci, struct res_config *cfg)
|
|||
/* Only the mcmtr on the first channel is effective */
|
||||
pci_read_config_dword(imc->chan[0].cdev, 0x87c, &mcmtr);
|
||||
|
||||
for (i = 0; i < SKX_NUM_CHANNELS; i++) {
|
||||
for (i = 0; i < cfg->ddr_chan_num; i++) {
|
||||
ndimms = 0;
|
||||
pci_read_config_dword(imc->chan[i].cdev, 0x8C, &amap);
|
||||
pci_read_config_dword(imc->chan[i].cdev, 0x400, &mcddrtcfg);
|
||||
for (j = 0; j < SKX_NUM_DIMMS; j++) {
|
||||
for (j = 0; j < cfg->ddr_dimm_num; j++) {
|
||||
dimm = edac_get_dimm(mci, i, j, 0);
|
||||
pci_read_config_dword(imc->chan[i].cdev,
|
||||
0x80 + 4 * j, &mtr);
|
||||
|
|
@ -620,6 +623,7 @@ static int __init skx_init(void)
|
|||
return -ENODEV;
|
||||
|
||||
cfg = (struct res_config *)id->driver_data;
|
||||
skx_set_res_cfg(cfg);
|
||||
|
||||
rc = skx_get_hi_lo(0x2034, off, &skx_tolm, &skx_tohm);
|
||||
if (rc)
|
||||
|
|
@ -652,10 +656,13 @@ static int __init skx_init(void)
|
|||
goto fail;
|
||||
|
||||
edac_dbg(2, "src_id = %d\n", src_id);
|
||||
for (i = 0; i < SKX_NUM_IMC; i++) {
|
||||
for (i = 0; i < cfg->ddr_imc_num; i++) {
|
||||
d->imc[i].mc = mc++;
|
||||
d->imc[i].lmc = i;
|
||||
d->imc[i].src_id = src_id;
|
||||
d->imc[i].num_channels = cfg->ddr_chan_num;
|
||||
d->imc[i].num_dimms = cfg->ddr_dimm_num;
|
||||
|
||||
rc = skx_register_mci(&d->imc[i], d->imc[i].chan[0].cdev,
|
||||
"Skylake Socket", EDAC_MOD_STR,
|
||||
skx_get_dimm_config, cfg);
|
||||
|
|
|
|||
|
|
@ -14,9 +14,11 @@
|
|||
* Copyright (c) 2018, Intel Corporation.
|
||||
*/
|
||||
|
||||
#include <linux/topology.h>
|
||||
#include <linux/acpi.h>
|
||||
#include <linux/dmi.h>
|
||||
#include <linux/adxl.h>
|
||||
#include <linux/overflow.h>
|
||||
#include <acpi/nfit.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/uv/uv.h>
|
||||
|
|
@ -130,8 +132,8 @@ static void skx_init_mc_mapping(struct skx_dev *d)
|
|||
* the logical indices of the memory controllers enumerated by the
|
||||
* EDAC driver.
|
||||
*/
|
||||
for (int i = 0; i < NUM_IMC; i++)
|
||||
d->mc_mapping[i] = i;
|
||||
for (int i = 0; i < d->num_imc; i++)
|
||||
d->imc[i].mc_mapping = i;
|
||||
}
|
||||
|
||||
void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc)
|
||||
|
|
@ -139,22 +141,28 @@ void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc)
|
|||
edac_dbg(0, "Set the mapping of mc phy idx to logical idx: %02d -> %02d\n",
|
||||
pmc, lmc);
|
||||
|
||||
d->mc_mapping[pmc] = lmc;
|
||||
d->imc[lmc].mc_mapping = pmc;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(skx_set_mc_mapping);
|
||||
|
||||
static u8 skx_get_mc_mapping(struct skx_dev *d, u8 pmc)
|
||||
static int skx_get_mc_mapping(struct skx_dev *d, u8 pmc)
|
||||
{
|
||||
edac_dbg(0, "Get the mapping of mc phy idx to logical idx: %02d -> %02d\n",
|
||||
pmc, d->mc_mapping[pmc]);
|
||||
for (int lmc = 0; lmc < d->num_imc; lmc++) {
|
||||
if (d->imc[lmc].mc_mapping == pmc) {
|
||||
edac_dbg(0, "Get the mapping of mc phy idx to logical idx: %02d -> %02d\n",
|
||||
pmc, lmc);
|
||||
|
||||
return d->mc_mapping[pmc];
|
||||
return lmc;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src)
|
||||
{
|
||||
int i, lmc, len = 0;
|
||||
struct skx_dev *d;
|
||||
int i, len = 0;
|
||||
|
||||
if (res->addr >= skx_tohm || (res->addr >= skx_tolm &&
|
||||
res->addr < BIT_ULL(32))) {
|
||||
|
|
@ -200,7 +208,7 @@ static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src)
|
|||
res->cs = (int)adxl_values[component_indices[INDEX_CS]];
|
||||
}
|
||||
|
||||
if (res->imc > NUM_IMC - 1 || res->imc < 0) {
|
||||
if (res->imc < 0) {
|
||||
skx_printk(KERN_ERR, "Bad imc %d\n", res->imc);
|
||||
return false;
|
||||
}
|
||||
|
|
@ -218,7 +226,13 @@ static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src)
|
|||
return false;
|
||||
}
|
||||
|
||||
res->imc = skx_get_mc_mapping(d, res->imc);
|
||||
lmc = skx_get_mc_mapping(d, res->imc);
|
||||
if (lmc < 0) {
|
||||
skx_printk(KERN_ERR, "No lmc for imc %d\n", res->imc);
|
||||
return false;
|
||||
}
|
||||
|
||||
res->imc = lmc;
|
||||
|
||||
for (i = 0; i < adxl_component_count; i++) {
|
||||
if (adxl_values[i] == ~0x0ull)
|
||||
|
|
@ -265,7 +279,7 @@ static int skx_get_pkg_id(struct skx_dev *d, u8 *id)
|
|||
struct cpuinfo_x86 *c = &cpu_data(cpu);
|
||||
|
||||
if (c->initialized && cpu_to_node(cpu) == node) {
|
||||
*id = c->topo.pkg_id;
|
||||
*id = topology_physical_package_id(cpu);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
|
@ -320,10 +334,10 @@ static int get_width(u32 mtr)
|
|||
*/
|
||||
int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list)
|
||||
{
|
||||
int ndev = 0, imc_num = cfg->ddr_imc_num + cfg->hbm_imc_num;
|
||||
struct pci_dev *pdev, *prev;
|
||||
struct skx_dev *d;
|
||||
u32 reg;
|
||||
int ndev = 0;
|
||||
|
||||
prev = NULL;
|
||||
for (;;) {
|
||||
|
|
@ -331,7 +345,7 @@ int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list)
|
|||
if (!pdev)
|
||||
break;
|
||||
ndev++;
|
||||
d = kzalloc(sizeof(*d), GFP_KERNEL);
|
||||
d = kzalloc(struct_size(d, imc, imc_num), GFP_KERNEL);
|
||||
if (!d) {
|
||||
pci_dev_put(pdev);
|
||||
return -ENOMEM;
|
||||
|
|
@ -354,8 +368,10 @@ int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list)
|
|||
d->seg = GET_BITFIELD(reg, 16, 23);
|
||||
}
|
||||
|
||||
edac_dbg(2, "busses: 0x%x, 0x%x, 0x%x, 0x%x\n",
|
||||
d->bus[0], d->bus[1], d->bus[2], d->bus[3]);
|
||||
d->num_imc = imc_num;
|
||||
|
||||
edac_dbg(2, "busses: 0x%x, 0x%x, 0x%x, 0x%x, imcs %d\n",
|
||||
d->bus[0], d->bus[1], d->bus[2], d->bus[3], imc_num);
|
||||
list_add_tail(&d->list, &dev_edac_list);
|
||||
prev = pdev;
|
||||
|
||||
|
|
@ -541,10 +557,10 @@ int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
|
|||
|
||||
/* Allocate a new MC control structure */
|
||||
layers[0].type = EDAC_MC_LAYER_CHANNEL;
|
||||
layers[0].size = NUM_CHANNELS;
|
||||
layers[0].size = imc->num_channels;
|
||||
layers[0].is_virt_csrow = false;
|
||||
layers[1].type = EDAC_MC_LAYER_SLOT;
|
||||
layers[1].size = NUM_DIMMS;
|
||||
layers[1].size = imc->num_dimms;
|
||||
layers[1].is_virt_csrow = true;
|
||||
mci = edac_mc_alloc(imc->mc, ARRAY_SIZE(layers), layers,
|
||||
sizeof(struct skx_pvt));
|
||||
|
|
@ -784,7 +800,7 @@ void skx_remove(void)
|
|||
|
||||
list_for_each_entry_safe(d, tmp, &dev_edac_list, list) {
|
||||
list_del(&d->list);
|
||||
for (i = 0; i < NUM_IMC; i++) {
|
||||
for (i = 0; i < d->num_imc; i++) {
|
||||
if (d->imc[i].mci)
|
||||
skx_unregister_mci(&d->imc[i]);
|
||||
|
||||
|
|
@ -794,7 +810,7 @@ void skx_remove(void)
|
|||
if (d->imc[i].mbase)
|
||||
iounmap(d->imc[i].mbase);
|
||||
|
||||
for (j = 0; j < NUM_CHANNELS; j++) {
|
||||
for (j = 0; j < d->imc[i].num_channels; j++) {
|
||||
if (d->imc[i].chan[j].cdev)
|
||||
pci_dev_put(d->imc[i].chan[j].cdev);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,23 +29,18 @@
|
|||
#define GET_BITFIELD(v, lo, hi) \
|
||||
(((v) & GENMASK_ULL((hi), (lo))) >> (lo))
|
||||
|
||||
#define SKX_NUM_IMC 2 /* Memory controllers per socket */
|
||||
#define SKX_NUM_CHANNELS 3 /* Channels per memory controller */
|
||||
#define SKX_NUM_DIMMS 2 /* Max DIMMS per channel */
|
||||
|
||||
#define I10NM_NUM_DDR_IMC 12
|
||||
#define I10NM_NUM_DDR_CHANNELS 2
|
||||
#define I10NM_NUM_DDR_DIMMS 2
|
||||
|
||||
#define I10NM_NUM_HBM_IMC 16
|
||||
#define I10NM_NUM_HBM_CHANNELS 2
|
||||
#define I10NM_NUM_HBM_DIMMS 1
|
||||
|
||||
#define I10NM_NUM_IMC (I10NM_NUM_DDR_IMC + I10NM_NUM_HBM_IMC)
|
||||
#define I10NM_NUM_CHANNELS MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS)
|
||||
#define I10NM_NUM_DIMMS MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS)
|
||||
|
||||
#define NUM_IMC MAX(SKX_NUM_IMC, I10NM_NUM_IMC)
|
||||
#define NUM_CHANNELS MAX(SKX_NUM_CHANNELS, I10NM_NUM_CHANNELS)
|
||||
#define NUM_DIMMS MAX(SKX_NUM_DIMMS, I10NM_NUM_DIMMS)
|
||||
|
||||
|
|
@ -134,16 +129,7 @@ struct skx_dev {
|
|||
struct pci_dev *uracu; /* for i10nm CPU */
|
||||
struct pci_dev *pcu_cr3; /* for HBM memory detection */
|
||||
u32 mcroute;
|
||||
/*
|
||||
* Some server BIOS may hide certain memory controllers, and the
|
||||
* EDAC driver skips those hidden memory controllers. However, the
|
||||
* ADXL still decodes memory error address using physical memory
|
||||
* controller indices. The mapping table is used to convert the
|
||||
* physical indices (reported by ADXL) to the logical indices
|
||||
* (used the EDAC driver) of present memory controllers during the
|
||||
* error handling process.
|
||||
*/
|
||||
u8 mc_mapping[NUM_IMC];
|
||||
int num_imc;
|
||||
struct skx_imc {
|
||||
struct mem_ctl_info *mci;
|
||||
struct pci_dev *mdev; /* for i10nm CPU */
|
||||
|
|
@ -155,6 +141,16 @@ struct skx_dev {
|
|||
u8 mc; /* system wide mc# */
|
||||
u8 lmc; /* socket relative mc# */
|
||||
u8 src_id;
|
||||
/*
|
||||
* Some server BIOS may hide certain memory controllers, and the
|
||||
* EDAC driver skips those hidden memory controllers. However, the
|
||||
* ADXL still decodes memory error address using physical memory
|
||||
* controller indices. The mapping table is used to convert the
|
||||
* physical indices (reported by ADXL) to the logical indices
|
||||
* (used the EDAC driver) of present memory controllers during the
|
||||
* error handling process.
|
||||
*/
|
||||
u8 mc_mapping;
|
||||
struct skx_channel {
|
||||
struct pci_dev *cdev;
|
||||
struct pci_dev *edev;
|
||||
|
|
@ -171,7 +167,7 @@ struct skx_dev {
|
|||
u8 colbits;
|
||||
} dimms[NUM_DIMMS];
|
||||
} chan[NUM_CHANNELS];
|
||||
} imc[NUM_IMC];
|
||||
} imc[];
|
||||
};
|
||||
|
||||
struct skx_pvt {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,960 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* AMD Versal NET memory controller driver
|
||||
* Copyright (C) 2025 Advanced Micro Devices, Inc.
|
||||
*/
|
||||
|
||||
#include <linux/cdx/edac_cdx_pcol.h>
|
||||
#include <linux/edac.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/of_device.h>
|
||||
#include <linux/ras.h>
|
||||
#include <linux/remoteproc.h>
|
||||
#include <linux/rpmsg.h>
|
||||
#include <linux/sizes.h>
|
||||
#include <ras/ras_event.h>
|
||||
|
||||
#include "edac_module.h"
|
||||
|
||||
/* Granularity of reported error in bytes */
|
||||
#define MC5_ERR_GRAIN 1
|
||||
#define MC_GET_DDR_CONFIG_IN_LEN 4
|
||||
|
||||
#define MC5_IRQ_CE_MASK GENMASK(18, 15)
|
||||
#define MC5_IRQ_UE_MASK GENMASK(14, 11)
|
||||
|
||||
#define MC5_RANK_1_MASK GENMASK(11, 6)
|
||||
#define MASK_24 GENMASK(29, 24)
|
||||
#define MASK_0 GENMASK(5, 0)
|
||||
|
||||
#define MC5_LRANK_1_MASK GENMASK(11, 6)
|
||||
#define MC5_LRANK_2_MASK GENMASK(17, 12)
|
||||
#define MC5_BANK1_MASK GENMASK(11, 6)
|
||||
#define MC5_GRP_0_MASK GENMASK(17, 12)
|
||||
#define MC5_GRP_1_MASK GENMASK(23, 18)
|
||||
|
||||
#define MC5_REGHI_ROW 7
|
||||
#define MC5_EACHBIT 1
|
||||
#define MC5_ERR_TYPE_CE 0
|
||||
#define MC5_ERR_TYPE_UE 1
|
||||
#define MC5_HIGH_MEM_EN BIT(20)
|
||||
#define MC5_MEM_MASK GENMASK(19, 0)
|
||||
#define MC5_X16_BASE 256
|
||||
#define MC5_X16_ECC 32
|
||||
#define MC5_X16_SIZE (MC5_X16_BASE + MC5_X16_ECC)
|
||||
#define MC5_X32_SIZE 576
|
||||
#define MC5_HIMEM_BASE (256 * SZ_1M)
|
||||
#define MC5_ILC_HIMEM_EN BIT(28)
|
||||
#define MC5_ILC_MEM GENMASK(27, 0)
|
||||
#define MC5_INTERLEAVE_SEL GENMASK(3, 0)
|
||||
#define MC5_BUS_WIDTH_MASK GENMASK(19, 18)
|
||||
#define MC5_NUM_CHANS_MASK BIT(17)
|
||||
#define MC5_RANK_MASK GENMASK(15, 14)
|
||||
|
||||
#define ERROR_LEVEL 2
|
||||
#define ERROR_ID 3
|
||||
#define TOTAL_ERR_LENGTH 5
|
||||
#define MSG_ERR_OFFSET 8
|
||||
#define MSG_ERR_LENGTH 9
|
||||
#define ERROR_DATA 10
|
||||
#define MCDI_RESPONSE 0xFF
|
||||
|
||||
#define REG_MAX 152
|
||||
#define ADEC_MAX 152
|
||||
#define NUM_CONTROLLERS 8
|
||||
#define REGS_PER_CONTROLLER 19
|
||||
#define ADEC_NUM 19
|
||||
#define BUFFER_SZ 80
|
||||
|
||||
#define XDDR5_BUS_WIDTH_64 0
|
||||
#define XDDR5_BUS_WIDTH_32 1
|
||||
#define XDDR5_BUS_WIDTH_16 2
|
||||
|
||||
/**
|
||||
* struct ecc_error_info - ECC error log information.
|
||||
* @burstpos: Burst position.
|
||||
* @lrank: Logical Rank number.
|
||||
* @rank: Rank number.
|
||||
* @group: Group number.
|
||||
* @bank: Bank number.
|
||||
* @col: Column number.
|
||||
* @row: Row number.
|
||||
* @rowhi: Row number higher bits.
|
||||
* @i: Combined ECC error vector containing encoded values of burst position,
|
||||
* rank, bank, column, and row information.
|
||||
*/
|
||||
union ecc_error_info {
|
||||
struct {
|
||||
u32 burstpos:3;
|
||||
u32 lrank:4;
|
||||
u32 rank:2;
|
||||
u32 group:3;
|
||||
u32 bank:2;
|
||||
u32 col:11;
|
||||
u32 row:7;
|
||||
u32 rowhi;
|
||||
};
|
||||
u64 i;
|
||||
} __packed;
|
||||
|
||||
/* Row and column bit positions in the address decoder (ADEC) registers. */
|
||||
union row_col_mapping {
|
||||
struct {
|
||||
u32 row0:6;
|
||||
u32 row1:6;
|
||||
u32 row2:6;
|
||||
u32 row3:6;
|
||||
u32 row4:6;
|
||||
u32 reserved:2;
|
||||
};
|
||||
struct {
|
||||
u32 col1:6;
|
||||
u32 col2:6;
|
||||
u32 col3:6;
|
||||
u32 col4:6;
|
||||
u32 col5:6;
|
||||
u32 reservedcol:2;
|
||||
};
|
||||
u32 i;
|
||||
} __packed;
|
||||
|
||||
/**
|
||||
* struct ecc_status - ECC status information to report.
|
||||
* @ceinfo: Correctable errors.
|
||||
* @ueinfo: Uncorrected errors.
|
||||
* @channel: Channel number.
|
||||
* @error_type: Error type.
|
||||
*/
|
||||
struct ecc_status {
|
||||
union ecc_error_info ceinfo[2];
|
||||
union ecc_error_info ueinfo[2];
|
||||
u8 channel;
|
||||
u8 error_type;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct mc_priv - DDR memory controller private instance data.
|
||||
* @message: Buffer for framing the event specific info.
|
||||
* @stat: ECC status information.
|
||||
* @error_id: The error id.
|
||||
* @error_level: The error level.
|
||||
* @dwidth: Width of data bus excluding ECC bits.
|
||||
* @part_len: The support of the message received.
|
||||
* @regs: The registers sent on the rpmsg.
|
||||
* @adec: Address decode registers.
|
||||
* @mci: Memory controller interface.
|
||||
* @ept: rpmsg endpoint.
|
||||
* @mcdi: The mcdi handle.
|
||||
*/
|
||||
struct mc_priv {
|
||||
char message[256];
|
||||
struct ecc_status stat;
|
||||
u32 error_id;
|
||||
u32 error_level;
|
||||
u32 dwidth;
|
||||
u32 part_len;
|
||||
u32 regs[REG_MAX];
|
||||
u32 adec[ADEC_MAX];
|
||||
struct mem_ctl_info *mci[NUM_CONTROLLERS];
|
||||
struct rpmsg_endpoint *ept;
|
||||
struct cdx_mcdi *mcdi;
|
||||
};
|
||||
|
||||
/*
|
||||
* Address decoder (ADEC) registers to match the order in which the register
|
||||
* information is received from the firmware.
|
||||
*/
|
||||
enum adec_info {
|
||||
CONF = 0,
|
||||
ADEC0,
|
||||
ADEC1,
|
||||
ADEC2,
|
||||
ADEC3,
|
||||
ADEC4,
|
||||
ADEC5,
|
||||
ADEC6,
|
||||
ADEC7,
|
||||
ADEC8,
|
||||
ADEC9,
|
||||
ADEC10,
|
||||
ADEC11,
|
||||
ADEC12,
|
||||
ADEC13,
|
||||
ADEC14,
|
||||
ADEC15,
|
||||
ADEC16,
|
||||
ADECILC,
|
||||
};
|
||||
|
||||
enum reg_info {
|
||||
ISR = 0,
|
||||
IMR,
|
||||
ECCR0_ERR_STATUS,
|
||||
ECCR0_ADDR_LO,
|
||||
ECCR0_ADDR_HI,
|
||||
ECCR0_DATA_LO,
|
||||
ECCR0_DATA_HI,
|
||||
ECCR0_PAR,
|
||||
ECCR1_ERR_STATUS,
|
||||
ECCR1_ADDR_LO,
|
||||
ECCR1_ADDR_HI,
|
||||
ECCR1_DATA_LO,
|
||||
ECCR1_DATA_HI,
|
||||
ECCR1_PAR,
|
||||
XMPU_ERR,
|
||||
XMPU_ERR_ADDR_L0,
|
||||
XMPU_ERR_ADDR_HI,
|
||||
XMPU_ERR_AXI_ID,
|
||||
ADEC_CHK_ERR_LOG,
|
||||
};
|
||||
|
||||
static bool get_ddr_info(u32 *error_data, struct mc_priv *priv)
|
||||
{
|
||||
u32 reglo, reghi, parity, eccr0_val, eccr1_val, isr;
|
||||
struct ecc_status *p;
|
||||
|
||||
isr = error_data[ISR];
|
||||
|
||||
if (!(isr & (MC5_IRQ_UE_MASK | MC5_IRQ_CE_MASK)))
|
||||
return false;
|
||||
|
||||
eccr0_val = error_data[ECCR0_ERR_STATUS];
|
||||
eccr1_val = error_data[ECCR1_ERR_STATUS];
|
||||
|
||||
if (!eccr0_val && !eccr1_val)
|
||||
return false;
|
||||
|
||||
p = &priv->stat;
|
||||
|
||||
if (!eccr0_val)
|
||||
p->channel = 1;
|
||||
else
|
||||
p->channel = 0;
|
||||
|
||||
reglo = error_data[ECCR0_ADDR_LO];
|
||||
reghi = error_data[ECCR0_ADDR_HI];
|
||||
if (isr & MC5_IRQ_CE_MASK)
|
||||
p->ceinfo[0].i = reglo | (u64)reghi << 32;
|
||||
else if (isr & MC5_IRQ_UE_MASK)
|
||||
p->ueinfo[0].i = reglo | (u64)reghi << 32;
|
||||
|
||||
parity = error_data[ECCR0_PAR];
|
||||
edac_dbg(2, "ERR DATA: 0x%08X%08X PARITY: 0x%08X\n",
|
||||
reghi, reglo, parity);
|
||||
|
||||
reglo = error_data[ECCR1_ADDR_LO];
|
||||
reghi = error_data[ECCR1_ADDR_HI];
|
||||
if (isr & MC5_IRQ_CE_MASK)
|
||||
p->ceinfo[1].i = reglo | (u64)reghi << 32;
|
||||
else if (isr & MC5_IRQ_UE_MASK)
|
||||
p->ueinfo[1].i = reglo | (u64)reghi << 32;
|
||||
|
||||
parity = error_data[ECCR1_PAR];
|
||||
edac_dbg(2, "ERR DATA: 0x%08X%08X PARITY: 0x%08X\n",
|
||||
reghi, reglo, parity);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* convert_to_physical - Convert @error_data to a physical address.
|
||||
* @priv: DDR memory controller private instance data.
|
||||
* @pinf: ECC error info structure.
|
||||
* @controller: Controller number of the MC5
|
||||
* @error_data: the DDRMC5 ADEC address decoder register data
|
||||
*
|
||||
* Return: physical address of the DDR memory.
|
||||
*/
|
||||
static unsigned long convert_to_physical(struct mc_priv *priv,
|
||||
union ecc_error_info pinf,
|
||||
int controller, int *error_data)
|
||||
{
|
||||
u32 row, blk, rsh_req_addr, interleave, ilc_base_ctrl_add, ilc_himem_en, reg, offset;
|
||||
u64 high_mem_base, high_mem_offset, low_mem_offset, ilcmem_base;
|
||||
unsigned long err_addr = 0, addr;
|
||||
union row_col_mapping cols;
|
||||
union row_col_mapping rows;
|
||||
u32 col_bit_0;
|
||||
|
||||
row = pinf.rowhi << MC5_REGHI_ROW | pinf.row;
|
||||
offset = controller * ADEC_NUM;
|
||||
|
||||
reg = error_data[ADEC6];
|
||||
rows.i = reg;
|
||||
err_addr |= (row & BIT(0)) << rows.row0;
|
||||
row >>= MC5_EACHBIT;
|
||||
err_addr |= (row & BIT(0)) << rows.row1;
|
||||
row >>= MC5_EACHBIT;
|
||||
err_addr |= (row & BIT(0)) << rows.row2;
|
||||
row >>= MC5_EACHBIT;
|
||||
err_addr |= (row & BIT(0)) << rows.row3;
|
||||
row >>= MC5_EACHBIT;
|
||||
err_addr |= (row & BIT(0)) << rows.row4;
|
||||
row >>= MC5_EACHBIT;
|
||||
|
||||
reg = error_data[ADEC7];
|
||||
rows.i = reg;
|
||||
err_addr |= (row & BIT(0)) << rows.row0;
|
||||
row >>= MC5_EACHBIT;
|
||||
err_addr |= (row & BIT(0)) << rows.row1;
|
||||
row >>= MC5_EACHBIT;
|
||||
err_addr |= (row & BIT(0)) << rows.row2;
|
||||
row >>= MC5_EACHBIT;
|
||||
err_addr |= (row & BIT(0)) << rows.row3;
|
||||
row >>= MC5_EACHBIT;
|
||||
err_addr |= (row & BIT(0)) << rows.row4;
|
||||
row >>= MC5_EACHBIT;
|
||||
|
||||
reg = error_data[ADEC8];
|
||||
rows.i = reg;
|
||||
err_addr |= (row & BIT(0)) << rows.row0;
|
||||
row >>= MC5_EACHBIT;
|
||||
err_addr |= (row & BIT(0)) << rows.row1;
|
||||
row >>= MC5_EACHBIT;
|
||||
err_addr |= (row & BIT(0)) << rows.row2;
|
||||
row >>= MC5_EACHBIT;
|
||||
err_addr |= (row & BIT(0)) << rows.row3;
|
||||
row >>= MC5_EACHBIT;
|
||||
err_addr |= (row & BIT(0)) << rows.row4;
|
||||
|
||||
reg = error_data[ADEC9];
|
||||
rows.i = reg;
|
||||
|
||||
err_addr |= (row & BIT(0)) << rows.row0;
|
||||
row >>= MC5_EACHBIT;
|
||||
err_addr |= (row & BIT(0)) << rows.row1;
|
||||
row >>= MC5_EACHBIT;
|
||||
err_addr |= (row & BIT(0)) << rows.row2;
|
||||
row >>= MC5_EACHBIT;
|
||||
|
||||
col_bit_0 = FIELD_GET(MASK_24, error_data[ADEC9]);
|
||||
pinf.col >>= 1;
|
||||
err_addr |= (pinf.col & 1) << col_bit_0;
|
||||
|
||||
cols.i = error_data[ADEC10];
|
||||
err_addr |= (pinf.col & 1) << cols.col1;
|
||||
pinf.col >>= 1;
|
||||
err_addr |= (pinf.col & 1) << cols.col2;
|
||||
pinf.col >>= 1;
|
||||
err_addr |= (pinf.col & 1) << cols.col3;
|
||||
pinf.col >>= 1;
|
||||
err_addr |= (pinf.col & 1) << cols.col4;
|
||||
pinf.col >>= 1;
|
||||
err_addr |= (pinf.col & 1) << cols.col5;
|
||||
pinf.col >>= 1;
|
||||
|
||||
cols.i = error_data[ADEC11];
|
||||
err_addr |= (pinf.col & 1) << cols.col1;
|
||||
pinf.col >>= 1;
|
||||
err_addr |= (pinf.col & 1) << cols.col2;
|
||||
pinf.col >>= 1;
|
||||
err_addr |= (pinf.col & 1) << cols.col3;
|
||||
pinf.col >>= 1;
|
||||
err_addr |= (pinf.col & 1) << cols.col4;
|
||||
pinf.col >>= 1;
|
||||
err_addr |= (pinf.col & 1) << cols.col5;
|
||||
pinf.col >>= 1;
|
||||
|
||||
reg = error_data[ADEC12];
|
||||
err_addr |= (pinf.bank & BIT(0)) << (reg & MASK_0);
|
||||
pinf.bank >>= MC5_EACHBIT;
|
||||
err_addr |= (pinf.bank & BIT(0)) << FIELD_GET(MC5_BANK1_MASK, reg);
|
||||
pinf.bank >>= MC5_EACHBIT;
|
||||
|
||||
err_addr |= (pinf.bank & BIT(0)) << FIELD_GET(MC5_GRP_0_MASK, reg);
|
||||
pinf.group >>= MC5_EACHBIT;
|
||||
err_addr |= (pinf.bank & BIT(0)) << FIELD_GET(MC5_GRP_1_MASK, reg);
|
||||
pinf.group >>= MC5_EACHBIT;
|
||||
err_addr |= (pinf.bank & BIT(0)) << FIELD_GET(MASK_24, reg);
|
||||
pinf.group >>= MC5_EACHBIT;
|
||||
|
||||
reg = error_data[ADEC4];
|
||||
err_addr |= (pinf.rank & BIT(0)) << (reg & MASK_0);
|
||||
pinf.rank >>= MC5_EACHBIT;
|
||||
err_addr |= (pinf.rank & BIT(0)) << FIELD_GET(MC5_RANK_1_MASK, reg);
|
||||
pinf.rank >>= MC5_EACHBIT;
|
||||
|
||||
reg = error_data[ADEC5];
|
||||
err_addr |= (pinf.lrank & BIT(0)) << (reg & MASK_0);
|
||||
pinf.lrank >>= MC5_EACHBIT;
|
||||
err_addr |= (pinf.lrank & BIT(0)) << FIELD_GET(MC5_LRANK_1_MASK, reg);
|
||||
pinf.lrank >>= MC5_EACHBIT;
|
||||
err_addr |= (pinf.lrank & BIT(0)) << FIELD_GET(MC5_LRANK_2_MASK, reg);
|
||||
pinf.lrank >>= MC5_EACHBIT;
|
||||
err_addr |= (pinf.lrank & BIT(0)) << FIELD_GET(MASK_24, reg);
|
||||
pinf.lrank >>= MC5_EACHBIT;
|
||||
|
||||
high_mem_base = (priv->adec[ADEC2 + offset] & MC5_MEM_MASK) * MC5_HIMEM_BASE;
|
||||
interleave = priv->adec[ADEC13 + offset] & MC5_INTERLEAVE_SEL;
|
||||
|
||||
high_mem_offset = priv->adec[ADEC3 + offset] & MC5_MEM_MASK;
|
||||
low_mem_offset = priv->adec[ADEC1 + offset] & MC5_MEM_MASK;
|
||||
reg = priv->adec[ADEC14 + offset];
|
||||
ilc_himem_en = !!(reg & MC5_ILC_HIMEM_EN);
|
||||
ilcmem_base = (reg & MC5_ILC_MEM) * SZ_1M;
|
||||
if (ilc_himem_en)
|
||||
ilc_base_ctrl_add = ilcmem_base - high_mem_offset;
|
||||
else
|
||||
ilc_base_ctrl_add = ilcmem_base - low_mem_offset;
|
||||
|
||||
if (priv->dwidth == DEV_X16) {
|
||||
blk = err_addr / MC5_X16_SIZE;
|
||||
rsh_req_addr = (blk << 8) + ilc_base_ctrl_add;
|
||||
err_addr = rsh_req_addr * interleave * 2;
|
||||
} else {
|
||||
blk = err_addr / MC5_X32_SIZE;
|
||||
rsh_req_addr = (blk << 9) + ilc_base_ctrl_add;
|
||||
err_addr = rsh_req_addr * interleave * 2;
|
||||
}
|
||||
|
||||
if ((priv->adec[ADEC2 + offset] & MC5_HIGH_MEM_EN) && err_addr >= high_mem_base)
|
||||
addr = err_addr - high_mem_offset;
|
||||
else
|
||||
addr = err_addr - low_mem_offset;
|
||||
|
||||
return addr;
|
||||
}
|
||||
|
||||
/**
|
||||
* handle_error - Handle errors.
|
||||
* @priv: DDR memory controller private instance data.
|
||||
* @stat: ECC status structure.
|
||||
* @ctl_num: Controller number of the MC5
|
||||
* @error_data: the MC5 ADEC address decoder register data
|
||||
*
|
||||
* Handles ECC correctable and uncorrectable errors.
|
||||
*/
|
||||
static void handle_error(struct mc_priv *priv, struct ecc_status *stat,
|
||||
int ctl_num, int *error_data)
|
||||
{
|
||||
union ecc_error_info pinf;
|
||||
struct mem_ctl_info *mci;
|
||||
unsigned long pa;
|
||||
phys_addr_t pfn;
|
||||
int err;
|
||||
|
||||
if (WARN_ON_ONCE(ctl_num > NUM_CONTROLLERS))
|
||||
return;
|
||||
|
||||
mci = priv->mci[ctl_num];
|
||||
|
||||
if (stat->error_type == MC5_ERR_TYPE_CE) {
|
||||
pinf = stat->ceinfo[stat->channel];
|
||||
snprintf(priv->message, sizeof(priv->message),
|
||||
"Error type:%s Controller %d Addr at %lx\n",
|
||||
"CE", ctl_num, convert_to_physical(priv, pinf, ctl_num, error_data));
|
||||
|
||||
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
|
||||
1, 0, 0, 0, 0, 0, -1,
|
||||
priv->message, "");
|
||||
}
|
||||
|
||||
if (stat->error_type == MC5_ERR_TYPE_UE) {
|
||||
pinf = stat->ueinfo[stat->channel];
|
||||
snprintf(priv->message, sizeof(priv->message),
|
||||
"Error type:%s controller %d Addr at %lx\n",
|
||||
"UE", ctl_num, convert_to_physical(priv, pinf, ctl_num, error_data));
|
||||
|
||||
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
|
||||
1, 0, 0, 0, 0, 0, -1,
|
||||
priv->message, "");
|
||||
pa = convert_to_physical(priv, pinf, ctl_num, error_data);
|
||||
pfn = PHYS_PFN(pa);
|
||||
|
||||
if (IS_ENABLED(CONFIG_MEMORY_FAILURE)) {
|
||||
err = memory_failure(pfn, MF_ACTION_REQUIRED);
|
||||
if (err)
|
||||
edac_dbg(2, "memory_failure() error: %d", err);
|
||||
else
|
||||
edac_dbg(2, "Poison page at PA 0x%lx\n", pa);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void mc_init(struct mem_ctl_info *mci, struct device *dev)
|
||||
{
|
||||
struct mc_priv *priv = mci->pvt_info;
|
||||
struct csrow_info *csi;
|
||||
struct dimm_info *dimm;
|
||||
u32 row;
|
||||
int ch;
|
||||
|
||||
/* Initialize controller capabilities and configuration */
|
||||
mci->mtype_cap = MEM_FLAG_DDR5;
|
||||
mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED;
|
||||
mci->scrub_cap = SCRUB_HW_SRC;
|
||||
mci->scrub_mode = SCRUB_NONE;
|
||||
|
||||
mci->edac_cap = EDAC_FLAG_SECDED;
|
||||
mci->ctl_name = "VersalNET DDR5";
|
||||
mci->dev_name = dev_name(dev);
|
||||
mci->mod_name = "versalnet_edac";
|
||||
|
||||
edac_op_state = EDAC_OPSTATE_INT;
|
||||
|
||||
for (row = 0; row < mci->nr_csrows; row++) {
|
||||
csi = mci->csrows[row];
|
||||
for (ch = 0; ch < csi->nr_channels; ch++) {
|
||||
dimm = csi->channels[ch]->dimm;
|
||||
dimm->edac_mode = EDAC_SECDED;
|
||||
dimm->mtype = MEM_DDR5;
|
||||
dimm->grain = MC5_ERR_GRAIN;
|
||||
dimm->dtype = priv->dwidth;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#define to_mci(k) container_of(k, struct mem_ctl_info, dev)
|
||||
|
||||
static unsigned int mcdi_rpc_timeout(struct cdx_mcdi *cdx, unsigned int cmd)
|
||||
{
|
||||
return MCDI_RPC_TIMEOUT;
|
||||
}
|
||||
|
||||
static void mcdi_request(struct cdx_mcdi *cdx,
|
||||
const struct cdx_dword *hdr, size_t hdr_len,
|
||||
const struct cdx_dword *sdu, size_t sdu_len)
|
||||
{
|
||||
void *send_buf;
|
||||
int ret;
|
||||
|
||||
send_buf = kzalloc(hdr_len + sdu_len, GFP_KERNEL);
|
||||
if (!send_buf)
|
||||
return;
|
||||
|
||||
memcpy(send_buf, hdr, hdr_len);
|
||||
memcpy(send_buf + hdr_len, sdu, sdu_len);
|
||||
|
||||
ret = rpmsg_send(cdx->ept, send_buf, hdr_len + sdu_len);
|
||||
if (ret)
|
||||
dev_err(&cdx->rpdev->dev, "Failed to send rpmsg data: %d\n", ret);
|
||||
|
||||
kfree(send_buf);
|
||||
}
|
||||
|
||||
static const struct cdx_mcdi_ops mcdi_ops = {
|
||||
.mcdi_rpc_timeout = mcdi_rpc_timeout,
|
||||
.mcdi_request = mcdi_request,
|
||||
};
|
||||
|
||||
static void get_ddr_config(u32 index, u32 *buffer, struct cdx_mcdi *amd_mcdi)
|
||||
{
|
||||
size_t outlen;
|
||||
int ret;
|
||||
|
||||
MCDI_DECLARE_BUF(inbuf, MC_GET_DDR_CONFIG_IN_LEN);
|
||||
MCDI_DECLARE_BUF(outbuf, BUFFER_SZ);
|
||||
|
||||
MCDI_SET_DWORD(inbuf, EDAC_GET_DDR_CONFIG_IN_CONTROLLER_INDEX, index);
|
||||
|
||||
ret = cdx_mcdi_rpc(amd_mcdi, MC_CMD_EDAC_GET_DDR_CONFIG, inbuf, sizeof(inbuf),
|
||||
outbuf, sizeof(outbuf), &outlen);
|
||||
if (!ret)
|
||||
memcpy(buffer, MCDI_PTR(outbuf, GET_DDR_CONFIG),
|
||||
(ADEC_NUM * 4));
|
||||
}
|
||||
|
||||
static int setup_mcdi(struct mc_priv *mc_priv)
|
||||
{
|
||||
struct cdx_mcdi *amd_mcdi;
|
||||
int ret, i;
|
||||
|
||||
amd_mcdi = kzalloc(sizeof(*amd_mcdi), GFP_KERNEL);
|
||||
if (!amd_mcdi)
|
||||
return -ENOMEM;
|
||||
|
||||
amd_mcdi->mcdi_ops = &mcdi_ops;
|
||||
ret = cdx_mcdi_init(amd_mcdi);
|
||||
if (ret) {
|
||||
kfree(amd_mcdi);
|
||||
return ret;
|
||||
}
|
||||
|
||||
amd_mcdi->ept = mc_priv->ept;
|
||||
mc_priv->mcdi = amd_mcdi;
|
||||
|
||||
for (i = 0; i < NUM_CONTROLLERS; i++)
|
||||
get_ddr_config(i, &mc_priv->adec[ADEC_NUM * i], amd_mcdi);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const guid_t amd_versalnet_guid = GUID_INIT(0x82678888, 0xa556, 0x44f2,
|
||||
0xb8, 0xb4, 0x45, 0x56, 0x2e,
|
||||
0x8c, 0x5b, 0xec);
|
||||
|
||||
static int rpmsg_cb(struct rpmsg_device *rpdev, void *data,
|
||||
int len, void *priv, u32 src)
|
||||
{
|
||||
struct mc_priv *mc_priv = dev_get_drvdata(&rpdev->dev);
|
||||
const guid_t *sec_type = &guid_null;
|
||||
u32 length, offset, error_id;
|
||||
u32 *result = (u32 *)data;
|
||||
struct ecc_status *p;
|
||||
int i, j, k, sec_sev;
|
||||
const char *err_str;
|
||||
u32 *adec_data;
|
||||
|
||||
if (*(u8 *)data == MCDI_RESPONSE) {
|
||||
cdx_mcdi_process_cmd(mc_priv->mcdi, (struct cdx_dword *)data, len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
sec_sev = result[ERROR_LEVEL];
|
||||
error_id = result[ERROR_ID];
|
||||
length = result[MSG_ERR_LENGTH];
|
||||
offset = result[MSG_ERR_OFFSET];
|
||||
|
||||
if (result[TOTAL_ERR_LENGTH] > length) {
|
||||
if (!mc_priv->part_len)
|
||||
mc_priv->part_len = length;
|
||||
else
|
||||
mc_priv->part_len += length;
|
||||
/*
|
||||
* The data can come in 2 stretches. Construct the regs from 2
|
||||
* messages the offset indicates the offset from which the data is to
|
||||
* be taken
|
||||
*/
|
||||
for (i = 0 ; i < length; i++) {
|
||||
k = offset + i;
|
||||
j = ERROR_DATA + i;
|
||||
mc_priv->regs[k] = result[j];
|
||||
}
|
||||
if (mc_priv->part_len < result[TOTAL_ERR_LENGTH])
|
||||
return 0;
|
||||
mc_priv->part_len = 0;
|
||||
}
|
||||
|
||||
mc_priv->error_id = error_id;
|
||||
mc_priv->error_level = result[ERROR_LEVEL];
|
||||
|
||||
switch (error_id) {
|
||||
case 5: err_str = "General Software Non-Correctable error"; break;
|
||||
case 6: err_str = "CFU error"; break;
|
||||
case 7: err_str = "CFRAME error"; break;
|
||||
case 10: err_str = "DDRMC Microblaze Correctable ECC error"; break;
|
||||
case 11: err_str = "DDRMC Microblaze Non-Correctable ECC error"; break;
|
||||
case 15: err_str = "MMCM error"; break;
|
||||
case 16: err_str = "HNICX Correctable error"; break;
|
||||
case 17: err_str = "HNICX Non-Correctable error"; break;
|
||||
|
||||
case 18:
|
||||
p = &mc_priv->stat;
|
||||
memset(p, 0, sizeof(struct ecc_status));
|
||||
p->error_type = MC5_ERR_TYPE_CE;
|
||||
for (i = 0 ; i < NUM_CONTROLLERS; i++) {
|
||||
if (get_ddr_info(&mc_priv->regs[i * REGS_PER_CONTROLLER], mc_priv)) {
|
||||
adec_data = mc_priv->adec + ADEC_NUM * i;
|
||||
handle_error(mc_priv, &mc_priv->stat, i, adec_data);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
case 19:
|
||||
p = &mc_priv->stat;
|
||||
memset(p, 0, sizeof(struct ecc_status));
|
||||
p->error_type = MC5_ERR_TYPE_UE;
|
||||
for (i = 0 ; i < NUM_CONTROLLERS; i++) {
|
||||
if (get_ddr_info(&mc_priv->regs[i * REGS_PER_CONTROLLER], mc_priv)) {
|
||||
adec_data = mc_priv->adec + ADEC_NUM * i;
|
||||
handle_error(mc_priv, &mc_priv->stat, i, adec_data);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
||||
case 21: err_str = "GT Non-Correctable error"; break;
|
||||
case 22: err_str = "PL Sysmon Correctable error"; break;
|
||||
case 23: err_str = "PL Sysmon Non-Correctable error"; break;
|
||||
case 111: err_str = "LPX unexpected dfx activation error"; break;
|
||||
case 114: err_str = "INT_LPD Non-Correctable error"; break;
|
||||
case 116: err_str = "INT_OCM Non-Correctable error"; break;
|
||||
case 117: err_str = "INT_FPD Correctable error"; break;
|
||||
case 118: err_str = "INT_FPD Non-Correctable error"; break;
|
||||
case 120: err_str = "INT_IOU Non-Correctable error"; break;
|
||||
case 123: err_str = "err_int_irq from APU GIC Distributor"; break;
|
||||
case 124: err_str = "fault_int_irq from APU GIC Distribute"; break;
|
||||
case 132 ... 139: err_str = "FPX SPLITTER error"; break;
|
||||
case 140: err_str = "APU Cluster 0 error"; break;
|
||||
case 141: err_str = "APU Cluster 1 error"; break;
|
||||
case 142: err_str = "APU Cluster 2 error"; break;
|
||||
case 143: err_str = "APU Cluster 3 error"; break;
|
||||
case 145: err_str = "WWDT1 LPX error"; break;
|
||||
case 147: err_str = "IPI error"; break;
|
||||
case 152 ... 153: err_str = "AFIFS error"; break;
|
||||
case 154 ... 155: err_str = "LPX glitch error"; break;
|
||||
case 185 ... 186: err_str = "FPX AFIFS error"; break;
|
||||
case 195 ... 199: err_str = "AFIFM error"; break;
|
||||
case 108: err_str = "PSM Correctable error"; break;
|
||||
case 59: err_str = "PMC correctable error"; break;
|
||||
case 60: err_str = "PMC Un correctable error"; break;
|
||||
case 43 ... 47: err_str = "PMC Sysmon error"; break;
|
||||
case 163 ... 184: err_str = "RPU error"; break;
|
||||
case 148: err_str = "OCM0 correctable error"; break;
|
||||
case 149: err_str = "OCM1 correctable error"; break;
|
||||
case 150: err_str = "OCM0 Un-correctable error"; break;
|
||||
case 151: err_str = "OCM1 Un-correctable error"; break;
|
||||
case 189: err_str = "PSX_CMN_3 PD block consolidated error"; break;
|
||||
case 191: err_str = "FPD_INT_WRAP PD block consolidated error"; break;
|
||||
case 232: err_str = "CRAM Un-Correctable error"; break;
|
||||
default: err_str = "VERSAL_EDAC_ERR_ID: %d"; break;
|
||||
}
|
||||
|
||||
snprintf(mc_priv->message,
|
||||
sizeof(mc_priv->message),
|
||||
"[VERSAL_EDAC_ERR_ID: %d] Error type: %s", error_id, err_str);
|
||||
|
||||
/* Convert to bytes */
|
||||
length = result[TOTAL_ERR_LENGTH] * 4;
|
||||
log_non_standard_event(sec_type, &amd_versalnet_guid, mc_priv->message,
|
||||
sec_sev, (void *)&result[ERROR_DATA], length);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * RPMsg channel this driver binds to.  The table is deliberately not
 * const: mc_probe() stashes the driver private data in .driver_data so
 * that rpmsg_probe() can retrieve it (the rpmsg core passes no other
 * per-driver context to probe).
 */
static struct rpmsg_device_id amd_rpmsg_id_table[] = {
	{ .name = "error_ipc" },
	{ },
};
MODULE_DEVICE_TABLE(rpmsg, amd_rpmsg_id_table);
|
||||
|
||||
static int rpmsg_probe(struct rpmsg_device *rpdev)
|
||||
{
|
||||
struct rpmsg_channel_info chinfo;
|
||||
struct mc_priv *pg;
|
||||
|
||||
pg = (struct mc_priv *)amd_rpmsg_id_table[0].driver_data;
|
||||
chinfo.src = RPMSG_ADDR_ANY;
|
||||
chinfo.dst = rpdev->dst;
|
||||
strscpy(chinfo.name, amd_rpmsg_id_table[0].name,
|
||||
strlen(amd_rpmsg_id_table[0].name));
|
||||
|
||||
pg->ept = rpmsg_create_ept(rpdev, rpmsg_cb, NULL, chinfo);
|
||||
if (!pg->ept)
|
||||
return dev_err_probe(&rpdev->dev, -ENXIO, "Failed to create ept for channel %s\n",
|
||||
chinfo.name);
|
||||
|
||||
dev_set_drvdata(&rpdev->dev, pg);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * rpmsg_remove - Tear down the endpoint created in rpmsg_probe().
 * @rpdev: rpmsg device being removed.
 *
 * The endpoint is destroyed before the drvdata is cleared so that any
 * in-flight callback still sees valid private data.
 */
static void rpmsg_remove(struct rpmsg_device *rpdev)
{
	struct mc_priv *mc_priv = dev_get_drvdata(&rpdev->dev);

	rpmsg_destroy_ept(mc_priv->ept);
	dev_set_drvdata(&rpdev->dev, NULL);
}
|
||||
|
||||
/* RPMsg driver receiving error notifications over the "error_ipc" channel. */
static struct rpmsg_driver amd_rpmsg_driver = {
	.drv.name = KBUILD_MODNAME,
	.probe = rpmsg_probe,
	.remove = rpmsg_remove,
	.callback = rpmsg_cb,
	.id_table = amd_rpmsg_id_table,
};
|
||||
|
||||
/*
 * Release callback for the per-controller parent devices allocated in
 * init_versalnet(); frees the struct device when its last reference drops.
 */
static void versal_edac_release(struct device *dev)
{
	kfree(dev);
}
|
||||
|
||||
static int init_versalnet(struct mc_priv *priv, struct platform_device *pdev)
|
||||
{
|
||||
u32 num_chans, rank, dwidth, config;
|
||||
struct edac_mc_layer layers[2];
|
||||
struct mem_ctl_info *mci;
|
||||
struct device *dev;
|
||||
enum dev_type dt;
|
||||
char *name;
|
||||
int rc, i;
|
||||
|
||||
for (i = 0; i < NUM_CONTROLLERS; i++) {
|
||||
config = priv->adec[CONF + i * ADEC_NUM];
|
||||
num_chans = FIELD_GET(MC5_NUM_CHANS_MASK, config);
|
||||
rank = 1 << FIELD_GET(MC5_RANK_MASK, config);
|
||||
dwidth = FIELD_GET(MC5_BUS_WIDTH_MASK, config);
|
||||
|
||||
switch (dwidth) {
|
||||
case XDDR5_BUS_WIDTH_16:
|
||||
dt = DEV_X16;
|
||||
break;
|
||||
case XDDR5_BUS_WIDTH_32:
|
||||
dt = DEV_X32;
|
||||
break;
|
||||
case XDDR5_BUS_WIDTH_64:
|
||||
dt = DEV_X64;
|
||||
break;
|
||||
default:
|
||||
dt = DEV_UNKNOWN;
|
||||
}
|
||||
|
||||
if (dt == DEV_UNKNOWN)
|
||||
continue;
|
||||
|
||||
/* Find the first enabled device and register that one. */
|
||||
layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
|
||||
layers[0].size = rank;
|
||||
layers[0].is_virt_csrow = true;
|
||||
layers[1].type = EDAC_MC_LAYER_CHANNEL;
|
||||
layers[1].size = num_chans;
|
||||
layers[1].is_virt_csrow = false;
|
||||
|
||||
rc = -ENOMEM;
|
||||
mci = edac_mc_alloc(i, ARRAY_SIZE(layers), layers,
|
||||
sizeof(struct mc_priv));
|
||||
if (!mci) {
|
||||
edac_printk(KERN_ERR, EDAC_MC, "Failed memory allocation for MC%d\n", i);
|
||||
goto err_alloc;
|
||||
}
|
||||
|
||||
priv->mci[i] = mci;
|
||||
priv->dwidth = dt;
|
||||
|
||||
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
|
||||
dev->release = versal_edac_release;
|
||||
name = kmalloc(32, GFP_KERNEL);
|
||||
sprintf(name, "versal-net-ddrmc5-edac-%d", i);
|
||||
dev->init_name = name;
|
||||
rc = device_register(dev);
|
||||
if (rc)
|
||||
goto err_alloc;
|
||||
|
||||
mci->pdev = dev;
|
||||
|
||||
platform_set_drvdata(pdev, priv);
|
||||
|
||||
mc_init(mci, dev);
|
||||
rc = edac_mc_add_mc(mci);
|
||||
if (rc) {
|
||||
edac_printk(KERN_ERR, EDAC_MC, "Failed to register MC%d with EDAC core\n", i);
|
||||
goto err_alloc;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
||||
err_alloc:
|
||||
while (i--) {
|
||||
mci = priv->mci[i];
|
||||
if (!mci)
|
||||
continue;
|
||||
|
||||
if (mci->pdev) {
|
||||
device_unregister(mci->pdev);
|
||||
edac_mc_del_mc(mci->pdev);
|
||||
}
|
||||
|
||||
edac_mc_free(mci);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void remove_versalnet(struct mc_priv *priv)
|
||||
{
|
||||
struct mem_ctl_info *mci;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NUM_CONTROLLERS; i++) {
|
||||
device_unregister(priv->mci[i]->pdev);
|
||||
mci = edac_mc_del_mc(priv->mci[i]->pdev);
|
||||
if (!mci)
|
||||
return;
|
||||
|
||||
edac_mc_free(mci);
|
||||
}
|
||||
}
|
||||
|
||||
static int mc_probe(struct platform_device *pdev)
|
||||
{
|
||||
struct device_node *r5_core_node;
|
||||
struct mc_priv *priv;
|
||||
struct rproc *rp;
|
||||
int rc;
|
||||
|
||||
r5_core_node = of_parse_phandle(pdev->dev.of_node, "amd,rproc", 0);
|
||||
if (!r5_core_node) {
|
||||
dev_err(&pdev->dev, "amd,rproc: invalid phandle\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
rp = rproc_get_by_phandle(r5_core_node->phandle);
|
||||
if (!rp)
|
||||
return -EPROBE_DEFER;
|
||||
|
||||
rc = rproc_boot(rp);
|
||||
if (rc) {
|
||||
dev_err(&pdev->dev, "Failed to attach to remote processor\n");
|
||||
goto err_rproc_boot;
|
||||
}
|
||||
|
||||
priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
|
||||
if (!priv) {
|
||||
rc = -ENOMEM;
|
||||
goto err_alloc;
|
||||
}
|
||||
|
||||
amd_rpmsg_id_table[0].driver_data = (kernel_ulong_t)priv;
|
||||
|
||||
rc = register_rpmsg_driver(&amd_rpmsg_driver);
|
||||
if (rc) {
|
||||
edac_printk(KERN_ERR, EDAC_MC, "Failed to register RPMsg driver: %d\n", rc);
|
||||
goto err_alloc;
|
||||
}
|
||||
|
||||
rc = setup_mcdi(priv);
|
||||
if (rc)
|
||||
goto err_unreg;
|
||||
|
||||
priv->mcdi->r5_rproc = rp;
|
||||
|
||||
rc = init_versalnet(priv, pdev);
|
||||
if (rc)
|
||||
goto err_init;
|
||||
|
||||
return 0;
|
||||
|
||||
err_init:
|
||||
cdx_mcdi_finish(priv->mcdi);
|
||||
|
||||
err_unreg:
|
||||
unregister_rpmsg_driver(&amd_rpmsg_driver);
|
||||
|
||||
err_alloc:
|
||||
rproc_shutdown(rp);
|
||||
|
||||
err_rproc_boot:
|
||||
rproc_put(rp);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
 * mc_remove - Platform driver remove: tear down what mc_probe() set up.
 * @pdev: platform device being removed.
 *
 * NOTE(review): mc_probe() obtains the rproc via rproc_get_by_phandle()
 * but no matching rproc_put() is visible here — confirm whether the
 * reference is intentionally kept or leaked.
 */
static void mc_remove(struct platform_device *pdev)
{
	struct mc_priv *priv = platform_get_drvdata(pdev);

	/* Stop incoming error notifications before removing the MCs. */
	unregister_rpmsg_driver(&amd_rpmsg_driver);
	remove_versalnet(priv);
	rproc_shutdown(priv->mcdi->r5_rproc);
	cdx_mcdi_finish(priv->mcdi);
}
|
||||
|
||||
/* Device tree match table; binds against the Versal NET DDRMC5 node. */
static const struct of_device_id amd_edac_match[] = {
	{ .compatible = "xlnx,versal-net-ddrmc5", },
	{}
};
MODULE_DEVICE_TABLE(of, amd_edac_match);
|
||||
|
||||
/* Platform driver for the Versal NET DDR memory controller EDAC device. */
static struct platform_driver amd_ddr_edac_mc_driver = {
	.driver = {
		.name = "versal-net-edac",
		.of_match_table = amd_edac_match,
	},
	.probe = mc_probe,
	.remove = mc_remove,
};

module_platform_driver(amd_ddr_edac_mc_driver);

MODULE_AUTHOR("AMD Inc");
MODULE_DESCRIPTION("Versal NET EDAC driver");
MODULE_LICENSE("GPL");
|
||||
|
|
@ -51,6 +51,7 @@ void log_non_standard_event(const guid_t *sec_type, const guid_t *fru_id,
|
|||
{
|
||||
trace_non_standard_event(sec_type, fru_id, fru_text, sev, err, len);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(log_non_standard_event);
|
||||
|
||||
void log_arm_hw_error(struct cper_sec_proc_arm *err)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -0,0 +1,28 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0
 *
 * Driver for AMD network controllers and boards
 *
 * Copyright (C) 2021, Xilinx, Inc.
 * Copyright (C) 2022-2023, Advanced Micro Devices, Inc.
 */

#ifndef MC_CDX_PCOL_H
#define MC_CDX_PCOL_H
#include <linux/cdx/mcdi.h>

/*
 * Per MCDI convention, _OFST is a byte offset into the message and _LEN
 * is the field length in bytes.
 */
/* Length of one word of the DDR-config response payload, in bytes. */
#define MC_CMD_EDAC_GET_DDR_CONFIG_OUT_WORD_LENGTH_LEN 4
/* Number of registers for the DDR controller */
#define MC_CMD_GET_DDR_CONFIG_OFST 4
#define MC_CMD_GET_DDR_CONFIG_LEN 4

/***********************************/
/* MC_CMD_EDAC_GET_DDR_CONFIG
 * Provides detailed configuration for the DDR controller of the given index.
 */
#define MC_CMD_EDAC_GET_DDR_CONFIG 0x3

/* MC_CMD_EDAC_GET_DDR_CONFIG_IN msgrequest */
/* Index of the DDR controller to query: 32-bit field at offset 0. */
#define MC_CMD_EDAC_GET_DDR_CONFIG_IN_CONTROLLER_INDEX_OFST 0
#define MC_CMD_EDAC_GET_DDR_CONFIG_IN_CONTROLLER_INDEX_LEN 4

#endif /* MC_CDX_PCOL_H */
|
||||
|
|
@ -11,16 +11,7 @@
|
|||
#include <linux/kref.h>
|
||||
#include <linux/rpmsg.h>
|
||||
|
||||
#include "bitfield.h"
|
||||
#include "mc_cdx_pcol.h"
|
||||
|
||||
#ifdef DEBUG
|
||||
#define CDX_WARN_ON_ONCE_PARANOID(x) WARN_ON_ONCE(x)
|
||||
#define CDX_WARN_ON_PARANOID(x) WARN_ON(x)
|
||||
#else
|
||||
#define CDX_WARN_ON_ONCE_PARANOID(x) do {} while (0)
|
||||
#define CDX_WARN_ON_PARANOID(x) do {} while (0)
|
||||
#endif
|
||||
#include "linux/cdx/bitfield.h"
|
||||
|
||||
/**
|
||||
* enum cdx_mcdi_mode - MCDI transaction mode
|
||||
|
|
@ -36,8 +27,6 @@ enum cdx_mcdi_mode {
|
|||
#define MCDI_RPC_LONG_TIMEOU (60 * HZ)
|
||||
#define MCDI_RPC_POST_RST_TIME (10 * HZ)
|
||||
|
||||
#define MCDI_BUF_LEN (8 + MCDI_CTL_SDU_LEN_MAX)
|
||||
|
||||
/**
|
||||
* enum cdx_mcdi_cmd_state - State for an individual MCDI command
|
||||
* @MCDI_STATE_QUEUED: Command not started and is waiting to run.
|
||||
|
|
@ -180,24 +169,12 @@ struct cdx_mcdi_data {
|
|||
u32 fn_flags;
|
||||
};
|
||||
|
||||
static inline struct cdx_mcdi_iface *cdx_mcdi_if(struct cdx_mcdi *cdx)
|
||||
{
|
||||
return cdx->mcdi ? &cdx->mcdi->iface : NULL;
|
||||
}
|
||||
|
||||
int cdx_mcdi_init(struct cdx_mcdi *cdx);
|
||||
void cdx_mcdi_finish(struct cdx_mcdi *cdx);
|
||||
|
||||
int cdx_mcdi_init(struct cdx_mcdi *cdx);
|
||||
void cdx_mcdi_process_cmd(struct cdx_mcdi *cdx, struct cdx_dword *outbuf, int len);
|
||||
int cdx_mcdi_rpc(struct cdx_mcdi *cdx, unsigned int cmd,
|
||||
const struct cdx_dword *inbuf, size_t inlen,
|
||||
struct cdx_dword *outbuf, size_t outlen, size_t *outlen_actual);
|
||||
int cdx_mcdi_rpc_async(struct cdx_mcdi *cdx, unsigned int cmd,
|
||||
const struct cdx_dword *inbuf, size_t inlen,
|
||||
cdx_mcdi_async_completer *complete,
|
||||
unsigned long cookie);
|
||||
int cdx_mcdi_wait_for_quiescence(struct cdx_mcdi *cdx,
|
||||
unsigned int timeout_jiffies);
|
||||
|
||||
/*
|
||||
* We expect that 16- and 32-bit fields in MCDI requests and responses
|
||||
|
|
@ -215,28 +192,8 @@ int cdx_mcdi_wait_for_quiescence(struct cdx_mcdi *cdx,
|
|||
#define _MCDI_DWORD(_buf, _field) \
|
||||
((_buf) + (_MCDI_CHECK_ALIGN(MC_CMD_ ## _field ## _OFST, 4) >> 2))
|
||||
|
||||
#define MCDI_BYTE(_buf, _field) \
|
||||
((void)BUILD_BUG_ON_ZERO(MC_CMD_ ## _field ## _LEN != 1), \
|
||||
*MCDI_PTR(_buf, _field))
|
||||
#define MCDI_WORD(_buf, _field) \
|
||||
((void)BUILD_BUG_ON_ZERO(MC_CMD_ ## _field ## _LEN != 2), \
|
||||
le16_to_cpu(*(__force const __le16 *)MCDI_PTR(_buf, _field)))
|
||||
#define MCDI_SET_DWORD(_buf, _field, _value) \
|
||||
CDX_POPULATE_DWORD_1(*_MCDI_DWORD(_buf, _field), CDX_DWORD, _value)
|
||||
#define MCDI_DWORD(_buf, _field) \
|
||||
CDX_DWORD_FIELD(*_MCDI_DWORD(_buf, _field), CDX_DWORD)
|
||||
#define MCDI_POPULATE_DWORD_1(_buf, _field, _name1, _value1) \
|
||||
CDX_POPULATE_DWORD_1(*_MCDI_DWORD(_buf, _field), \
|
||||
MC_CMD_ ## _name1, _value1)
|
||||
#define MCDI_SET_QWORD(_buf, _field, _value) \
|
||||
do { \
|
||||
CDX_POPULATE_DWORD_1(_MCDI_DWORD(_buf, _field)[0], \
|
||||
CDX_DWORD, (u32)(_value)); \
|
||||
CDX_POPULATE_DWORD_1(_MCDI_DWORD(_buf, _field)[1], \
|
||||
CDX_DWORD, (u64)(_value) >> 32); \
|
||||
} while (0)
|
||||
#define MCDI_QWORD(_buf, _field) \
|
||||
(CDX_DWORD_FIELD(_MCDI_DWORD(_buf, _field)[0], CDX_DWORD) | \
|
||||
(u64)CDX_DWORD_FIELD(_MCDI_DWORD(_buf, _field)[1], CDX_DWORD) << 32)
|
||||
|
||||
#endif /* CDX_MCDI_H */
|
||||
Loading…
Reference in New Issue