Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 8 Mar 2019 17:11:39 +0000 (09:11 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 8 Mar 2019 17:11:39 +0000 (09:11 -0800)
Pull RAS updates from Borislav Petkov:
 "This time around we have in store:

   - Disable MC4_MISC thresholding banks on all AMD family 0x15 models
     (Shirish S)

   - AMD MCE error descriptions update and error decode improvements
     (Yazen Ghannam)

   - The usual smaller conversions and fixes"

* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce: Improve error message when kernel cannot recover, p2
  EDAC/mce_amd: Decode MCA_STATUS in bit definition order
  EDAC/mce_amd: Decode MCA_STATUS[Scrub] bit
  EDAC, mce_amd: Print ExtErrorCode and description on a single line
  EDAC, mce_amd: Match error descriptions to latest documentation
  x86/MCE/AMD, EDAC/mce_amd: Add new error descriptions for some SMCA bank types
  x86/MCE/AMD, EDAC/mce_amd: Add new McaTypes for CS, PSP, and SMU units
  x86/MCE/AMD, EDAC/mce_amd: Add new MP5, NBIO, and PCIE SMCA bank types
  RAS: Add a MAINTAINERS entry
  RAS: Use consistent types for UUIDs
  x86/MCE/AMD: Carve out the MC4_MISC thresholding quirk
  x86/MCE/AMD: Turn off MC4_MISC thresholding on all family 0x15 models
  x86/MCE: Switch to use the new generic UUID API

MAINTAINERS
arch/x86/include/asm/mce.h
arch/x86/kernel/cpu/mce/amd.c
arch/x86/kernel/cpu/mce/apei.c
arch/x86/kernel/cpu/mce/core.c
arch/x86/kernel/cpu/mce/severity.c
drivers/edac/mce_amd.c
drivers/ras/ras.c
include/ras/ras_event.h

index f758445317c5c909bfb52ad23a2f1f1810bdaec2..58de951ee4ba689453660561842dccb63d4ca04f 100644 (file)
@@ -12960,6 +12960,16 @@ M:     Alexandre Bounine <alex.bou9@gmail.com>
 S:     Maintained
 F:     drivers/rapidio/
 
+RAS INFRASTRUCTURE
+M:     Tony Luck <tony.luck@intel.com>
+M:     Borislav Petkov <bp@alien8.de>
+L:     linux-edac@vger.kernel.org
+S:     Maintained
+F:     drivers/ras/
+F:     include/linux/ras.h
+F:     include/ras/ras_event.h
+F:     Documentation/admin-guide/ras.rst
+
 RAYLINK/WEBGEAR 802.11 WIRELESS LAN DRIVER
 L:     linux-wireless@vger.kernel.org
 S:     Orphan
index c1a812bd5a27d770da1076c5b22ca9dc7dd66762..22d05e3835f0b81424690706072a35989b3cc374 100644 (file)
@@ -48,6 +48,7 @@
 #define MCI_STATUS_SYNDV       BIT_ULL(53)  /* synd reg. valid */
 #define MCI_STATUS_DEFERRED    BIT_ULL(44)  /* uncorrected error, deferred exception */
 #define MCI_STATUS_POISON      BIT_ULL(43)  /* access poisonous data */
+#define MCI_STATUS_SCRUB       BIT_ULL(40)  /* Error detected during scrub operation */
 
 /*
  * McaX field if set indicates a given bank supports MCA extensions:
@@ -307,11 +308,17 @@ enum smca_bank_types {
        SMCA_FP,        /* Floating Point */
        SMCA_L3_CACHE,  /* L3 Cache */
        SMCA_CS,        /* Coherent Slave */
+       SMCA_CS_V2,     /* Coherent Slave */
        SMCA_PIE,       /* Power, Interrupts, etc. */
        SMCA_UMC,       /* Unified Memory Controller */
        SMCA_PB,        /* Parameter Block */
        SMCA_PSP,       /* Platform Security Processor */
+       SMCA_PSP_V2,    /* Platform Security Processor */
        SMCA_SMU,       /* System Management Unit */
+       SMCA_SMU_V2,    /* System Management Unit */
+       SMCA_MP5,       /* Microprocessor 5 Unit */
+       SMCA_NBIO,      /* Northbridge IO Unit */
+       SMCA_PCIE,      /* PCI Express Unit */
        N_SMCA_BANK_TYPES
 };
 
index 89298c83de53b226227d4271ee5fcc85f2a118fc..e64de5149e50e8c35518cf56ed4d0ebc61e9c78f 100644 (file)
@@ -88,11 +88,17 @@ static struct smca_bank_name smca_names[] = {
        [SMCA_FP]       = { "floating_point",   "Floating Point Unit" },
        [SMCA_L3_CACHE] = { "l3_cache",         "L3 Cache" },
        [SMCA_CS]       = { "coherent_slave",   "Coherent Slave" },
+       [SMCA_CS_V2]    = { "coherent_slave",   "Coherent Slave" },
        [SMCA_PIE]      = { "pie",              "Power, Interrupts, etc." },
        [SMCA_UMC]      = { "umc",              "Unified Memory Controller" },
        [SMCA_PB]       = { "param_block",      "Parameter Block" },
        [SMCA_PSP]      = { "psp",              "Platform Security Processor" },
+       [SMCA_PSP_V2]   = { "psp",              "Platform Security Processor" },
        [SMCA_SMU]      = { "smu",              "System Management Unit" },
+       [SMCA_SMU_V2]   = { "smu",              "System Management Unit" },
+       [SMCA_MP5]      = { "mp5",              "Microprocessor 5 Unit" },
+       [SMCA_NBIO]     = { "nbio",             "Northbridge IO Unit" },
+       [SMCA_PCIE]     = { "pcie",             "PCI Express Unit" },
 };
 
 static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init =
@@ -138,30 +144,42 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
        { SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 },
 
        /* ZN Core (HWID=0xB0) MCA types */
-       { SMCA_LS,       HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF },
+       { SMCA_LS,       HWID_MCATYPE(0xB0, 0x0), 0x1FFFFF },
        { SMCA_IF,       HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
        { SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
        { SMCA_DE,       HWID_MCATYPE(0xB0, 0x3), 0x1FF },
        /* HWID 0xB0 MCATYPE 0x4 is Reserved */
-       { SMCA_EX,       HWID_MCATYPE(0xB0, 0x5), 0x7FF },
+       { SMCA_EX,       HWID_MCATYPE(0xB0, 0x5), 0xFFF },
        { SMCA_FP,       HWID_MCATYPE(0xB0, 0x6), 0x7F },
        { SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF },
 
        /* Data Fabric MCA types */
        { SMCA_CS,       HWID_MCATYPE(0x2E, 0x0), 0x1FF },
-       { SMCA_PIE,      HWID_MCATYPE(0x2E, 0x1), 0xF },
+       { SMCA_PIE,      HWID_MCATYPE(0x2E, 0x1), 0x1F },
+       { SMCA_CS_V2,    HWID_MCATYPE(0x2E, 0x2), 0x3FFF },
 
        /* Unified Memory Controller MCA type */
-       { SMCA_UMC,      HWID_MCATYPE(0x96, 0x0), 0x3F },
+       { SMCA_UMC,      HWID_MCATYPE(0x96, 0x0), 0xFF },
 
        /* Parameter Block MCA type */
        { SMCA_PB,       HWID_MCATYPE(0x05, 0x0), 0x1 },
 
        /* Platform Security Processor MCA type */
        { SMCA_PSP,      HWID_MCATYPE(0xFF, 0x0), 0x1 },
+       { SMCA_PSP_V2,   HWID_MCATYPE(0xFF, 0x1), 0x3FFFF },
 
        /* System Management Unit MCA type */
        { SMCA_SMU,      HWID_MCATYPE(0x01, 0x0), 0x1 },
+       { SMCA_SMU_V2,   HWID_MCATYPE(0x01, 0x1), 0x7FF },
+
+       /* Microprocessor 5 Unit MCA type */
+       { SMCA_MP5,      HWID_MCATYPE(0x01, 0x2), 0x3FF },
+
+       /* Northbridge IO Unit MCA type */
+       { SMCA_NBIO,     HWID_MCATYPE(0x18, 0x0), 0x1F },
+
+       /* PCI Express Unit MCA type */
+       { SMCA_PCIE,     HWID_MCATYPE(0x46, 0x0), 0x1F },
 };
 
 struct smca_bank smca_banks[MAX_NR_BANKS];
@@ -545,6 +563,40 @@ out:
        return offset;
 }
 
+/*
+ * Turn off MC4_MISC thresholding banks on all family 0x15 models since
+ * they're not supported there. @c is only consulted for its family (c->x86).
+ */
+void disable_err_thresholding(struct cpuinfo_x86 *c)
+{
+       int i;
+       u64 hwcr; /* HWCR value as read on entry; written back unchanged at the end */
+       bool need_toggle; /* true when HWCR bit 18 was clear on entry */
+       u32 msrs[] = {
+               0x00000413, /* MC4_MISC0 */
+               0xc0000408, /* MC4_MISC1 */
+       };
+
+       if (c->x86 != 0x15)
+               return;
+
+       rdmsrl(MSR_K7_HWCR, hwcr);
+
+       /* McStatusWrEn (HWCR bit 18) has to be set; note whether we must set it */
+       need_toggle = !(hwcr & BIT(18));
+
+       if (need_toggle)
+               wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
+
+       /* Clear CntP (bit 62) safely in each of the MC4_MISC MSRs listed above */
+       for (i = 0; i < ARRAY_SIZE(msrs); i++)
+               msr_clear_bit(msrs[i], 62);
+
+       /* Restore the HWCR value read on entry, but only if we changed it */
+       if (need_toggle)
+               wrmsrl(MSR_K7_HWCR, hwcr);
+}
+
 /* cpu init entry point, called from mce.c with preempt off */
 void mce_amd_feature_init(struct cpuinfo_x86 *c)
 {
@@ -552,6 +604,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
        unsigned int bank, block, cpu = smp_processor_id();
        int offset = -1;
 
+       disable_err_thresholding(c);
+
        for (bank = 0; bank < mca_cfg.banks; ++bank) {
                if (mce_flags.smca)
                        smca_configure(bank, cpu);
index 1d9b3ce662a0b8a6d8347a76ae88257c1b82531d..c038e5c00a59f96f8344c9c9256ad27d5002355c 100644 (file)
@@ -64,11 +64,11 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
 EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
 
 #define CPER_CREATOR_MCE                                               \
-       UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c,     \
-               0x64, 0x90, 0xb8, 0x9d)
+       GUID_INIT(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c,   \
+                 0x64, 0x90, 0xb8, 0x9d)
 #define CPER_SECTION_TYPE_MCE                                          \
-       UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96,     \
-               0x04, 0x4a, 0x38, 0xfc)
+       GUID_INIT(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96,   \
+                 0x04, 0x4a, 0x38, 0xfc)
 
 /*
  * CPER specification (in UEFI specification 2.3 appendix N) requires
@@ -135,7 +135,7 @@ retry:
                goto out;
        /* try to skip other type records in storage */
        else if (rc != sizeof(rcd) ||
-                uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE))
+                !guid_equal(&rcd.hdr.creator_id, &CPER_CREATOR_MCE))
                goto retry;
        memcpy(m, &rcd.mce, sizeof(*m));
        rc = sizeof(*m);
index 6ce290c506d93e83318f3b13b95d8f5861e02983..b7fb541a4873f7803b216b7987fa66517bc5fff6 100644 (file)
@@ -1612,36 +1612,6 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
                if (c->x86 == 0x15 && c->x86_model <= 0xf)
                        mce_flags.overflow_recov = 1;
 
-               /*
-                * Turn off MC4_MISC thresholding banks on those models since
-                * they're not supported there.
-                */
-               if (c->x86 == 0x15 &&
-                   (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) {
-                       int i;
-                       u64 hwcr;
-                       bool need_toggle;
-                       u32 msrs[] = {
-                               0x00000413, /* MC4_MISC0 */
-                               0xc0000408, /* MC4_MISC1 */
-                       };
-
-                       rdmsrl(MSR_K7_HWCR, hwcr);
-
-                       /* McStatusWrEn has to be set */
-                       need_toggle = !(hwcr & BIT(18));
-
-                       if (need_toggle)
-                               wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
-
-                       /* Clear CntP bit safely */
-                       for (i = 0; i < ARRAY_SIZE(msrs); i++)
-                               msr_clear_bit(msrs[i], 62);
-
-                       /* restore old settings */
-                       if (need_toggle)
-                               wrmsrl(MSR_K7_HWCR, hwcr);
-               }
        }
 
        if (c->x86_vendor == X86_VENDOR_INTEL) {
index dc3e26e905a32f5c346852606a55111338b6ca2d..65201e180fe0ee019b7571fc3921a0745a6bdbe1 100644 (file)
@@ -165,6 +165,11 @@ static struct severity {
                SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
                KERNEL
                ),
+       MCESEV(
+               PANIC, "Instruction fetch error in kernel",
+               SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
+               KERNEL
+               ),
 #endif
        MCESEV(
                PANIC, "Action required: unknown MCACOD",
index c605089d899f6cf31d2fbad5132044eb58cf147c..0a1814dad6cf24d8443bc591b2522d6af5f48f6c 100644 (file)
@@ -151,138 +151,223 @@ static const char * const mc6_mce_desc[] = {
 
 /* Scalable MCA error strings */
 static const char * const smca_ls_mce_desc[] = {
-       "Load queue parity",
-       "Store queue parity",
-       "Miss address buffer payload parity",
-       "L1 TLB parity",
-       "Reserved",
-       "DC tag error type 6",
-       "DC tag error type 1",
+       "Load queue parity error",
+       "Store queue parity error",
+       "Miss address buffer payload parity error",
+       "Level 1 TLB parity error",
+       "DC Tag error type 5",
+       "DC Tag error type 6",
+       "DC Tag error type 1",
        "Internal error type 1",
        "Internal error type 2",
-       "Sys Read data error thread 0",
-       "Sys read data error thread 1",
-       "DC tag error type 2",
-       "DC data error type 1 (poison consumption)",
-       "DC data error type 2",
-       "DC data error type 3",
-       "DC tag error type 4",
-       "L2 TLB parity",
+       "System Read Data Error Thread 0",
+       "System Read Data Error Thread 1",
+       "DC Tag error type 2",
+       "DC Data error type 1 and poison consumption",
+       "DC Data error type 2",
+       "DC Data error type 3",
+       "DC Tag error type 4",
+       "Level 2 TLB parity error",
        "PDC parity error",
-       "DC tag error type 3",
-       "DC tag error type 5",
-       "L2 fill data error",
+       "DC Tag error type 3",
+       "DC Tag error type 5",
+       "L2 Fill Data error",
 };
 
 static const char * const smca_if_mce_desc[] = {
-       "microtag probe port parity error",
-       "IC microtag or full tag multi-hit error",
-       "IC full tag parity",
-       "IC data array parity",
-       "Decoupling queue phys addr parity error",
-       "L0 ITLB parity error",
-       "L1 ITLB parity error",
-       "L2 ITLB parity error",
-       "BPQ snoop parity on Thread 0",
-       "BPQ snoop parity on Thread 1",
-       "L1 BTB multi-match error",
-       "L2 BTB multi-match error",
-       "L2 Cache Response Poison error",
-       "System Read Data error",
+       "Op Cache Microtag Probe Port Parity Error",
+       "IC Microtag or Full Tag Multi-hit Error",
+       "IC Full Tag Parity Error",
+       "IC Data Array Parity Error",
+       "Decoupling Queue PhysAddr Parity Error",
+       "L0 ITLB Parity Error",
+       "L1 ITLB Parity Error",
+       "L2 ITLB Parity Error",
+       "BPQ Thread 0 Snoop Parity Error",
+       "BPQ Thread 1 Snoop Parity Error",
+       "L1 BTB Multi-Match Error",
+       "L2 BTB Multi-Match Error",
+       "L2 Cache Response Poison Error",
+       "System Read Data Error",
 };
 
 static const char * const smca_l2_mce_desc[] = {
-       "L2M tag multi-way-hit error",
-       "L2M tag ECC error",
-       "L2M data ECC error",
-       "HW assert",
+       "L2M Tag Multiple-Way-Hit error",
+       "L2M Tag or State Array ECC Error",
+       "L2M Data Array ECC Error",
+       "Hardware Assert Error",
 };
 
 static const char * const smca_de_mce_desc[] = {
-       "uop cache tag parity error",
-       "uop cache data parity error",
-       "Insn buffer parity error",
-       "uop queue parity error",
-       "Insn dispatch queue parity error",
-       "Fetch address FIFO parity",
-       "Patch RAM data parity",
-       "Patch RAM sequencer parity",
-       "uop buffer parity"
+       "Micro-op cache tag parity error",
+       "Micro-op cache data parity error",
+       "Instruction buffer parity error",
+       "Micro-op queue parity error",
+       "Instruction dispatch queue parity error",
+       "Fetch address FIFO parity error",
+       "Patch RAM data parity error",
+       "Patch RAM sequencer parity error",
+       "Micro-op buffer parity error"
 };
 
 static const char * const smca_ex_mce_desc[] = {
-       "Watchdog timeout error",
-       "Phy register file parity",
-       "Flag register file parity",
-       "Immediate displacement register file parity",
-       "Address generator payload parity",
-       "EX payload parity",
-       "Checkpoint queue parity",
-       "Retire dispatch queue parity",
+       "Watchdog Timeout error",
+       "Physical register file parity error",
+       "Flag register file parity error",
+       "Immediate displacement register file parity error",
+       "Address generator payload parity error",
+       "EX payload parity error",
+       "Checkpoint queue parity error",
+       "Retire dispatch queue parity error",
        "Retire status queue parity error",
        "Scheduling queue parity error",
        "Branch buffer queue parity error",
+       "Hardware Assertion error",
 };
 
 static const char * const smca_fp_mce_desc[] = {
-       "Physical register file parity",
-       "Freelist parity error",
-       "Schedule queue parity",
+       "Physical register file (PRF) parity error",
+       "Freelist (FL) parity error",
+       "Schedule queue parity error",
        "NSQ parity error",
-       "Retire queue parity",
-       "Status register file parity",
+       "Retire queue (RQ) parity error",
+       "Status register file (SRF) parity error",
        "Hardware assertion",
 };
 
 static const char * const smca_l3_mce_desc[] = {
-       "Shadow tag macro ECC error",
-       "Shadow tag macro multi-way-hit error",
-       "L3M tag ECC error",
-       "L3M tag multi-way-hit error",
-       "L3M data ECC error",
-       "XI parity, L3 fill done channel error",
-       "L3 victim queue parity",
-       "L3 HW assert",
+       "Shadow Tag Macro ECC Error",
+       "Shadow Tag Macro Multi-way-hit Error",
+       "L3M Tag ECC Error",
+       "L3M Tag Multi-way-hit Error",
+       "L3M Data ECC Error",
+       "SDP Parity Error or SystemReadDataError from XI",
+       "L3 Victim Queue Parity Error",
+       "L3 Hardware Assertion",
 };
 
 static const char * const smca_cs_mce_desc[] = {
-       "Illegal request from transport layer",
-       "Address violation",
-       "Security violation",
-       "Illegal response from transport layer",
-       "Unexpected response",
-       "Parity error on incoming request or probe response data",
-       "Parity error on incoming read response data",
-       "Atomic request parity",
-       "ECC error on probe filter access",
+       "Illegal Request",
+       "Address Violation",
+       "Security Violation",
+       "Illegal Response",
+       "Unexpected Response",
+       "Request or Probe Parity Error",
+       "Read Response Parity Error",
+       "Atomic Request Parity Error",
+       "Probe Filter ECC Error",
+};
+
+static const char * const smca_cs2_mce_desc[] = { /* SMCA_CS_V2 strings, indexed by extended error code; 14 entries, cf. xec_bitmap 0x3FFF */
+       "Illegal Request",
+       "Address Violation",
+       "Security Violation",
+       "Illegal Response",
+       "Unexpected Response",
+       "Request or Probe Parity Error",
+       "Read Response Parity Error",
+       "Atomic Request Parity Error",
+       "SDP read response had no match in the CS queue",
+       "Probe Filter Protocol Error",
+       "Probe Filter ECC Error",
+       "SDP read response had an unexpected RETRY error",
+       "Counter overflow error",
+       "Counter underflow error",
 };
 
 static const char * const smca_pie_mce_desc[] = {
-       "HW assert",
-       "Internal PIE register security violation",
-       "Error on GMI link",
-       "Poison data written to internal PIE register",
+       "Hardware Assert",
+       "Register security violation",
+       "Link Error",
+       "Poison data consumption",
+       "A deferred error was detected in the DF"
 };
 
 static const char * const smca_umc_mce_desc[] = {
        "DRAM ECC error",
-       "Data poison error on DRAM",
+       "Data poison error",
        "SDP parity error",
        "Advanced peripheral bus error",
-       "Command/address parity error",
+       "Address/Command parity error",
        "Write data CRC error",
+       "DCQ SRAM ECC error",
+       "AES SRAM ECC error",
 };
 
 static const char * const smca_pb_mce_desc[] = {
-       "Parameter Block RAM ECC error",
+       "An ECC error in the Parameter Block RAM array",
 };
 
 static const char * const smca_psp_mce_desc[] = {
-       "PSP RAM ECC or parity error",
+       "An ECC or parity error in a PSP RAM instance",
+};
+
+static const char * const smca_psp2_mce_desc[] = { /* SMCA_PSP_V2 strings, indexed by extended error code; 18 entries, cf. xec_bitmap 0x3FFFF */
+       "High SRAM ECC or parity error",
+       "Low SRAM ECC or parity error",
+       "Instruction Cache Bank 0 ECC or parity error",
+       "Instruction Cache Bank 1 ECC or parity error",
+       "Instruction Tag Ram 0 parity error",
+       "Instruction Tag Ram 1 parity error",
+       "Data Cache Bank 0 ECC or parity error",
+       "Data Cache Bank 1 ECC or parity error",
+       "Data Cache Bank 2 ECC or parity error",
+       "Data Cache Bank 3 ECC or parity error",
+       "Data Tag Bank 0 parity error",
+       "Data Tag Bank 1 parity error",
+       "Data Tag Bank 2 parity error",
+       "Data Tag Bank 3 parity error",
+       "Dirty Data Ram parity error",
+       "TLB Bank 0 parity error",
+       "TLB Bank 1 parity error",
+       "System Hub Read Buffer ECC or parity error",
 };
 
 static const char * const smca_smu_mce_desc[] = {
-       "SMU RAM ECC or parity error",
+       "An ECC or parity error in an SMU RAM instance",
+};
+
+static const char * const smca_smu2_mce_desc[] = { /* SMCA_SMU_V2 strings, indexed by extended error code; 11 entries, cf. xec_bitmap 0x7FF */
+       "High SRAM ECC or parity error",
+       "Low SRAM ECC or parity error",
+       "Data Cache Bank A ECC or parity error",
+       "Data Cache Bank B ECC or parity error",
+       "Data Tag Cache Bank A ECC or parity error",
+       "Data Tag Cache Bank B ECC or parity error",
+       "Instruction Cache Bank A ECC or parity error",
+       "Instruction Cache Bank B ECC or parity error",
+       "Instruction Tag Cache Bank A ECC or parity error",
+       "Instruction Tag Cache Bank B ECC or parity error",
+       "System Hub Read Buffer ECC or parity error",
+};
+
+static const char * const smca_mp5_mce_desc[] = { /* SMCA_MP5 strings, indexed by extended error code; 10 entries, cf. xec_bitmap 0x3FF */
+       "High SRAM ECC or parity error",
+       "Low SRAM ECC or parity error",
+       "Data Cache Bank A ECC or parity error",
+       "Data Cache Bank B ECC or parity error",
+       "Data Tag Cache Bank A ECC or parity error",
+       "Data Tag Cache Bank B ECC or parity error",
+       "Instruction Cache Bank A ECC or parity error",
+       "Instruction Cache Bank B ECC or parity error",
+       "Instruction Tag Cache Bank A ECC or parity error",
+       "Instruction Tag Cache Bank B ECC or parity error",
+};
+
+static const char * const smca_nbio_mce_desc[] = { /* SMCA_NBIO strings, indexed by extended error code; 5 entries, cf. xec_bitmap 0x1F */
+       "ECC or Parity error",
+       "PCIE error",
+       "SDP ErrEvent error",
+       "SDP Egress Poison Error",
+       "IOHC Internal Poison Error",
+};
+
+static const char * const smca_pcie_mce_desc[] = { /* SMCA_PCIE strings, indexed by extended error code; 5 entries, cf. xec_bitmap 0x1F */
+       "CCIX PER Message logging",
+       "CCIX Read Response with Status: Non-Data Error",
+       "CCIX Write Response with Status: Non-Data Error",
+       "CCIX Read Response with Status: Data Error",
+       "CCIX Non-okay write response with data error",
 };
 
 struct smca_mce_desc {
@@ -299,11 +384,17 @@ static struct smca_mce_desc smca_mce_descs[] = {
        [SMCA_FP]       = { smca_fp_mce_desc,   ARRAY_SIZE(smca_fp_mce_desc)    },
        [SMCA_L3_CACHE] = { smca_l3_mce_desc,   ARRAY_SIZE(smca_l3_mce_desc)    },
        [SMCA_CS]       = { smca_cs_mce_desc,   ARRAY_SIZE(smca_cs_mce_desc)    },
+       [SMCA_CS_V2]    = { smca_cs2_mce_desc,  ARRAY_SIZE(smca_cs2_mce_desc)   },
        [SMCA_PIE]      = { smca_pie_mce_desc,  ARRAY_SIZE(smca_pie_mce_desc)   },
        [SMCA_UMC]      = { smca_umc_mce_desc,  ARRAY_SIZE(smca_umc_mce_desc)   },
        [SMCA_PB]       = { smca_pb_mce_desc,   ARRAY_SIZE(smca_pb_mce_desc)    },
        [SMCA_PSP]      = { smca_psp_mce_desc,  ARRAY_SIZE(smca_psp_mce_desc)   },
+       [SMCA_PSP_V2]   = { smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc)  },
        [SMCA_SMU]      = { smca_smu_mce_desc,  ARRAY_SIZE(smca_smu_mce_desc)   },
+       [SMCA_SMU_V2]   = { smca_smu2_mce_desc, ARRAY_SIZE(smca_smu2_mce_desc)  },
+       [SMCA_MP5]      = { smca_mp5_mce_desc,  ARRAY_SIZE(smca_mp5_mce_desc)   },
+       [SMCA_NBIO]     = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc)  },
+       [SMCA_PCIE]     = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc)  },
 };
 
 static bool f12h_mc0_mce(u16 ec, u8 xec)
@@ -874,13 +965,12 @@ static void decode_smca_error(struct mce *m)
 
        ip_name = smca_get_long_name(bank_type);
 
-       pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec);
+       pr_emerg(HW_ERR "%s Ext. Error Code: %d", ip_name, xec);
 
        /* Only print the decode of valid error codes */
        if (xec < smca_mce_descs[bank_type].num_descs &&
                        (hwid->xec_bitmap & BIT_ULL(xec))) {
-               pr_emerg(HW_ERR "%s Error: ", ip_name);
-               pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]);
+               pr_cont(", %s.\n", smca_mce_descs[bank_type].descs[xec]);
        }
 
        if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc)
@@ -961,26 +1051,18 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
                ((m->status & MCI_STATUS_UC)    ? "UE"    :
                 (m->status & MCI_STATUS_DEFERRED) ? "-"  : "CE"),
                ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"),
-               ((m->status & MCI_STATUS_PCC)   ? "PCC"   : "-"),
-               ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"));
-
-       if (fam >= 0x15) {
-               pr_cont("|%s", (m->status & MCI_STATUS_DEFERRED ? "Deferred" : "-"));
-
-               /* F15h, bank4, bit 43 is part of McaStatSubCache. */
-               if (fam != 0x15 || m->bank != 4)
-                       pr_cont("|%s", (m->status & MCI_STATUS_POISON ? "Poison" : "-"));
-       }
+               ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"),
+               ((m->status & MCI_STATUS_PCC)   ? "PCC"   : "-"));
 
        if (boot_cpu_has(X86_FEATURE_SMCA)) {
                u32 low, high;
                u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
 
-               pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-"));
-
                if (!rdmsr_safe(addr, &low, &high) &&
                    (low & MCI_CONFIG_MCAX))
                        pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));
+
+               pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-"));
        }
 
        /* do the two bits[14:13] together */
@@ -988,6 +1070,17 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
        if (ecc)
                pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));
 
+       if (fam >= 0x15) {
+               pr_cont("|%s", (m->status & MCI_STATUS_DEFERRED ? "Deferred" : "-"));
+
+               /* F15h, bank4, bit 43 is part of McaStatSubCache. */
+               if (fam != 0x15 || m->bank != 4)
+                       pr_cont("|%s", (m->status & MCI_STATUS_POISON ? "Poison" : "-"));
+       }
+
+       if (fam >= 0x17)
+               pr_cont("|%s", (m->status & MCI_STATUS_SCRUB ? "Scrub" : "-"));
+
        pr_cont("]: 0x%016llx\n", m->status);
 
        if (m->status & MCI_STATUS_ADDRV)
index 3f38907320dccd963246fb22008f35f8e9f52753..95540ea8dd9db905fa76019e9a0ce618c053a84c 100644 (file)
@@ -14,7 +14,7 @@
 #define TRACE_INCLUDE_PATH ../../include/ras
 #include <ras/ras_event.h>
 
-void log_non_standard_event(const uuid_le *sec_type, const uuid_le *fru_id,
+void log_non_standard_event(const guid_t *sec_type, const guid_t *fru_id,
                            const char *fru_text, const u8 sev, const u8 *err,
                            const u32 len)
 {
index a0794632fd01a4651942a326aecbaddaab5a93fa..36c5c5e38c1d8a0e7ec2757b03d7057b9cc68829 100644 (file)
@@ -27,7 +27,7 @@
 TRACE_EVENT(extlog_mem_event,
        TP_PROTO(struct cper_sec_mem_err *mem,
                 u32 err_seq,
-                const uuid_le *fru_id,
+                const guid_t *fru_id,
                 const char *fru_text,
                 u8 sev),
 
@@ -39,7 +39,7 @@ TRACE_EVENT(extlog_mem_event,
                __field(u8, sev)
                __field(u64, pa)
                __field(u8, pa_mask_lsb)
-               __field_struct(uuid_le, fru_id)
+               __field_struct(guid_t, fru_id)
                __string(fru_text, fru_text)
                __field_struct(struct cper_mem_err_compact, data)
        ),
@@ -218,8 +218,8 @@ TRACE_EVENT(arm_event,
  */
 TRACE_EVENT(non_standard_event,
 
-       TP_PROTO(const uuid_le *sec_type,
-                const uuid_le *fru_id,
+       TP_PROTO(const guid_t *sec_type,
+                const guid_t *fru_id,
                 const char *fru_text,
                 const u8 sev,
                 const u8 *err,