Merge tag 'edac_for_4.20_2' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 2 Nov 2018 18:17:22 +0000 (11:17 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 2 Nov 2018 18:17:22 +0000 (11:17 -0700)
Pull more EDAC updates from Borislav Petkov:
 "The second part of the EDAC pile which contains the ADXL user and a
  build fix which addresses a not-so-sensical .config but fixes
  randconfig builds people do:

   - skx_edac: Address translation for NVDIMMs (Tony Luck and Qiuxu Zhuo)

   - ACPI_ADXL build fix"

[ I don't think "sensical" is a word, particularly when used in the
  context of actually meaning "nonsensical", but I like it   - Linus ]

* tag 'edac_for_4.20_2' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp:
  EDAC, skx: Fix randconfig builds
  EDAC, skx_edac: Add address translation for non-volatile DIMMs

drivers/edac/Kconfig
drivers/edac/skx_edac.c
include/linux/adxl.h

index df9467eef32a0e4b67090e56e1a5c0260a0f4037..41c9ccdd20d65658f461991ab4e8bc74d0e6fa4a 100644 (file)
@@ -234,6 +234,7 @@ config EDAC_SKX
        depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG
        depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_SKX can't be y
        select DMI
+       select ACPI_ADXL if ACPI
        help
          Support for error detection and correction the Intel
          Skylake server Integrated Memory Controllers. If your
index dd209e0dd9abb2ca72c0c2b45a5548088852d5c9..a99ea61dad321dddad4ab28bea15ce593ae0c24f 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/bitmap.h>
 #include <linux/math64.h>
 #include <linux/mod_devicetable.h>
+#include <linux/adxl.h>
 #include <acpi/nfit.h>
 #include <asm/cpu_device_id.h>
 #include <asm/intel-family.h>
@@ -35,6 +36,7 @@
 #include "edac_module.h"
 
 #define EDAC_MOD_STR    "skx_edac"
+#define MSG_SIZE       1024
 
 /*
  * Debug macros
 static LIST_HEAD(skx_edac_list);
 
 static u64 skx_tolm, skx_tohm;
+static char *skx_msg;
+static unsigned int nvdimm_count;
+
+/*
+ * Slots for the ADXL components this driver consumes. The values index
+ * component_names[] and component_indices[]; INDEX_MAX is the slot count.
+ */
+enum {
+       INDEX_SOCKET = 0,
+       INDEX_MEMCTRL,
+       INDEX_CHANNEL,
+       INDEX_DIMM,
+       INDEX_MAX
+};
+
+/*
+ * Component names searched for in the list returned by
+ * adxl_get_component_names(), one per INDEX_* slot.
+ */
+static const char * const component_names[INDEX_MAX] = {
+       [INDEX_SOCKET]  = "ProcessorSocketId",
+       [INDEX_MEMCTRL] = "MemoryControllerId",
+       [INDEX_CHANNEL] = "ChannelId",
+       [INDEX_DIMM]    = "DimmSlotId",
+};
+
+static int component_indices[ARRAY_SIZE(component_names)];
+static int adxl_component_count;
+static const char * const *adxl_component_names;
+static u64 *adxl_values;
+static char *adxl_msg;
 
 #define NUM_IMC                        2       /* memory controllers per socket */
 #define NUM_CHANNELS           3       /* channels per memory controller */
@@ -393,6 +418,8 @@ static int get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
        u16 flags;
        u64 size = 0;
 
+       nvdimm_count++;
+
        dev_handle = ACPI_NFIT_BUILD_DEVICE_HANDLE(dimmno, chan, imc->lmc,
                                                   imc->src_id, 0);
 
@@ -941,12 +968,46 @@ static void teardown_skx_debug(void)
 }
 #endif /*CONFIG_EDAC_DEBUG*/
 
+/*
+ * skx_adxl_decode: translate res->addr through the firmware ADXL interface.
+ *
+ * Addresses at or above skx_tohm, or in the range [skx_tolm, 4GiB), are
+ * rejected as out of range. On success res->socket, res->imc, res->channel
+ * and res->dimm are filled from the decoded component values, and adxl_msg
+ * holds a " name:0xvalue" list of every component whose value is not ~0ull.
+ *
+ * Uses the shared adxl_values and adxl_msg buffers; the visible caller only
+ * invokes this when adxl_component_count is non-zero.
+ *
+ * Returns true when the address was decoded, false otherwise.
+ */
+static bool skx_adxl_decode(struct decoded_addr *res)
+{
+       int i, len = 0;
+
+       if (res->addr >= skx_tohm || (res->addr >= skx_tolm &&
+                                     res->addr < BIT_ULL(32))) {
+               edac_dbg(0, "Address 0x%llx out of range\n", res->addr);
+               return false;
+       }
+
+       if (adxl_decode(res->addr, adxl_values)) {
+               edac_dbg(0, "Failed to decode 0x%llx\n", res->addr);
+               return false;
+       }
+
+       res->socket  = (int)adxl_values[component_indices[INDEX_SOCKET]];
+       res->imc     = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
+       res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
+       res->dimm    = (int)adxl_values[component_indices[INDEX_DIMM]];
+
+       /*
+        * Start from an empty string: if every component below is skipped,
+        * the text of a previous error must not be reported again.
+        */
+       adxl_msg[0] = '\0';
+
+       for (i = 0; i < adxl_component_count; i++) {
+               /* All-ones values are left out of the message. */
+               if (adxl_values[i] == ~0x0ull)
+                       continue;
+
+               len += snprintf(adxl_msg + len, MSG_SIZE - len, " %s:0x%llx",
+                               adxl_component_names[i], adxl_values[i]);
+               /* snprintf() reports the untruncated length; stop when full. */
+               if (MSG_SIZE - len <= 0)
+                       break;
+       }
+
+       return true;
+}
+
 static void skx_mce_output_error(struct mem_ctl_info *mci,
                                 const struct mce *m,
                                 struct decoded_addr *res)
 {
        enum hw_event_mc_err_type tp_event;
-       char *type, *optype, msg[256];
+       char *type, *optype;
        bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
        bool overflow = GET_BITFIELD(m->status, 62, 62);
        bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
@@ -1007,22 +1068,47 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
                        break;
                }
        }
+       if (adxl_component_count) {
+               snprintf(skx_msg, MSG_SIZE, "%s%s err_code:%04x:%04x %s",
+                        overflow ? " OVERFLOW" : "",
+                        (uncorrected_error && recoverable) ? " recoverable" : "",
+                        mscod, errcode, adxl_msg);
+       } else {
+               snprintf(skx_msg, MSG_SIZE,
+                        "%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x",
+                        overflow ? " OVERFLOW" : "",
+                        (uncorrected_error && recoverable) ? " recoverable" : "",
+                        mscod, errcode,
+                        res->socket, res->imc, res->rank,
+                        res->bank_group, res->bank_address, res->row, res->column);
+       }
 
-       snprintf(msg, sizeof(msg),
-                "%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x",
-                overflow ? " OVERFLOW" : "",
-                (uncorrected_error && recoverable) ? " recoverable" : "",
-                mscod, errcode,
-                res->socket, res->imc, res->rank,
-                res->bank_group, res->bank_address, res->row, res->column);
-
-       edac_dbg(0, "%s\n", msg);
+       edac_dbg(0, "%s\n", skx_msg);
 
        /* Call the helper to output message */
        edac_mc_handle_error(tp_event, mci, core_err_cnt,
                             m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
                             res->channel, res->dimm, -1,
-                            optype, msg);
+                            optype, skx_msg);
+}
+
+/*
+ * get_mci: find the memory controller instance for a socket/controller pair.
+ *
+ * @src_id: value compared against imc[0].src_id of each device on
+ *          skx_edac_list
+ * @lmc:    index into that device's imc[] array; must be in [0, NUM_IMC)
+ *
+ * Returns the matching imc[lmc].mci, or NULL (after logging an error) when
+ * lmc is out of range or no device matches src_id.
+ */
+static struct mem_ctl_info *get_mci(int src_id, int lmc)
+{
+       struct skx_dev *d;
+
+       /*
+        * lmc comes from an (int) cast of a firmware-supplied 64-bit value,
+        * so it can be negative as well as too large; reject both before
+        * using it as an array index.
+        */
+       if (lmc < 0 || lmc > NUM_IMC - 1) {
+               skx_printk(KERN_ERR, "Bad lmc %d\n", lmc);
+               return NULL;
+       }
+
+       list_for_each_entry(d, &skx_edac_list, list) {
+               if (d->imc[0].src_id == src_id)
+                       return d->imc[lmc].mci;
+       }
+
+       skx_printk(KERN_ERR, "No mci for src_id %d lmc %d\n", src_id, lmc);
+
+       return NULL;
+}
 
 static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
@@ -1040,10 +1126,23 @@ static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
        if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
                return NOTIFY_DONE;
 
+       memset(&res, 0, sizeof(res));
        res.addr = mce->addr;
-       if (!skx_decode(&res))
+
+       if (adxl_component_count) {
+               if (!skx_adxl_decode(&res))
+                       return NOTIFY_DONE;
+
+               mci = get_mci(res.socket, res.imc);
+       } else {
+               if (!skx_decode(&res))
+                       return NOTIFY_DONE;
+
+               mci = res.dev->imc[res.imc].mci;
+       }
+
+       if (!mci)
                return NOTIFY_DONE;
-       mci = res.dev->imc[res.imc].mci;
 
        if (mce->mcgstatus & MCG_STATUS_MCIP)
                type = "Exception";
@@ -1094,6 +1193,62 @@ static void skx_remove(void)
        }
 }
 
+/*
+ * skx_adxl_get: set up firmware (ADXL) address translation.
+ *
+ * Fetches the component name list from adxl_get_component_names(), records
+ * in component_indices[] where each entry of component_names[] sits in that
+ * list, and allocates the adxl_values and adxl_msg buffers.
+ *
+ * adxl_component_count is left at zero on every failure (no firmware
+ * support, a required component missing, or allocation failure); the rest
+ * of the driver tests that count to choose between ADXL and its own
+ * decoder. The count is published only after both buffers exist.
+ */
+static void __init skx_adxl_get(void)
+{
+       const char * const *names;
+       int i, j, count = 0;
+
+       names = adxl_get_component_names();
+       if (!names) {
+               skx_printk(KERN_NOTICE, "No firmware support for address translation.");
+               skx_printk(KERN_CONT, " Only decoding DDR4 address!\n");
+               return;
+       }
+
+       /* Every required component must be present in the firmware list. */
+       for (i = 0; i < INDEX_MAX; i++) {
+               for (j = 0; names[j]; j++) {
+                       if (!strcmp(component_names[i], names[j])) {
+                               component_indices[i] = j;
+                               break;
+                       }
+               }
+
+               if (!names[j])
+                       goto err;
+       }
+
+       /* The name list is NULL-terminated. */
+       while (names[count])
+               count++;
+
+       adxl_values = kcalloc(count, sizeof(*adxl_values), GFP_KERNEL);
+       if (!adxl_values)
+               return;
+
+       adxl_msg = kzalloc(MSG_SIZE, GFP_KERNEL);
+       if (!adxl_msg) {
+               kfree(adxl_values);
+               /* skx_adxl_put() frees this pointer again; don't leave it dangling. */
+               adxl_values = NULL;
+               return;
+       }
+
+       adxl_component_names = names;
+       adxl_component_count = count;
+
+       return;
+err:
+       skx_printk(KERN_ERR, "'%s' is not matched from DSM parameters: ",
+                  component_names[i]);
+       for (j = 0; names[j]; j++)
+               skx_printk(KERN_CONT, "%s ", names[j]);
+       skx_printk(KERN_CONT, "\n");
+}
+
+/* skx_adxl_put: free the adxl_msg and adxl_values buffers. */
+static void __exit skx_adxl_put(void)
+{
+       kfree(adxl_msg);
+       kfree(adxl_values);
+}
+
 /*
  * skx_init:
  *     make sure we are running on the correct cpu model
@@ -1158,6 +1313,15 @@ static int __init skx_init(void)
                }
        }
 
+       skx_msg = kzalloc(MSG_SIZE, GFP_KERNEL);
+       if (!skx_msg) {
+               rc = -ENOMEM;
+               goto fail;
+       }
+
+       if (nvdimm_count)
+               skx_adxl_get();
+
        /* Ensure that the OPSTATE is set correctly for POLL or NMI */
        opstate_init();
 
@@ -1176,6 +1340,9 @@ static void __exit skx_exit(void)
        edac_dbg(2, "\n");
        mce_unregister_decode_chain(&skx_mce_dec);
        skx_remove();
+       if (nvdimm_count)
+               skx_adxl_put();
+       kfree(skx_msg);
        teardown_skx_debug();
 }
 
index 2a629acb4c3f467221c8db81ded322780f31b99a..2d29f55923e3ad2e79c70cd0832a8c8523f96ef8 100644 (file)
@@ -7,7 +7,12 @@
 #ifndef _LINUX_ADXL_H
 #define _LINUX_ADXL_H
 
+#ifdef CONFIG_ACPI_ADXL
 const char * const *adxl_get_component_names(void);
 int adxl_decode(u64 addr, u64 component_values[]);
+#else
+static inline const char * const *adxl_get_component_names(void)  { return NULL; }
+static inline int adxl_decode(u64 addr, u64 component_values[])   { return  -EOPNOTSUPP; }
+#endif
 
 #endif /* _LINUX_ADXL_H */