net: hns3: Add enable and process common ecc errors
[muen/linux.git] / drivers / net / ethernet / hisilicon / hns3 / hns3pf / hclge_err.c
1 // SPDX-License-Identifier: GPL-2.0+
2 /* Copyright (c) 2016-2017 Hisilicon Limited. */
3
4 #include "hclge_err.h"
5
6 static const struct hclge_hw_error hclge_imp_tcm_ecc_int[] = {
7         { .int_msk = BIT(0), .msg = "imp_itcm0_ecc_1bit_err" },
8         { .int_msk = BIT(1), .msg = "imp_itcm0_ecc_mbit_err" },
9         { .int_msk = BIT(2), .msg = "imp_itcm1_ecc_1bit_err" },
10         { .int_msk = BIT(3), .msg = "imp_itcm1_ecc_mbit_err" },
11         { .int_msk = BIT(4), .msg = "imp_itcm2_ecc_1bit_err" },
12         { .int_msk = BIT(5), .msg = "imp_itcm2_ecc_mbit_err" },
13         { .int_msk = BIT(6), .msg = "imp_itcm3_ecc_1bit_err" },
14         { .int_msk = BIT(7), .msg = "imp_itcm3_ecc_mbit_err" },
15         { .int_msk = BIT(8), .msg = "imp_dtcm0_mem0_ecc_1bit_err" },
16         { .int_msk = BIT(9), .msg = "imp_dtcm0_mem0_ecc_mbit_err" },
17         { .int_msk = BIT(10), .msg = "imp_dtcm0_mem1_ecc_1bit_err" },
18         { .int_msk = BIT(11), .msg = "imp_dtcm0_mem1_ecc_mbit_err" },
19         { .int_msk = BIT(12), .msg = "imp_dtcm1_mem0_ecc_1bit_err" },
20         { .int_msk = BIT(13), .msg = "imp_dtcm1_mem0_ecc_mbit_err" },
21         { .int_msk = BIT(14), .msg = "imp_dtcm1_mem1_ecc_1bit_err" },
22         { .int_msk = BIT(15), .msg = "imp_dtcm1_mem1_ecc_mbit_err" },
23         { /* sentinel */ }
24 };
25
26 static const struct hclge_hw_error hclge_imp_itcm4_ecc_int[] = {
27         { .int_msk = BIT(0), .msg = "imp_itcm4_ecc_1bit_err" },
28         { .int_msk = BIT(1), .msg = "imp_itcm4_ecc_mbit_err" },
29         { /* sentinel */ }
30 };
31
32 static const struct hclge_hw_error hclge_cmdq_nic_mem_ecc_int[] = {
33         { .int_msk = BIT(0), .msg = "cmdq_nic_rx_depth_ecc_1bit_err" },
34         { .int_msk = BIT(1), .msg = "cmdq_nic_rx_depth_ecc_mbit_err" },
35         { .int_msk = BIT(2), .msg = "cmdq_nic_tx_depth_ecc_1bit_err" },
36         { .int_msk = BIT(3), .msg = "cmdq_nic_tx_depth_ecc_mbit_err" },
37         { .int_msk = BIT(4), .msg = "cmdq_nic_rx_tail_ecc_1bit_err" },
38         { .int_msk = BIT(5), .msg = "cmdq_nic_rx_tail_ecc_mbit_err" },
39         { .int_msk = BIT(6), .msg = "cmdq_nic_tx_tail_ecc_1bit_err" },
40         { .int_msk = BIT(7), .msg = "cmdq_nic_tx_tail_ecc_mbit_err" },
41         { .int_msk = BIT(8), .msg = "cmdq_nic_rx_head_ecc_1bit_err" },
42         { .int_msk = BIT(9), .msg = "cmdq_nic_rx_head_ecc_mbit_err" },
43         { .int_msk = BIT(10), .msg = "cmdq_nic_tx_head_ecc_1bit_err" },
44         { .int_msk = BIT(11), .msg = "cmdq_nic_tx_head_ecc_mbit_err" },
45         { .int_msk = BIT(12), .msg = "cmdq_nic_rx_addr_ecc_1bit_err" },
46         { .int_msk = BIT(13), .msg = "cmdq_nic_rx_addr_ecc_mbit_err" },
47         { .int_msk = BIT(14), .msg = "cmdq_nic_tx_addr_ecc_1bit_err" },
48         { .int_msk = BIT(15), .msg = "cmdq_nic_tx_addr_ecc_mbit_err" },
49         { /* sentinel */ }
50 };
51
52 static const struct hclge_hw_error hclge_cmdq_rocee_mem_ecc_int[] = {
53         { .int_msk = BIT(0), .msg = "cmdq_rocee_rx_depth_ecc_1bit_err" },
54         { .int_msk = BIT(1), .msg = "cmdq_rocee_rx_depth_ecc_mbit_err" },
55         { .int_msk = BIT(2), .msg = "cmdq_rocee_tx_depth_ecc_1bit_err" },
56         { .int_msk = BIT(3), .msg = "cmdq_rocee_tx_depth_ecc_mbit_err" },
57         { .int_msk = BIT(4), .msg = "cmdq_rocee_rx_tail_ecc_1bit_err" },
58         { .int_msk = BIT(5), .msg = "cmdq_rocee_rx_tail_ecc_mbit_err" },
59         { .int_msk = BIT(6), .msg = "cmdq_rocee_tx_tail_ecc_1bit_err" },
60         { .int_msk = BIT(7), .msg = "cmdq_rocee_tx_tail_ecc_mbit_err" },
61         { .int_msk = BIT(8), .msg = "cmdq_rocee_rx_head_ecc_1bit_err" },
62         { .int_msk = BIT(9), .msg = "cmdq_rocee_rx_head_ecc_mbit_err" },
63         { .int_msk = BIT(10), .msg = "cmdq_rocee_tx_head_ecc_1bit_err" },
64         { .int_msk = BIT(11), .msg = "cmdq_rocee_tx_head_ecc_mbit_err" },
65         { .int_msk = BIT(12), .msg = "cmdq_rocee_rx_addr_ecc_1bit_err" },
66         { .int_msk = BIT(13), .msg = "cmdq_rocee_rx_addr_ecc_mbit_err" },
67         { .int_msk = BIT(14), .msg = "cmdq_rocee_tx_addr_ecc_1bit_err" },
68         { .int_msk = BIT(15), .msg = "cmdq_rocee_tx_addr_ecc_mbit_err" },
69         { /* sentinel */ }
70 };
71
72 static const struct hclge_hw_error hclge_tqp_int_ecc_int[] = {
73         { .int_msk = BIT(0), .msg = "tqp_int_cfg_even_ecc_1bit_err" },
74         { .int_msk = BIT(1), .msg = "tqp_int_cfg_odd_ecc_1bit_err" },
75         { .int_msk = BIT(2), .msg = "tqp_int_ctrl_even_ecc_1bit_err" },
76         { .int_msk = BIT(3), .msg = "tqp_int_ctrl_odd_ecc_1bit_err" },
77         { .int_msk = BIT(4), .msg = "tx_que_scan_int_ecc_1bit_err" },
78         { .int_msk = BIT(5), .msg = "rx_que_scan_int_ecc_1bit_err" },
79         { .int_msk = BIT(6), .msg = "tqp_int_cfg_even_ecc_mbit_err" },
80         { .int_msk = BIT(7), .msg = "tqp_int_cfg_odd_ecc_mbit_err" },
81         { .int_msk = BIT(8), .msg = "tqp_int_ctrl_even_ecc_mbit_err" },
82         { .int_msk = BIT(9), .msg = "tqp_int_ctrl_odd_ecc_mbit_err" },
83         { .int_msk = BIT(10), .msg = "tx_que_scan_int_ecc_mbit_err" },
84         { .int_msk = BIT(11), .msg = "rx_que_scan_int_ecc_mbit_err" },
85         { /* sentinel */ }
86 };
87
88 static void hclge_log_error(struct device *dev,
89                             const struct hclge_hw_error *err_list,
90                             u32 err_sts)
91 {
92         const struct hclge_hw_error *err;
93         int i = 0;
94
95         while (err_list[i].msg) {
96                 err = &err_list[i];
97                 if (!(err->int_msk & err_sts)) {
98                         i++;
99                         continue;
100                 }
101                 dev_warn(dev, "%s [error status=0x%x] found\n",
102                          err->msg, err_sts);
103                 i++;
104         }
105 }
106
107 /* hclge_cmd_query_error: read the error information
108  * @hdev: pointer to struct hclge_dev
109  * @desc: descriptor for describing the command
110  * @cmd:  command opcode
111  * @flag: flag for extended command structure
112  * @w_num: offset for setting the read interrupt type.
113  * @int_type: select which type of the interrupt for which the error
114  * info will be read(RAS-CE/RAS-NFE/RAS-FE etc).
115  *
116  * This function query the error info from hw register/s using command
117  */
118 static int hclge_cmd_query_error(struct hclge_dev *hdev,
119                                  struct hclge_desc *desc, u32 cmd,
120                                  u16 flag, u8 w_num,
121                                  enum hclge_err_int_type int_type)
122 {
123         struct device *dev = &hdev->pdev->dev;
124         int num = 1;
125         int ret;
126
127         hclge_cmd_setup_basic_desc(&desc[0], cmd, true);
128         if (flag) {
129                 desc[0].flag |= cpu_to_le16(flag);
130                 hclge_cmd_setup_basic_desc(&desc[1], cmd, true);
131                 num = 2;
132         }
133         if (w_num)
134                 desc[0].data[w_num] = cpu_to_le32(int_type);
135
136         ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
137         if (ret)
138                 dev_err(dev, "query error cmd failed (%d)\n", ret);
139
140         return ret;
141 }
142
143 /* hclge_cmd_clear_error: clear the error status
144  * @hdev: pointer to struct hclge_dev
145  * @desc: descriptor for describing the command
146  * @desc_src: prefilled descriptor from the previous command for reusing
147  * @cmd:  command opcode
148  * @flag: flag for extended command structure
149  *
150  * This function clear the error status in the hw register/s using command
151  */
152 static int hclge_cmd_clear_error(struct hclge_dev *hdev,
153                                  struct hclge_desc *desc,
154                                  struct hclge_desc *desc_src,
155                                  u32 cmd, u16 flag)
156 {
157         struct device *dev = &hdev->pdev->dev;
158         int num = 1;
159         int ret, i;
160
161         if (cmd) {
162                 hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
163                 if (flag) {
164                         desc[0].flag |= cpu_to_le16(flag);
165                         hclge_cmd_setup_basic_desc(&desc[1], cmd, false);
166                         num = 2;
167                 }
168                 if (desc_src) {
169                         for (i = 0; i < 6; i++) {
170                                 desc[0].data[i] = desc_src[0].data[i];
171                                 if (flag)
172                                         desc[1].data[i] = desc_src[1].data[i];
173                         }
174                 }
175         } else {
176                 hclge_cmd_reuse_desc(&desc[0], false);
177                 if (flag) {
178                         desc[0].flag |= cpu_to_le16(flag);
179                         hclge_cmd_reuse_desc(&desc[1], false);
180                         num = 2;
181                 }
182         }
183         ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
184         if (ret)
185                 dev_err(dev, "clear error cmd failed (%d)\n", ret);
186
187         return ret;
188 }
189
190 static int hclge_enable_common_error(struct hclge_dev *hdev, bool en)
191 {
192         struct device *dev = &hdev->pdev->dev;
193         struct hclge_desc desc[2];
194         int ret;
195
196         hclge_cmd_setup_basic_desc(&desc[0], HCLGE_COMMON_ECC_INT_CFG, false);
197         desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
198         hclge_cmd_setup_basic_desc(&desc[1], HCLGE_COMMON_ECC_INT_CFG, false);
199
200         if (en) {
201                 /* enable COMMON error interrupts */
202                 desc[0].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN);
203                 desc[0].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN |
204                                         HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN);
205                 desc[0].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN);
206                 desc[0].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN);
207                 desc[0].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN);
208         } else {
209                 /* disable COMMON error interrupts */
210                 desc[0].data[0] = 0;
211                 desc[0].data[2] = 0;
212                 desc[0].data[3] = 0;
213                 desc[0].data[4] = 0;
214                 desc[0].data[5] = 0;
215         }
216         desc[1].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN_MASK);
217         desc[1].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN_MASK |
218                                 HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN_MASK);
219         desc[1].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK);
220         desc[1].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN_MASK);
221         desc[1].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN_MASK);
222
223         ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
224         if (ret)
225                 dev_err(dev,
226                         "failed(%d) to enable/disable COMMON err interrupts\n",
227                         ret);
228
229         return ret;
230 }
231
232 static void hclge_process_common_error(struct hclge_dev *hdev,
233                                        enum hclge_err_int_type type)
234 {
235         struct device *dev = &hdev->pdev->dev;
236         struct hclge_desc desc[2];
237         u32 err_sts;
238         int ret;
239
240         /* read err sts */
241         ret = hclge_cmd_query_error(hdev, &desc[0],
242                                     HCLGE_COMMON_ECC_INT_CFG,
243                                     HCLGE_CMD_FLAG_NEXT, 0, 0);
244         if (ret) {
245                 dev_err(dev,
246                         "failed(=%d) to query COMMON error interrupt status\n",
247                         ret);
248                 return;
249         }
250
251         /* log err */
252         err_sts = (le32_to_cpu(desc[0].data[0])) & HCLGE_IMP_TCM_ECC_INT_MASK;
253         hclge_log_error(dev, &hclge_imp_tcm_ecc_int[0], err_sts);
254
255         err_sts = (le32_to_cpu(desc[0].data[1])) & HCLGE_CMDQ_ECC_INT_MASK;
256         hclge_log_error(dev, &hclge_cmdq_nic_mem_ecc_int[0], err_sts);
257
258         err_sts = (le32_to_cpu(desc[0].data[1]) >> HCLGE_CMDQ_ROC_ECC_INT_SHIFT)
259                    & HCLGE_CMDQ_ECC_INT_MASK;
260         hclge_log_error(dev, &hclge_cmdq_rocee_mem_ecc_int[0], err_sts);
261
262         if ((le32_to_cpu(desc[0].data[3])) & BIT(0))
263                 dev_warn(dev, "imp_rd_data_poison_err found\n");
264
265         err_sts = (le32_to_cpu(desc[0].data[3]) >> HCLGE_TQP_ECC_INT_SHIFT) &
266                    HCLGE_TQP_ECC_INT_MASK;
267         hclge_log_error(dev, &hclge_tqp_int_ecc_int[0], err_sts);
268
269         err_sts = (le32_to_cpu(desc[0].data[5])) &
270                    HCLGE_IMP_ITCM4_ECC_INT_MASK;
271         hclge_log_error(dev, &hclge_imp_itcm4_ecc_int[0], err_sts);
272
273         /* clear error interrupts */
274         desc[1].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_CLR_MASK);
275         desc[1].data[1] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_CLR_MASK |
276                                 HCLGE_CMDQ_ROCEE_ECC_CLR_MASK);
277         desc[1].data[3] = cpu_to_le32(HCLGE_TQP_IMP_ERR_CLR_MASK);
278         desc[1].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_CLR_MASK);
279
280         ret = hclge_cmd_clear_error(hdev, &desc[0], NULL, 0,
281                                     HCLGE_CMD_FLAG_NEXT);
282         if (ret)
283                 dev_err(dev,
284                         "failed(%d) to clear COMMON error interrupt status\n",
285                         ret);
286 }
287
288 static const struct hclge_hw_blk hw_blk[] = {
289         { .msk = BIT(5), .name = "COMMON",
290           .enable_error = hclge_enable_common_error,
291           .process_error = hclge_process_common_error, },
292         { /* sentinel */ }
293 };
294
295 int hclge_hw_error_set_state(struct hclge_dev *hdev, bool state)
296 {
297         struct device *dev = &hdev->pdev->dev;
298         int ret = 0;
299         int i = 0;
300
301         while (hw_blk[i].name) {
302                 if (!hw_blk[i].enable_error) {
303                         i++;
304                         continue;
305                 }
306                 ret = hw_blk[i].enable_error(hdev, state);
307                 if (ret) {
308                         dev_err(dev, "fail(%d) to en/disable err int\n", ret);
309                         return ret;
310                 }
311                 i++;
312         }
313
314         return ret;
315 }
316
317 pci_ers_result_t hclge_process_ras_hw_error(struct hnae3_ae_dev *ae_dev)
318 {
319         struct hclge_dev *hdev = ae_dev->priv;
320         struct device *dev = &hdev->pdev->dev;
321         u32 sts, val;
322         int i = 0;
323
324         sts = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG);
325
326         /* Processing Non-fatal errors */
327         if (sts & HCLGE_RAS_REG_NFE_MASK) {
328                 val = (sts >> HCLGE_RAS_REG_NFE_SHIFT) & 0xFF;
329                 i = 0;
330                 while (hw_blk[i].name) {
331                         if (!(hw_blk[i].msk & val)) {
332                                 i++;
333                                 continue;
334                         }
335                         dev_warn(dev, "%s ras non-fatal error identified\n",
336                                  hw_blk[i].name);
337                         if (hw_blk[i].process_error)
338                                 hw_blk[i].process_error(hdev,
339                                                          HCLGE_ERR_INT_RAS_NFE);
340                         i++;
341                 }
342         }
343
344         return PCI_ERS_RESULT_NEED_RESET;
345 }