// SPDX-License-Identifier: GPL-2.0+
/* Copyright (c) 2016-2017 Hisilicon Limited. */

#include "hclge_err.h"
static const struct hclge_hw_error hclge_imp_tcm_ecc_int[] = {
	{ .int_msk = BIT(0), .msg = "imp_itcm0_ecc_1bit_err" },
	{ .int_msk = BIT(1), .msg = "imp_itcm0_ecc_mbit_err" },
	{ .int_msk = BIT(2), .msg = "imp_itcm1_ecc_1bit_err" },
	{ .int_msk = BIT(3), .msg = "imp_itcm1_ecc_mbit_err" },
	{ .int_msk = BIT(4), .msg = "imp_itcm2_ecc_1bit_err" },
	{ .int_msk = BIT(5), .msg = "imp_itcm2_ecc_mbit_err" },
	{ .int_msk = BIT(6), .msg = "imp_itcm3_ecc_1bit_err" },
	{ .int_msk = BIT(7), .msg = "imp_itcm3_ecc_mbit_err" },
	{ .int_msk = BIT(8), .msg = "imp_dtcm0_mem0_ecc_1bit_err" },
	{ .int_msk = BIT(9), .msg = "imp_dtcm0_mem0_ecc_mbit_err" },
	{ .int_msk = BIT(10), .msg = "imp_dtcm0_mem1_ecc_1bit_err" },
	{ .int_msk = BIT(11), .msg = "imp_dtcm0_mem1_ecc_mbit_err" },
	{ .int_msk = BIT(12), .msg = "imp_dtcm1_mem0_ecc_1bit_err" },
	{ .int_msk = BIT(13), .msg = "imp_dtcm1_mem0_ecc_mbit_err" },
	{ .int_msk = BIT(14), .msg = "imp_dtcm1_mem1_ecc_1bit_err" },
	{ .int_msk = BIT(15), .msg = "imp_dtcm1_mem1_ecc_mbit_err" },
	{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_imp_itcm4_ecc_int[] = {
	{ .int_msk = BIT(0), .msg = "imp_itcm4_ecc_1bit_err" },
	{ .int_msk = BIT(1), .msg = "imp_itcm4_ecc_mbit_err" },
	{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_cmdq_nic_mem_ecc_int[] = {
	{ .int_msk = BIT(0), .msg = "cmdq_nic_rx_depth_ecc_1bit_err" },
	{ .int_msk = BIT(1), .msg = "cmdq_nic_rx_depth_ecc_mbit_err" },
	{ .int_msk = BIT(2), .msg = "cmdq_nic_tx_depth_ecc_1bit_err" },
	{ .int_msk = BIT(3), .msg = "cmdq_nic_tx_depth_ecc_mbit_err" },
	{ .int_msk = BIT(4), .msg = "cmdq_nic_rx_tail_ecc_1bit_err" },
	{ .int_msk = BIT(5), .msg = "cmdq_nic_rx_tail_ecc_mbit_err" },
	{ .int_msk = BIT(6), .msg = "cmdq_nic_tx_tail_ecc_1bit_err" },
	{ .int_msk = BIT(7), .msg = "cmdq_nic_tx_tail_ecc_mbit_err" },
	{ .int_msk = BIT(8), .msg = "cmdq_nic_rx_head_ecc_1bit_err" },
	{ .int_msk = BIT(9), .msg = "cmdq_nic_rx_head_ecc_mbit_err" },
	{ .int_msk = BIT(10), .msg = "cmdq_nic_tx_head_ecc_1bit_err" },
	{ .int_msk = BIT(11), .msg = "cmdq_nic_tx_head_ecc_mbit_err" },
	{ .int_msk = BIT(12), .msg = "cmdq_nic_rx_addr_ecc_1bit_err" },
	{ .int_msk = BIT(13), .msg = "cmdq_nic_rx_addr_ecc_mbit_err" },
	{ .int_msk = BIT(14), .msg = "cmdq_nic_tx_addr_ecc_1bit_err" },
	{ .int_msk = BIT(15), .msg = "cmdq_nic_tx_addr_ecc_mbit_err" },
	{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_cmdq_rocee_mem_ecc_int[] = {
	{ .int_msk = BIT(0), .msg = "cmdq_rocee_rx_depth_ecc_1bit_err" },
	{ .int_msk = BIT(1), .msg = "cmdq_rocee_rx_depth_ecc_mbit_err" },
	{ .int_msk = BIT(2), .msg = "cmdq_rocee_tx_depth_ecc_1bit_err" },
	{ .int_msk = BIT(3), .msg = "cmdq_rocee_tx_depth_ecc_mbit_err" },
	{ .int_msk = BIT(4), .msg = "cmdq_rocee_rx_tail_ecc_1bit_err" },
	{ .int_msk = BIT(5), .msg = "cmdq_rocee_rx_tail_ecc_mbit_err" },
	{ .int_msk = BIT(6), .msg = "cmdq_rocee_tx_tail_ecc_1bit_err" },
	{ .int_msk = BIT(7), .msg = "cmdq_rocee_tx_tail_ecc_mbit_err" },
	{ .int_msk = BIT(8), .msg = "cmdq_rocee_rx_head_ecc_1bit_err" },
	{ .int_msk = BIT(9), .msg = "cmdq_rocee_rx_head_ecc_mbit_err" },
	{ .int_msk = BIT(10), .msg = "cmdq_rocee_tx_head_ecc_1bit_err" },
	{ .int_msk = BIT(11), .msg = "cmdq_rocee_tx_head_ecc_mbit_err" },
	{ .int_msk = BIT(12), .msg = "cmdq_rocee_rx_addr_ecc_1bit_err" },
	{ .int_msk = BIT(13), .msg = "cmdq_rocee_rx_addr_ecc_mbit_err" },
	{ .int_msk = BIT(14), .msg = "cmdq_rocee_tx_addr_ecc_1bit_err" },
	{ .int_msk = BIT(15), .msg = "cmdq_rocee_tx_addr_ecc_mbit_err" },
	{ /* sentinel */ }
};
static const struct hclge_hw_error hclge_tqp_int_ecc_int[] = {
	{ .int_msk = BIT(0), .msg = "tqp_int_cfg_even_ecc_1bit_err" },
	{ .int_msk = BIT(1), .msg = "tqp_int_cfg_odd_ecc_1bit_err" },
	{ .int_msk = BIT(2), .msg = "tqp_int_ctrl_even_ecc_1bit_err" },
	{ .int_msk = BIT(3), .msg = "tqp_int_ctrl_odd_ecc_1bit_err" },
	{ .int_msk = BIT(4), .msg = "tx_que_scan_int_ecc_1bit_err" },
	{ .int_msk = BIT(5), .msg = "rx_que_scan_int_ecc_1bit_err" },
	{ .int_msk = BIT(6), .msg = "tqp_int_cfg_even_ecc_mbit_err" },
	{ .int_msk = BIT(7), .msg = "tqp_int_cfg_odd_ecc_mbit_err" },
	{ .int_msk = BIT(8), .msg = "tqp_int_ctrl_even_ecc_mbit_err" },
	{ .int_msk = BIT(9), .msg = "tqp_int_ctrl_odd_ecc_mbit_err" },
	{ .int_msk = BIT(10), .msg = "tx_que_scan_int_ecc_mbit_err" },
	{ .int_msk = BIT(11), .msg = "rx_que_scan_int_ecc_mbit_err" },
	{ /* sentinel */ }
};
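/* hclge_log_error - log every error whose bit is set in the status word
 * @dev: device used for logging
 * @err_list: sentinel-terminated table mapping interrupt mask bits to messages
 * @err_sts: error status read from hardware
 */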
static void hclge_log_error(struct device *dev,
			    const struct hclge_hw_error *err_list,
			    u32 err_sts)
{
	const struct hclge_hw_error *err;
	int i = 0;

	while (err_list[i].msg) {
		err = &err_list[i];
		if (!(err->int_msk & err_sts)) {
			i++;
			continue;
		}
		dev_warn(dev, "%s [error status=0x%x] found\n",
			 err->msg, err_sts);
		i++;
	}
}
/* hclge_cmd_query_error: read the error information
 * @hdev: pointer to struct hclge_dev
 * @desc: descriptor for describing the command
 * @cmd: command opcode
 * @flag: flag for extended command structure
 * @w_num: offset for setting the read interrupt type
 * @int_type: select which type of interrupt for which the error
 *            info will be read (RAS-CE/RAS-NFE/RAS-FE etc.)
 *
 * This function queries the error info from the hw registers using a command.
 */
static int hclge_cmd_query_error(struct hclge_dev *hdev,
				 struct hclge_desc *desc, u32 cmd,
				 u16 flag, u8 w_num,
				 enum hclge_err_int_type int_type)
{
	struct device *dev = &hdev->pdev->dev;
	int num = 1;
	int ret;

	hclge_cmd_setup_basic_desc(&desc[0], cmd, true);
	if (flag) {
		desc[0].flag |= cpu_to_le16(flag);
		hclge_cmd_setup_basic_desc(&desc[1], cmd, true);
		num = 2;
	}
	if (w_num)
		desc[0].data[w_num] = cpu_to_le32(int_type);

	ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
	if (ret)
		dev_err(dev, "query error cmd failed (%d)\n", ret);

	return ret;
}
/* hclge_cmd_clear_error: clear the error status
 * @hdev: pointer to struct hclge_dev
 * @desc: descriptor for describing the command
 * @desc_src: prefilled descriptor from the previous command for reusing
 * @cmd: command opcode
 * @flag: flag for extended command structure
 *
 * This function clears the error status in the hw registers using a command.
 */
static int hclge_cmd_clear_error(struct hclge_dev *hdev,
				 struct hclge_desc *desc,
				 struct hclge_desc *desc_src,
				 u32 cmd, u16 flag)
{
	struct device *dev = &hdev->pdev->dev;
	int num = 1;
	int ret, i;

	if (cmd) {
		hclge_cmd_setup_basic_desc(&desc[0], cmd, false);
		if (flag) {
			desc[0].flag |= cpu_to_le16(flag);
			hclge_cmd_setup_basic_desc(&desc[1], cmd, false);
			num = 2;
		}
		if (desc_src) {
			for (i = 0; i < 6; i++) {
				desc[0].data[i] = desc_src[0].data[i];
				if (flag)
					desc[1].data[i] = desc_src[1].data[i];
			}
		}
	} else {
		hclge_cmd_reuse_desc(&desc[0], false);
		if (flag) {
			desc[0].flag |= cpu_to_le16(flag);
			hclge_cmd_reuse_desc(&desc[1], false);
			num = 2;
		}
	}

	ret = hclge_cmd_send(&hdev->hw, &desc[0], num);
	if (ret)
		dev_err(dev, "clear error cmd failed (%d)\n", ret);

	return ret;
}
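/* hclge_enable_common_error - enable or disable the COMMON block (IMP TCM,
 * CMDQ, TQP) ECC and read-poison error interrupts.
 */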
static int hclge_enable_common_error(struct hclge_dev *hdev, bool en)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc[2];
	int ret;

	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_COMMON_ECC_INT_CFG, false);
	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_COMMON_ECC_INT_CFG, false);

	if (en) {
		/* enable COMMON error interrupts */
		desc[0].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN);
		desc[0].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN |
					HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN);
		desc[0].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN);
		desc[0].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN);
		desc[0].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN);
	} else {
		/* disable COMMON error interrupts */
		desc[0].data[0] = 0;
		desc[0].data[2] = 0;
		desc[0].data[3] = 0;
		desc[0].data[4] = 0;
		desc[0].data[5] = 0;
	}

	desc[1].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN_MASK);
	desc[1].data[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN_MASK |
				HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN_MASK);
	desc[1].data[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK);
	desc[1].data[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN_MASK);
	desc[1].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN_MASK);

	ret = hclge_cmd_send(&hdev->hw, &desc[0], 2);
	if (ret)
		dev_err(dev,
			"failed(%d) to enable/disable COMMON err interrupts\n",
			ret);

	return ret;
}
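/* hclge_process_common_error - query, log and clear the COMMON block error
 * interrupt status.
 */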
static void hclge_process_common_error(struct hclge_dev *hdev,
				       enum hclge_err_int_type type)
{
	struct device *dev = &hdev->pdev->dev;
	struct hclge_desc desc[2];
	u32 err_sts;
	int ret;

	/* read err sts */
	ret = hclge_cmd_query_error(hdev, &desc[0],
				    HCLGE_COMMON_ECC_INT_CFG,
				    HCLGE_CMD_FLAG_NEXT, 0, 0);
	if (ret) {
		dev_err(dev,
			"failed(=%d) to query COMMON error interrupt status\n",
			ret);
		return;
	}

	/* log err */
	err_sts = (le32_to_cpu(desc[0].data[0])) & HCLGE_IMP_TCM_ECC_INT_MASK;
	hclge_log_error(dev, &hclge_imp_tcm_ecc_int[0], err_sts);

	err_sts = (le32_to_cpu(desc[0].data[1])) & HCLGE_CMDQ_ECC_INT_MASK;
	hclge_log_error(dev, &hclge_cmdq_nic_mem_ecc_int[0], err_sts);

	err_sts = (le32_to_cpu(desc[0].data[1]) >> HCLGE_CMDQ_ROC_ECC_INT_SHIFT)
		   & HCLGE_CMDQ_ECC_INT_MASK;
	hclge_log_error(dev, &hclge_cmdq_rocee_mem_ecc_int[0], err_sts);

	if ((le32_to_cpu(desc[0].data[3])) & BIT(0))
		dev_warn(dev, "imp_rd_data_poison_err found\n");

	err_sts = (le32_to_cpu(desc[0].data[3]) >> HCLGE_TQP_ECC_INT_SHIFT) &
		   HCLGE_TQP_ECC_INT_MASK;
	hclge_log_error(dev, &hclge_tqp_int_ecc_int[0], err_sts);

	err_sts = (le32_to_cpu(desc[0].data[5])) &
		   HCLGE_IMP_ITCM4_ECC_INT_MASK;
	hclge_log_error(dev, &hclge_imp_itcm4_ecc_int[0], err_sts);

	/* clear error interrupts */
	desc[1].data[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_CLR_MASK);
	desc[1].data[1] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_CLR_MASK |
				HCLGE_CMDQ_ROCEE_ECC_CLR_MASK);
	desc[1].data[3] = cpu_to_le32(HCLGE_TQP_IMP_ERR_CLR_MASK);
	desc[1].data[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_CLR_MASK);

	ret = hclge_cmd_clear_error(hdev, &desc[0], NULL, 0,
				    HCLGE_CMD_FLAG_NEXT);
	if (ret)
		dev_err(dev,
			"failed(%d) to clear COMMON error interrupt status\n",
			ret);
}
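/* Table of RAS-capable hardware blocks: each entry maps a bit in the RAS
 * non-fatal status register to the callbacks that enable and process that
 * block's error interrupts.
 */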
static const struct hclge_hw_blk hw_blk[] = {
	{ .msk = BIT(5), .name = "COMMON",
	  .enable_error = hclge_enable_common_error,
	  .process_error = hclge_process_common_error, },
	{ /* sentinel */ }
};
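/* hclge_hw_error_set_state - enable or disable error interrupts for every
 * block in hw_blk
 * @hdev: pointer to struct hclge_dev
 * @state: true to enable, false to disable
 */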
int hclge_hw_error_set_state(struct hclge_dev *hdev, bool state)
{
	struct device *dev = &hdev->pdev->dev;
	int ret = 0;
	int i = 0;

	while (hw_blk[i].name) {
		if (!hw_blk[i].enable_error) {
			i++;
			continue;
		}
		ret = hw_blk[i].enable_error(hdev, state);
		if (ret) {
			dev_err(dev, "fail(%d) to en/disable err int\n", ret);
			return ret;
		}
		i++;
	}

	return ret;
}
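/* hclge_process_ras_hw_error - handle a RAS error reported for this PF:
 * read the RAS status register, dispatch non-fatal errors to the matching
 * hw_blk handler and request a reset.
 */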
pci_ers_result_t hclge_process_ras_hw_error(struct hnae3_ae_dev *ae_dev)
{
	struct hclge_dev *hdev = ae_dev->priv;
	struct device *dev = &hdev->pdev->dev;
	u32 sts, val;
	int i = 0;

	sts = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG);

	/* Processing Non-fatal errors */
	if (sts & HCLGE_RAS_REG_NFE_MASK) {
		val = (sts >> HCLGE_RAS_REG_NFE_SHIFT) & 0xFF;
		while (hw_blk[i].name) {
			if (!(hw_blk[i].msk & val)) {
				i++;
				continue;
			}
			dev_warn(dev, "%s ras non-fatal error identified\n",
				 hw_blk[i].name);
			if (hw_blk[i].process_error)
				hw_blk[i].process_error(hdev,
							HCLGE_ERR_INT_RAS_NFE);
			i++;
		}
	}

	return PCI_ERS_RESULT_NEED_RESET;
}