1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright(c) 1999 - 2018 Intel Corporation. */
3
4 #include <linux/types.h>
5 #include <linux/module.h>
6 #include <linux/pci.h>
7 #include <linux/netdevice.h>
8 #include <linux/vmalloc.h>
9 #include <linux/string.h>
10 #include <linux/in.h>
11 #include <linux/interrupt.h>
12 #include <linux/ip.h>
13 #include <linux/tcp.h>
14 #include <linux/sctp.h>
15 #include <linux/pkt_sched.h>
16 #include <linux/ipv6.h>
17 #include <linux/slab.h>
18 #include <net/checksum.h>
19 #include <net/ip6_checksum.h>
20 #include <linux/etherdevice.h>
21 #include <linux/ethtool.h>
22 #include <linux/if.h>
23 #include <linux/if_vlan.h>
24 #include <linux/if_macvlan.h>
25 #include <linux/if_bridge.h>
26 #include <linux/prefetch.h>
27 #include <linux/bpf.h>
28 #include <linux/bpf_trace.h>
29 #include <linux/atomic.h>
30 #include <scsi/fc/fc_fcoe.h>
31 #include <net/udp_tunnel.h>
32 #include <net/pkt_cls.h>
33 #include <net/tc_act/tc_gact.h>
34 #include <net/tc_act/tc_mirred.h>
35 #include <net/vxlan.h>
36 #include <net/mpls.h>
37 #include <net/xdp_sock.h>
38
39 #include "ixgbe.h"
40 #include "ixgbe_common.h"
41 #include "ixgbe_dcb_82599.h"
42 #include "ixgbe_sriov.h"
43 #include "ixgbe_model.h"
44 #include "ixgbe_txrx_common.h"
45
46 char ixgbe_driver_name[] = "ixgbe";
47 static const char ixgbe_driver_string[] =
48                               "Intel(R) 10 Gigabit PCI Express Network Driver";
49 #ifdef IXGBE_FCOE
50 char ixgbe_default_device_descr[] =
51                               "Intel(R) 10 Gigabit Network Connection";
52 #else
53 static char ixgbe_default_device_descr[] =
54                               "Intel(R) 10 Gigabit Network Connection";
55 #endif
56 #define DRV_VERSION "5.1.0-k"
57 const char ixgbe_driver_version[] = DRV_VERSION;
58 static const char ixgbe_copyright[] =
59                                 "Copyright (c) 1999-2016 Intel Corporation.";
60
61 static const char ixgbe_overheat_msg[] = "Network adapter has been stopped because it has overheated. Restart the computer. If the problem persists, power off the system and replace the adapter";
62
63 static const struct ixgbe_info *ixgbe_info_tbl[] = {
64         [board_82598]           = &ixgbe_82598_info,
65         [board_82599]           = &ixgbe_82599_info,
66         [board_X540]            = &ixgbe_X540_info,
67         [board_X550]            = &ixgbe_X550_info,
68         [board_X550EM_x]        = &ixgbe_X550EM_x_info,
69         [board_x550em_x_fw]     = &ixgbe_x550em_x_fw_info,
70         [board_x550em_a]        = &ixgbe_x550em_a_info,
71         [board_x550em_a_fw]     = &ixgbe_x550em_a_fw_info,
72 };
73
74 /* ixgbe_pci_tbl - PCI Device ID Table
75  *
76  * Wildcard entries (PCI_ANY_ID) should come last
77  * Last entry must be all 0s
78  *
79  * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
80  *   Class, Class Mask, private data (not used) }
81  */
82 static const struct pci_device_id ixgbe_pci_tbl[] = {
83         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598), board_82598 },
84         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AF_DUAL_PORT), board_82598 },
85         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AF_SINGLE_PORT), board_82598 },
86         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AT), board_82598 },
87         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AT2), board_82598 },
88         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598EB_CX4), board_82598 },
89         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_CX4_DUAL_PORT), board_82598 },
90         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_DA_DUAL_PORT), board_82598 },
91         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM), board_82598 },
92         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598EB_XF_LR), board_82598 },
93         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598EB_SFP_LOM), board_82598 },
94         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_BX), board_82598 },
95         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_KX4), board_82599 },
96         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_XAUI_LOM), board_82599 },
97         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_KR), board_82599 },
98         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP), board_82599 },
99         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_EM), board_82599 },
100         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_KX4_MEZZ), board_82599 },
101         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_CX4), board_82599 },
102         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_BACKPLANE_FCOE), board_82599 },
103         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_FCOE), board_82599 },
104         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_T3_LOM), board_82599 },
105         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_COMBO_BACKPLANE), board_82599 },
106         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X540T), board_X540 },
107         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_SF2), board_82599 },
108         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_LS), board_82599 },
109         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_QSFP_SF_QP), board_82599 },
110         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599EN_SFP), board_82599 },
111         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_SF_QP), board_82599 },
112         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X540T1), board_X540 },
113         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550T), board_X550},
114         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550T1), board_X550},
115         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_KX4), board_X550EM_x},
116         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_XFI), board_X550EM_x},
117         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_KR), board_X550EM_x},
118         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_10G_T), board_X550EM_x},
119         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_SFP), board_X550EM_x},
120         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_1G_T), board_x550em_x_fw},
121         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_KR), board_x550em_a },
122         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_KR_L), board_x550em_a },
123         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SFP_N), board_x550em_a },
124         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SGMII), board_x550em_a },
125         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SGMII_L), board_x550em_a },
126         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_10G_T), board_x550em_a},
127         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SFP), board_x550em_a },
128         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_1G_T), board_x550em_a_fw },
129         {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_1G_T_L), board_x550em_a_fw },
130         /* required last entry */
131         {0, }
132 };
133 MODULE_DEVICE_TABLE(pci, ixgbe_pci_tbl);
134
135 #ifdef CONFIG_IXGBE_DCA
136 static int ixgbe_notify_dca(struct notifier_block *, unsigned long event,
137                             void *p);
138 static struct notifier_block dca_notifier = {
139         .notifier_call = ixgbe_notify_dca,
140         .next          = NULL,
141         .priority      = 0
142 };
143 #endif
144
145 #ifdef CONFIG_PCI_IOV
146 static unsigned int max_vfs;
147 module_param(max_vfs, uint, 0);
148 MODULE_PARM_DESC(max_vfs,
149                  "Maximum number of virtual functions to allocate per physical function - default is zero and maximum value is 63. (Deprecated)");
150 #endif /* CONFIG_PCI_IOV */
151
152 static unsigned int allow_unsupported_sfp;
153 module_param(allow_unsupported_sfp, uint, 0);
154 MODULE_PARM_DESC(allow_unsupported_sfp,
155                  "Allow unsupported and untested SFP+ modules on 82599-based adapters");
156
157 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
158 static int debug = -1;
159 module_param(debug, int, 0);
160 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
161
162 MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
163 MODULE_DESCRIPTION("Intel(R) 10 Gigabit PCI Express Network Driver");
164 MODULE_LICENSE("GPL v2");
165 MODULE_VERSION(DRV_VERSION);
166
167 static struct workqueue_struct *ixgbe_wq;
168
169 static bool ixgbe_check_cfg_remove(struct ixgbe_hw *hw, struct pci_dev *pdev);
170 static void ixgbe_watchdog_link_is_down(struct ixgbe_adapter *);
171
172 static const struct net_device_ops ixgbe_netdev_ops;
173
174 static bool netif_is_ixgbe(struct net_device *dev)
175 {
176         return dev && (dev->netdev_ops == &ixgbe_netdev_ops);
177 }
178
179 static int ixgbe_read_pci_cfg_word_parent(struct ixgbe_adapter *adapter,
180                                           u32 reg, u16 *value)
181 {
182         struct pci_dev *parent_dev;
183         struct pci_bus *parent_bus;
184
185         parent_bus = adapter->pdev->bus->parent;
186         if (!parent_bus)
187                 return -1;
188
189         parent_dev = parent_bus->self;
190         if (!parent_dev)
191                 return -1;
192
193         if (!pci_is_pcie(parent_dev))
194                 return -1;
195
196         pcie_capability_read_word(parent_dev, reg, value);
197         if (*value == IXGBE_FAILED_READ_CFG_WORD &&
198             ixgbe_check_cfg_remove(&adapter->hw, parent_dev))
199                 return -1;
200         return 0;
201 }
202
203 static s32 ixgbe_get_parent_bus_info(struct ixgbe_adapter *adapter)
204 {
205         struct ixgbe_hw *hw = &adapter->hw;
206         u16 link_status = 0;
207         int err;
208
209         hw->bus.type = ixgbe_bus_type_pci_express;
210
211         /* Get the negotiated link width and speed from PCI config space of the
212          * parent, as this device is behind a switch
213          */
214         err = ixgbe_read_pci_cfg_word_parent(adapter, 18, &link_status);
215
216         /* assume caller will handle error case */
217         if (err)
218                 return err;
219
220         hw->bus.width = ixgbe_convert_bus_width(link_status);
221         hw->bus.speed = ixgbe_convert_bus_speed(link_status);
222
223         return 0;
224 }
225
226 /**
227  * ixgbe_check_from_parent - Determine whether PCIe info should come from parent
228  * @hw: hw specific details
229  *
230  * This function is used by probe to determine whether a device's PCI-Express
231  * bandwidth details should be gathered from the parent bus instead of from the
232  * device. Used to ensure that various locations all have the correct device ID
233  * checks.
234  */
235 static inline bool ixgbe_pcie_from_parent(struct ixgbe_hw *hw)
236 {
237         switch (hw->device_id) {
238         case IXGBE_DEV_ID_82599_SFP_SF_QP:
239         case IXGBE_DEV_ID_82599_QSFP_SF_QP:
240                 return true;
241         default:
242                 return false;
243         }
244 }
245
246 static void ixgbe_check_minimum_link(struct ixgbe_adapter *adapter,
247                                      int expected_gts)
248 {
249         struct ixgbe_hw *hw = &adapter->hw;
250         struct pci_dev *pdev;
251
252         /* Some devices are not connected over PCIe and thus do not negotiate
253          * speed. These devices do not have valid bus info, and thus any report
254          * we generate may not be correct.
255          */
256         if (hw->bus.type == ixgbe_bus_type_internal)
257                 return;
258
259         /* determine whether to use the parent device */
260         if (ixgbe_pcie_from_parent(&adapter->hw))
261                 pdev = adapter->pdev->bus->parent->self;
262         else
263                 pdev = adapter->pdev;
264
265         pcie_print_link_status(pdev);
266 }
267
268 static void ixgbe_service_event_schedule(struct ixgbe_adapter *adapter)
269 {
270         if (!test_bit(__IXGBE_DOWN, &adapter->state) &&
271             !test_bit(__IXGBE_REMOVING, &adapter->state) &&
272             !test_and_set_bit(__IXGBE_SERVICE_SCHED, &adapter->state))
273                 queue_work(ixgbe_wq, &adapter->service_task);
274 }
275
276 static void ixgbe_remove_adapter(struct ixgbe_hw *hw)
277 {
278         struct ixgbe_adapter *adapter = hw->back;
279
280         if (!hw->hw_addr)
281                 return;
282         hw->hw_addr = NULL;
283         e_dev_err("Adapter removed\n");
284         if (test_bit(__IXGBE_SERVICE_INITED, &adapter->state))
285                 ixgbe_service_event_schedule(adapter);
286 }
287
288 static u32 ixgbe_check_remove(struct ixgbe_hw *hw, u32 reg)
289 {
290         u8 __iomem *reg_addr;
291         u32 value;
292         int i;
293
294         reg_addr = READ_ONCE(hw->hw_addr);
295         if (ixgbe_removed(reg_addr))
296                 return IXGBE_FAILED_READ_REG;
297
298         /* Register read of 0xFFFFFFF can indicate the adapter has been removed,
299          * so perform several status register reads to determine if the adapter
300          * has been removed.
301          */
302         for (i = 0; i < IXGBE_FAILED_READ_RETRIES; i++) {
303                 value = readl(reg_addr + IXGBE_STATUS);
304                 if (value != IXGBE_FAILED_READ_REG)
305                         break;
306                 mdelay(3);
307         }
308
309         if (value == IXGBE_FAILED_READ_REG)
310                 ixgbe_remove_adapter(hw);
311         else
312                 value = readl(reg_addr + reg);
313         return value;
314 }
315
316 /**
317  * ixgbe_read_reg - Read from device register
318  * @hw: hw specific details
319  * @reg: offset of register to read
320  *
321  * Returns : value read or IXGBE_FAILED_READ_REG if removed
322  *
323  * This function is used to read device registers. It checks for device
324  * removal by confirming any read that returns all ones by checking the
325  * status register value for all ones. This function avoids reading from
326  * the hardware if a removal was previously detected in which case it
327  * returns IXGBE_FAILED_READ_REG (all ones).
328  */
329 u32 ixgbe_read_reg(struct ixgbe_hw *hw, u32 reg)
330 {
331         u8 __iomem *reg_addr = READ_ONCE(hw->hw_addr);
332         u32 value;
333
334         if (ixgbe_removed(reg_addr))
335                 return IXGBE_FAILED_READ_REG;
336         if (unlikely(hw->phy.nw_mng_if_sel &
337                      IXGBE_NW_MNG_IF_SEL_SGMII_ENABLE)) {
338                 struct ixgbe_adapter *adapter;
339                 int i;
340
341                 for (i = 0; i < 200; ++i) {
342                         value = readl(reg_addr + IXGBE_MAC_SGMII_BUSY);
343                         if (likely(!value))
344                                 goto writes_completed;
345                         if (value == IXGBE_FAILED_READ_REG) {
346                                 ixgbe_remove_adapter(hw);
347                                 return IXGBE_FAILED_READ_REG;
348                         }
349                         udelay(5);
350                 }
351
352                 adapter = hw->back;
353                 e_warn(hw, "register writes incomplete %08x\n", value);
354         }
355
356 writes_completed:
357         value = readl(reg_addr + reg);
358         if (unlikely(value == IXGBE_FAILED_READ_REG))
359                 value = ixgbe_check_remove(hw, reg);
360         return value;
361 }
362
363 static bool ixgbe_check_cfg_remove(struct ixgbe_hw *hw, struct pci_dev *pdev)
364 {
365         u16 value;
366
367         pci_read_config_word(pdev, PCI_VENDOR_ID, &value);
368         if (value == IXGBE_FAILED_READ_CFG_WORD) {
369                 ixgbe_remove_adapter(hw);
370                 return true;
371         }
372         return false;
373 }
374
375 u16 ixgbe_read_pci_cfg_word(struct ixgbe_hw *hw, u32 reg)
376 {
377         struct ixgbe_adapter *adapter = hw->back;
378         u16 value;
379
380         if (ixgbe_removed(hw->hw_addr))
381                 return IXGBE_FAILED_READ_CFG_WORD;
382         pci_read_config_word(adapter->pdev, reg, &value);
383         if (value == IXGBE_FAILED_READ_CFG_WORD &&
384             ixgbe_check_cfg_remove(hw, adapter->pdev))
385                 return IXGBE_FAILED_READ_CFG_WORD;
386         return value;
387 }
388
389 #ifdef CONFIG_PCI_IOV
390 static u32 ixgbe_read_pci_cfg_dword(struct ixgbe_hw *hw, u32 reg)
391 {
392         struct ixgbe_adapter *adapter = hw->back;
393         u32 value;
394
395         if (ixgbe_removed(hw->hw_addr))
396                 return IXGBE_FAILED_READ_CFG_DWORD;
397         pci_read_config_dword(adapter->pdev, reg, &value);
398         if (value == IXGBE_FAILED_READ_CFG_DWORD &&
399             ixgbe_check_cfg_remove(hw, adapter->pdev))
400                 return IXGBE_FAILED_READ_CFG_DWORD;
401         return value;
402 }
403 #endif /* CONFIG_PCI_IOV */
404
405 void ixgbe_write_pci_cfg_word(struct ixgbe_hw *hw, u32 reg, u16 value)
406 {
407         struct ixgbe_adapter *adapter = hw->back;
408
409         if (ixgbe_removed(hw->hw_addr))
410                 return;
411         pci_write_config_word(adapter->pdev, reg, value);
412 }
413
414 static void ixgbe_service_event_complete(struct ixgbe_adapter *adapter)
415 {
416         BUG_ON(!test_bit(__IXGBE_SERVICE_SCHED, &adapter->state));
417
418         /* flush memory to make sure state is correct before next watchdog */
419         smp_mb__before_atomic();
420         clear_bit(__IXGBE_SERVICE_SCHED, &adapter->state);
421 }
422
423 struct ixgbe_reg_info {
424         u32 ofs;
425         char *name;
426 };
427
428 static const struct ixgbe_reg_info ixgbe_reg_info_tbl[] = {
429
430         /* General Registers */
431         {IXGBE_CTRL, "CTRL"},
432         {IXGBE_STATUS, "STATUS"},
433         {IXGBE_CTRL_EXT, "CTRL_EXT"},
434
435         /* Interrupt Registers */
436         {IXGBE_EICR, "EICR"},
437
438         /* RX Registers */
439         {IXGBE_SRRCTL(0), "SRRCTL"},
440         {IXGBE_DCA_RXCTRL(0), "DRXCTL"},
441         {IXGBE_RDLEN(0), "RDLEN"},
442         {IXGBE_RDH(0), "RDH"},
443         {IXGBE_RDT(0), "RDT"},
444         {IXGBE_RXDCTL(0), "RXDCTL"},
445         {IXGBE_RDBAL(0), "RDBAL"},
446         {IXGBE_RDBAH(0), "RDBAH"},
447
448         /* TX Registers */
449         {IXGBE_TDBAL(0), "TDBAL"},
450         {IXGBE_TDBAH(0), "TDBAH"},
451         {IXGBE_TDLEN(0), "TDLEN"},
452         {IXGBE_TDH(0), "TDH"},
453         {IXGBE_TDT(0), "TDT"},
454         {IXGBE_TXDCTL(0), "TXDCTL"},
455
456         /* List Terminator */
457         { .name = NULL }
458 };
459
460
461 /*
462  * ixgbe_regdump - register printout routine
463  */
464 static void ixgbe_regdump(struct ixgbe_hw *hw, struct ixgbe_reg_info *reginfo)
465 {
466         int i;
467         char rname[16];
468         u32 regs[64];
469
470         switch (reginfo->ofs) {
471         case IXGBE_SRRCTL(0):
472                 for (i = 0; i < 64; i++)
473                         regs[i] = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i));
474                 break;
475         case IXGBE_DCA_RXCTRL(0):
476                 for (i = 0; i < 64; i++)
477                         regs[i] = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i));
478                 break;
479         case IXGBE_RDLEN(0):
480                 for (i = 0; i < 64; i++)
481                         regs[i] = IXGBE_READ_REG(hw, IXGBE_RDLEN(i));
482                 break;
483         case IXGBE_RDH(0):
484                 for (i = 0; i < 64; i++)
485                         regs[i] = IXGBE_READ_REG(hw, IXGBE_RDH(i));
486                 break;
487         case IXGBE_RDT(0):
488                 for (i = 0; i < 64; i++)
489                         regs[i] = IXGBE_READ_REG(hw, IXGBE_RDT(i));
490                 break;
491         case IXGBE_RXDCTL(0):
492                 for (i = 0; i < 64; i++)
493                         regs[i] = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i));
494                 break;
495         case IXGBE_RDBAL(0):
496                 for (i = 0; i < 64; i++)
497                         regs[i] = IXGBE_READ_REG(hw, IXGBE_RDBAL(i));
498                 break;
499         case IXGBE_RDBAH(0):
500                 for (i = 0; i < 64; i++)
501                         regs[i] = IXGBE_READ_REG(hw, IXGBE_RDBAH(i));
502                 break;
503         case IXGBE_TDBAL(0):
504                 for (i = 0; i < 64; i++)
505                         regs[i] = IXGBE_READ_REG(hw, IXGBE_TDBAL(i));
506                 break;
507         case IXGBE_TDBAH(0):
508                 for (i = 0; i < 64; i++)
509                         regs[i] = IXGBE_READ_REG(hw, IXGBE_TDBAH(i));
510                 break;
511         case IXGBE_TDLEN(0):
512                 for (i = 0; i < 64; i++)
513                         regs[i] = IXGBE_READ_REG(hw, IXGBE_TDLEN(i));
514                 break;
515         case IXGBE_TDH(0):
516                 for (i = 0; i < 64; i++)
517                         regs[i] = IXGBE_READ_REG(hw, IXGBE_TDH(i));
518                 break;
519         case IXGBE_TDT(0):
520                 for (i = 0; i < 64; i++)
521                         regs[i] = IXGBE_READ_REG(hw, IXGBE_TDT(i));
522                 break;
523         case IXGBE_TXDCTL(0):
524                 for (i = 0; i < 64; i++)
525                         regs[i] = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i));
526                 break;
527         default:
528                 pr_info("%-15s %08x\n",
529                         reginfo->name, IXGBE_READ_REG(hw, reginfo->ofs));
530                 return;
531         }
532
533         i = 0;
534         while (i < 64) {
535                 int j;
536                 char buf[9 * 8 + 1];
537                 char *p = buf;
538
539                 snprintf(rname, 16, "%s[%d-%d]", reginfo->name, i, i + 7);
540                 for (j = 0; j < 8; j++)
541                         p += sprintf(p, " %08x", regs[i++]);
542                 pr_err("%-15s%s\n", rname, buf);
543         }
544
545 }
546
547 static void ixgbe_print_buffer(struct ixgbe_ring *ring, int n)
548 {
549         struct ixgbe_tx_buffer *tx_buffer;
550
551         tx_buffer = &ring->tx_buffer_info[ring->next_to_clean];
552         pr_info(" %5d %5X %5X %016llX %08X %p %016llX\n",
553                 n, ring->next_to_use, ring->next_to_clean,
554                 (u64)dma_unmap_addr(tx_buffer, dma),
555                 dma_unmap_len(tx_buffer, len),
556                 tx_buffer->next_to_watch,
557                 (u64)tx_buffer->time_stamp);
558 }
559
560 /*
561  * ixgbe_dump - Print registers, tx-rings and rx-rings
562  */
563 static void ixgbe_dump(struct ixgbe_adapter *adapter)
564 {
565         struct net_device *netdev = adapter->netdev;
566         struct ixgbe_hw *hw = &adapter->hw;
567         struct ixgbe_reg_info *reginfo;
568         int n = 0;
569         struct ixgbe_ring *ring;
570         struct ixgbe_tx_buffer *tx_buffer;
571         union ixgbe_adv_tx_desc *tx_desc;
572         struct my_u0 { u64 a; u64 b; } *u0;
573         struct ixgbe_ring *rx_ring;
574         union ixgbe_adv_rx_desc *rx_desc;
575         struct ixgbe_rx_buffer *rx_buffer_info;
576         int i = 0;
577
578         if (!netif_msg_hw(adapter))
579                 return;
580
581         /* Print netdevice Info */
582         if (netdev) {
583                 dev_info(&adapter->pdev->dev, "Net device Info\n");
584                 pr_info("Device Name     state            "
585                         "trans_start\n");
586                 pr_info("%-15s %016lX %016lX\n",
587                         netdev->name,
588                         netdev->state,
589                         dev_trans_start(netdev));
590         }
591
592         /* Print Registers */
593         dev_info(&adapter->pdev->dev, "Register Dump\n");
594         pr_info(" Register Name   Value\n");
595         for (reginfo = (struct ixgbe_reg_info *)ixgbe_reg_info_tbl;
596              reginfo->name; reginfo++) {
597                 ixgbe_regdump(hw, reginfo);
598         }
599
600         /* Print TX Ring Summary */
601         if (!netdev || !netif_running(netdev))
602                 return;
603
604         dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
605         pr_info(" %s     %s              %s        %s\n",
606                 "Queue [NTU] [NTC] [bi(ntc)->dma  ]",
607                 "leng", "ntw", "timestamp");
608         for (n = 0; n < adapter->num_tx_queues; n++) {
609                 ring = adapter->tx_ring[n];
610                 ixgbe_print_buffer(ring, n);
611         }
612
613         for (n = 0; n < adapter->num_xdp_queues; n++) {
614                 ring = adapter->xdp_ring[n];
615                 ixgbe_print_buffer(ring, n);
616         }
617
618         /* Print TX Rings */
619         if (!netif_msg_tx_done(adapter))
620                 goto rx_ring_summary;
621
622         dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
623
624         /* Transmit Descriptor Formats
625          *
626          * 82598 Advanced Transmit Descriptor
627          *   +--------------------------------------------------------------+
628          * 0 |         Buffer Address [63:0]                                |
629          *   +--------------------------------------------------------------+
630          * 8 |  PAYLEN  | POPTS  | IDX | STA | DCMD  |DTYP |  RSV |  DTALEN |
631          *   +--------------------------------------------------------------+
632          *   63       46 45    40 39 36 35 32 31   24 23 20 19              0
633          *
634          * 82598 Advanced Transmit Descriptor (Write-Back Format)
635          *   +--------------------------------------------------------------+
636          * 0 |                          RSV [63:0]                          |
637          *   +--------------------------------------------------------------+
638          * 8 |            RSV           |  STA  |          NXTSEQ           |
639          *   +--------------------------------------------------------------+
640          *   63                       36 35   32 31                         0
641          *
642          * 82599+ Advanced Transmit Descriptor
643          *   +--------------------------------------------------------------+
644          * 0 |         Buffer Address [63:0]                                |
645          *   +--------------------------------------------------------------+
646          * 8 |PAYLEN  |POPTS|CC|IDX  |STA  |DCMD  |DTYP |MAC  |RSV  |DTALEN |
647          *   +--------------------------------------------------------------+
648          *   63     46 45 40 39 38 36 35 32 31  24 23 20 19 18 17 16 15     0
649          *
650          * 82599+ Advanced Transmit Descriptor (Write-Back Format)
651          *   +--------------------------------------------------------------+
652          * 0 |                          RSV [63:0]                          |
653          *   +--------------------------------------------------------------+
654          * 8 |            RSV           |  STA  |           RSV             |
655          *   +--------------------------------------------------------------+
656          *   63                       36 35   32 31                         0
657          */
658
659         for (n = 0; n < adapter->num_tx_queues; n++) {
660                 ring = adapter->tx_ring[n];
661                 pr_info("------------------------------------\n");
662                 pr_info("TX QUEUE INDEX = %d\n", ring->queue_index);
663                 pr_info("------------------------------------\n");
664                 pr_info("%s%s    %s              %s        %s          %s\n",
665                         "T [desc]     [address 63:0  ] ",
666                         "[PlPOIdStDDt Ln] [bi->dma       ] ",
667                         "leng", "ntw", "timestamp", "bi->skb");
668
669                 for (i = 0; ring->desc && (i < ring->count); i++) {
670                         tx_desc = IXGBE_TX_DESC(ring, i);
671                         tx_buffer = &ring->tx_buffer_info[i];
672                         u0 = (struct my_u0 *)tx_desc;
673                         if (dma_unmap_len(tx_buffer, len) > 0) {
674                                 const char *ring_desc;
675
676                                 if (i == ring->next_to_use &&
677                                     i == ring->next_to_clean)
678                                         ring_desc = " NTC/U";
679                                 else if (i == ring->next_to_use)
680                                         ring_desc = " NTU";
681                                 else if (i == ring->next_to_clean)
682                                         ring_desc = " NTC";
683                                 else
684                                         ring_desc = "";
685                                 pr_info("T [0x%03X]    %016llX %016llX %016llX %08X %p %016llX %p%s",
686                                         i,
687                                         le64_to_cpu((__force __le64)u0->a),
688                                         le64_to_cpu((__force __le64)u0->b),
689                                         (u64)dma_unmap_addr(tx_buffer, dma),
690                                         dma_unmap_len(tx_buffer, len),
691                                         tx_buffer->next_to_watch,
692                                         (u64)tx_buffer->time_stamp,
693                                         tx_buffer->skb,
694                                         ring_desc);
695
696                                 if (netif_msg_pktdata(adapter) &&
697                                     tx_buffer->skb)
698                                         print_hex_dump(KERN_INFO, "",
699                                                 DUMP_PREFIX_ADDRESS, 16, 1,
700                                                 tx_buffer->skb->data,
701                                                 dma_unmap_len(tx_buffer, len),
702                                                 true);
703                         }
704                 }
705         }
706
707         /* Print RX Rings Summary */
708 rx_ring_summary:
709         dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
710         pr_info("Queue [NTU] [NTC]\n");
711         for (n = 0; n < adapter->num_rx_queues; n++) {
712                 rx_ring = adapter->rx_ring[n];
713                 pr_info("%5d %5X %5X\n",
714                         n, rx_ring->next_to_use, rx_ring->next_to_clean);
715         }
716
717         /* Print RX Rings */
718         if (!netif_msg_rx_status(adapter))
719                 return;
720
721         dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
722
723         /* Receive Descriptor Formats
724          *
725          * 82598 Advanced Receive Descriptor (Read) Format
726          *    63                                           1        0
727          *    +-----------------------------------------------------+
728          *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
729          *    +----------------------------------------------+------+
730          *  8 |       Header Buffer Address [63:1]           |  DD  |
731          *    +-----------------------------------------------------+
732          *
733          *
734          * 82598 Advanced Receive Descriptor (Write-Back) Format
735          *
736          *   63       48 47    32 31  30      21 20 16 15   4 3     0
737          *   +------------------------------------------------------+
738          * 0 |       RSS Hash /  |SPH| HDR_LEN  | RSV |Packet|  RSS |
739          *   | Packet   | IP     |   |          |     | Type | Type |
740          *   | Checksum | Ident  |   |          |     |      |      |
741          *   +------------------------------------------------------+
742          * 8 | VLAN Tag | Length | Extended Error | Extended Status |
743          *   +------------------------------------------------------+
744          *   63       48 47    32 31            20 19               0
745          *
746          * 82599+ Advanced Receive Descriptor (Read) Format
747          *    63                                           1        0
748          *    +-----------------------------------------------------+
749          *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
750          *    +----------------------------------------------+------+
751          *  8 |       Header Buffer Address [63:1]           |  DD  |
752          *    +-----------------------------------------------------+
753          *
754          *
755          * 82599+ Advanced Receive Descriptor (Write-Back) Format
756          *
757          *   63       48 47    32 31  30      21 20 17 16   4 3     0
758          *   +------------------------------------------------------+
759          * 0 |RSS / Frag Checksum|SPH| HDR_LEN  |RSC- |Packet|  RSS |
760          *   |/ RTT / PCoE_PARAM |   |          | CNT | Type | Type |
761          *   |/ Flow Dir Flt ID  |   |          |     |      |      |
762          *   +------------------------------------------------------+
763          * 8 | VLAN Tag | Length |Extended Error| Xtnd Status/NEXTP |
764          *   +------------------------------------------------------+
765          *   63       48 47    32 31          20 19                 0
766          */
767
768         for (n = 0; n < adapter->num_rx_queues; n++) {
769                 rx_ring = adapter->rx_ring[n];
770                 pr_info("------------------------------------\n");
771                 pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
772                 pr_info("------------------------------------\n");
773                 pr_info("%s%s%s\n",
774                         "R  [desc]      [ PktBuf     A0] ",
775                         "[  HeadBuf   DD] [bi->dma       ] [bi->skb       ] ",
776                         "<-- Adv Rx Read format");
777                 pr_info("%s%s%s\n",
778                         "RWB[desc]      [PcsmIpSHl PtRs] ",
779                         "[vl er S cks ln] ---------------- [bi->skb       ] ",
780                         "<-- Adv Rx Write-Back format");
781
782                 for (i = 0; i < rx_ring->count; i++) {
783                         const char *ring_desc;
784
785                         if (i == rx_ring->next_to_use)
786                                 ring_desc = " NTU";
787                         else if (i == rx_ring->next_to_clean)
788                                 ring_desc = " NTC";
789                         else
790                                 ring_desc = "";
791
792                         rx_buffer_info = &rx_ring->rx_buffer_info[i];
793                         rx_desc = IXGBE_RX_DESC(rx_ring, i);
794                         u0 = (struct my_u0 *)rx_desc;
795                         if (rx_desc->wb.upper.length) {
796                                 /* Descriptor Done */
797                                 pr_info("RWB[0x%03X]     %016llX %016llX ---------------- %p%s\n",
798                                         i,
799                                         le64_to_cpu((__force __le64)u0->a),
800                                         le64_to_cpu((__force __le64)u0->b),
801                                         rx_buffer_info->skb,
802                                         ring_desc);
803                         } else {
804                                 pr_info("R  [0x%03X]     %016llX %016llX %016llX %p%s\n",
805                                         i,
806                                         le64_to_cpu((__force __le64)u0->a),
807                                         le64_to_cpu((__force __le64)u0->b),
808                                         (u64)rx_buffer_info->dma,
809                                         rx_buffer_info->skb,
810                                         ring_desc);
811
812                                 if (netif_msg_pktdata(adapter) &&
813                                     rx_buffer_info->dma) {
814                                         print_hex_dump(KERN_INFO, "",
815                                            DUMP_PREFIX_ADDRESS, 16, 1,
816                                            page_address(rx_buffer_info->page) +
817                                                     rx_buffer_info->page_offset,
818                                            ixgbe_rx_bufsz(rx_ring), true);
819                                 }
820                         }
821                 }
822         }
823 }
824
825 static void ixgbe_release_hw_control(struct ixgbe_adapter *adapter)
826 {
827         u32 ctrl_ext;
828
829         /* Let firmware take over control of h/w */
830         ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
831         IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT,
832                         ctrl_ext & ~IXGBE_CTRL_EXT_DRV_LOAD);
833 }
834
835 static void ixgbe_get_hw_control(struct ixgbe_adapter *adapter)
836 {
837         u32 ctrl_ext;
838
839         /* Let firmware know the driver has taken over */
840         ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
841         IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT,
842                         ctrl_ext | IXGBE_CTRL_EXT_DRV_LOAD);
843 }
844
845 /**
846  * ixgbe_set_ivar - set the IVAR registers, mapping interrupt causes to vectors
847  * @adapter: pointer to adapter struct
848  * @direction: 0 for Rx, 1 for Tx, -1 for other causes
849  * @queue: queue to map the corresponding interrupt to
850  * @msix_vector: the vector to map to the corresponding queue
851  *
852  */
853 static void ixgbe_set_ivar(struct ixgbe_adapter *adapter, s8 direction,
854                            u8 queue, u8 msix_vector)
855 {
856         u32 ivar, index;
857         struct ixgbe_hw *hw = &adapter->hw;
858         switch (hw->mac.type) {
859         case ixgbe_mac_82598EB:
860                 msix_vector |= IXGBE_IVAR_ALLOC_VAL;
861                 if (direction == -1)
862                         direction = 0;
863                 index = (((direction * 64) + queue) >> 2) & 0x1F;
864                 ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
865                 ivar &= ~(0xFF << (8 * (queue & 0x3)));
866                 ivar |= (msix_vector << (8 * (queue & 0x3)));
867                 IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
868                 break;
869         case ixgbe_mac_82599EB:
870         case ixgbe_mac_X540:
871         case ixgbe_mac_X550:
872         case ixgbe_mac_X550EM_x:
873         case ixgbe_mac_x550em_a:
874                 if (direction == -1) {
875                         /* other causes */
876                         msix_vector |= IXGBE_IVAR_ALLOC_VAL;
877                         index = ((queue & 1) * 8);
878                         ivar = IXGBE_READ_REG(&adapter->hw, IXGBE_IVAR_MISC);
879                         ivar &= ~(0xFF << index);
880                         ivar |= (msix_vector << index);
881                         IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR_MISC, ivar);
882                         break;
883                 } else {
884                         /* tx or rx causes */
885                         msix_vector |= IXGBE_IVAR_ALLOC_VAL;
886                         index = ((16 * (queue & 1)) + (8 * direction));
887                         ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(queue >> 1));
888                         ivar &= ~(0xFF << index);
889                         ivar |= (msix_vector << index);
890                         IXGBE_WRITE_REG(hw, IXGBE_IVAR(queue >> 1), ivar);
891                         break;
892                 }
893         default:
894                 break;
895         }
896 }
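/* Worked example of the IVAR indexing above, for the 82599 and later
 * layout: each IVAR register covers two queues with one byte per cause.
 * Mapping Tx (direction = 1) on queue 5 gives index = 16 * (5 & 1) +
 * 8 * 1 = 24, so the vector lands in bits 31:24 of IXGBE_IVAR(5 >> 1),
 * i.e. IXGBE_IVAR(2).
 */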
897
898 void ixgbe_irq_rearm_queues(struct ixgbe_adapter *adapter,
899                             u64 qmask)
900 {
901         u32 mask;
902
903         switch (adapter->hw.mac.type) {
904         case ixgbe_mac_82598EB:
905                 mask = (IXGBE_EIMS_RTX_QUEUE & qmask);
906                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask);
907                 break;
908         case ixgbe_mac_82599EB:
909         case ixgbe_mac_X540:
910         case ixgbe_mac_X550:
911         case ixgbe_mac_X550EM_x:
912         case ixgbe_mac_x550em_a:
913                 mask = (qmask & 0xFFFFFFFF);
914                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(0), mask);
915                 mask = (qmask >> 32);
916                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(1), mask);
917                 break;
918         default:
919                 break;
920         }
921 }
922
923 static void ixgbe_update_xoff_rx_lfc(struct ixgbe_adapter *adapter)
924 {
925         struct ixgbe_hw *hw = &adapter->hw;
926         struct ixgbe_hw_stats *hwstats = &adapter->stats;
927         int i;
928         u32 data;
929
930         if ((hw->fc.current_mode != ixgbe_fc_full) &&
931             (hw->fc.current_mode != ixgbe_fc_rx_pause))
932                 return;
933
934         switch (hw->mac.type) {
935         case ixgbe_mac_82598EB:
936                 data = IXGBE_READ_REG(hw, IXGBE_LXOFFRXC);
937                 break;
938         default:
939                 data = IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT);
940         }
941         hwstats->lxoffrxc += data;
942
943         /* refill credits (no tx hang) if we received xoff */
944         if (!data)
945                 return;
946
947         for (i = 0; i < adapter->num_tx_queues; i++)
948                 clear_bit(__IXGBE_HANG_CHECK_ARMED,
949                           &adapter->tx_ring[i]->state);
950
951         for (i = 0; i < adapter->num_xdp_queues; i++)
952                 clear_bit(__IXGBE_HANG_CHECK_ARMED,
953                           &adapter->xdp_ring[i]->state);
954 }
955
956 static void ixgbe_update_xoff_received(struct ixgbe_adapter *adapter)
957 {
958         struct ixgbe_hw *hw = &adapter->hw;
959         struct ixgbe_hw_stats *hwstats = &adapter->stats;
960         u32 xoff[8] = {0};
961         u8 tc;
962         int i;
963         bool pfc_en = adapter->dcb_cfg.pfc_mode_enable;
964
965         if (adapter->ixgbe_ieee_pfc)
966                 pfc_en |= !!(adapter->ixgbe_ieee_pfc->pfc_en);
967
968         if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED) || !pfc_en) {
969                 ixgbe_update_xoff_rx_lfc(adapter);
970                 return;
971         }
972
973         /* update stats for each tc, only valid with PFC enabled */
974         for (i = 0; i < MAX_TX_PACKET_BUFFERS; i++) {
975                 u32 pxoffrxc;
976
977                 switch (hw->mac.type) {
978                 case ixgbe_mac_82598EB:
979                         pxoffrxc = IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i));
980                         break;
981                 default:
982                         pxoffrxc = IXGBE_READ_REG(hw, IXGBE_PXOFFRXCNT(i));
983                 }
984                 hwstats->pxoffrxc[i] += pxoffrxc;
985                 /* Get the TC for given UP */
986                 tc = netdev_get_prio_tc_map(adapter->netdev, i);
987                 xoff[tc] += pxoffrxc;
988         }
989
990         /* disarm tx queues that have received xoff frames */
991         for (i = 0; i < adapter->num_tx_queues; i++) {
992                 struct ixgbe_ring *tx_ring = adapter->tx_ring[i];
993
994                 tc = tx_ring->dcb_tc;
995                 if (xoff[tc])
996                         clear_bit(__IXGBE_HANG_CHECK_ARMED, &tx_ring->state);
997         }
998
999         for (i = 0; i < adapter->num_xdp_queues; i++) {
1000                 struct ixgbe_ring *xdp_ring = adapter->xdp_ring[i];
1001
1002                 tc = xdp_ring->dcb_tc;
1003                 if (xoff[tc])
1004                         clear_bit(__IXGBE_HANG_CHECK_ARMED, &xdp_ring->state);
1005         }
1006 }
1007
1008 static u64 ixgbe_get_tx_completed(struct ixgbe_ring *ring)
1009 {
1010         return ring->stats.packets;
1011 }
1012
1013 static u64 ixgbe_get_tx_pending(struct ixgbe_ring *ring)
1014 {
1015         unsigned int head, tail;
1016
1017         head = ring->next_to_clean;
1018         tail = ring->next_to_use;
1019
1020         return ((head <= tail) ? tail : tail + ring->count) - head;
1021 }
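/* Example of the wraparound handling above: with ring->count = 512,
 * next_to_clean = 510 and next_to_use = 4, head > tail, so the number
 * of pending descriptors is (4 + 512) - 510 = 6.
 */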
1022
1023 static inline bool ixgbe_check_tx_hang(struct ixgbe_ring *tx_ring)
1024 {
1025         u32 tx_done = ixgbe_get_tx_completed(tx_ring);
1026         u32 tx_done_old = tx_ring->tx_stats.tx_done_old;
1027         u32 tx_pending = ixgbe_get_tx_pending(tx_ring);
1028
1029         clear_check_for_tx_hang(tx_ring);
1030
1031         /*
1032          * Check for a hung queue, but be thorough. This verifies
1033          * that a transmit has been completed since the previous
1034          * check AND there is at least one packet pending. The
1035          * ARMED bit is set to indicate a potential hang. The
1036          * bit is cleared if a pause frame is received to remove
1037          * false hang detection due to PFC or 802.3x frames. By
1038          * requiring this to fail twice we avoid races with
1039          * pfc clearing the ARMED bit and conditions where we
1040          * run the check_tx_hang logic with a transmit completion
1041          * pending but without time to complete it yet.
1042          */
1043         if (tx_done_old == tx_done && tx_pending)
1044                 /* make sure it is true for two checks in a row */
1045                 return test_and_set_bit(__IXGBE_HANG_CHECK_ARMED,
1046                                         &tx_ring->state);
1047         /* update completed stats and continue */
1048         tx_ring->tx_stats.tx_done_old = tx_done;
1049         /* reset the countdown */
1050         clear_bit(__IXGBE_HANG_CHECK_ARMED, &tx_ring->state);
1051
1052         return false;
1053 }
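/* Net effect of the check above: a service-task pass that sees no new
 * completions while packets are still pending only arms the flag and
 * returns false; a hang is reported only when a second consecutive pass
 * finds the same stalled state.
 */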
1054
1055 /**
1056  * ixgbe_tx_timeout_reset - initiate reset due to Tx timeout
1057  * @adapter: driver private struct
1058  **/
1059 static void ixgbe_tx_timeout_reset(struct ixgbe_adapter *adapter)
1060 {
1061
1062         /* Do the reset outside of interrupt context */
1063         if (!test_bit(__IXGBE_DOWN, &adapter->state)) {
1064                 set_bit(__IXGBE_RESET_REQUESTED, &adapter->state);
1065                 e_warn(drv, "initiating reset due to tx timeout\n");
1066                 ixgbe_service_event_schedule(adapter);
1067         }
1068 }
1069
1070 /**
1071  * ixgbe_tx_maxrate - callback to set the maximum per-queue bitrate
1072  * @netdev: network interface device structure
1073  * @queue_index: Tx queue to set
1074  * @maxrate: desired maximum transmit bitrate
1075  **/
1076 static int ixgbe_tx_maxrate(struct net_device *netdev,
1077                             int queue_index, u32 maxrate)
1078 {
1079         struct ixgbe_adapter *adapter = netdev_priv(netdev);
1080         struct ixgbe_hw *hw = &adapter->hw;
1081         u32 bcnrc_val = ixgbe_link_mbps(adapter);
1082
1083         if (!maxrate)
1084                 return 0;
1085
1086         /* Calculate the rate factor values to set */
1087         bcnrc_val <<= IXGBE_RTTBCNRC_RF_INT_SHIFT;
1088         bcnrc_val /= maxrate;
1089
1090         /* clear everything but the rate factor */
1091         bcnrc_val &= IXGBE_RTTBCNRC_RF_INT_MASK |
1092         IXGBE_RTTBCNRC_RF_DEC_MASK;
1093
1094         /* enable the rate scheduler */
1095         bcnrc_val |= IXGBE_RTTBCNRC_RS_ENA;
1096
1097         IXGBE_WRITE_REG(hw, IXGBE_RTTDQSEL, queue_index);
1098         IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRC, bcnrc_val);
1099
1100         return 0;
1101 }
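/* The RTTBCNRC value programmed above is the ratio of the negotiated link
 * speed to the requested maxrate, stored as a fixed-point rate factor
 * (integer part above the RF_INT shift, fraction below it).  For example,
 * capping a queue at 2500 Mbps on a 10000 Mbps link programs a rate
 * factor of 4.0.
 */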
1102
1103 /**
1104  * ixgbe_clean_tx_irq - Reclaim resources after transmit completes
1105  * @q_vector: structure containing interrupt and ring information
1106  * @tx_ring: tx ring to clean
1107  * @napi_budget: Used to determine if we are in netpoll
1108  **/
1109 static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
1110                                struct ixgbe_ring *tx_ring, int napi_budget)
1111 {
1112         struct ixgbe_adapter *adapter = q_vector->adapter;
1113         struct ixgbe_tx_buffer *tx_buffer;
1114         union ixgbe_adv_tx_desc *tx_desc;
1115         unsigned int total_bytes = 0, total_packets = 0, total_ipsec = 0;
1116         unsigned int budget = q_vector->tx.work_limit;
1117         unsigned int i = tx_ring->next_to_clean;
1118
1119         if (test_bit(__IXGBE_DOWN, &adapter->state))
1120                 return true;
1121
1122         tx_buffer = &tx_ring->tx_buffer_info[i];
1123         tx_desc = IXGBE_TX_DESC(tx_ring, i);
1124         i -= tx_ring->count;
1125
1126         do {
1127                 union ixgbe_adv_tx_desc *eop_desc = tx_buffer->next_to_watch;
1128
1129                 /* if next_to_watch is not set then there is no work pending */
1130                 if (!eop_desc)
1131                         break;
1132
1133                 /* prevent any other reads prior to eop_desc */
1134                 smp_rmb();
1135
1136                 /* if DD is not set pending work has not been completed */
1137                 if (!(eop_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)))
1138                         break;
1139
1140                 /* clear next_to_watch to prevent false hangs */
1141                 tx_buffer->next_to_watch = NULL;
1142
1143                 /* update the statistics for this packet */
1144                 total_bytes += tx_buffer->bytecount;
1145                 total_packets += tx_buffer->gso_segs;
1146                 if (tx_buffer->tx_flags & IXGBE_TX_FLAGS_IPSEC)
1147                         total_ipsec++;
1148
1149                 /* free the skb */
1150                 if (ring_is_xdp(tx_ring))
1151                         xdp_return_frame(tx_buffer->xdpf);
1152                 else
1153                         napi_consume_skb(tx_buffer->skb, napi_budget);
1154
1155                 /* unmap skb header data */
1156                 dma_unmap_single(tx_ring->dev,
1157                                  dma_unmap_addr(tx_buffer, dma),
1158                                  dma_unmap_len(tx_buffer, len),
1159                                  DMA_TO_DEVICE);
1160
1161                 /* clear tx_buffer data */
1162                 dma_unmap_len_set(tx_buffer, len, 0);
1163
1164                 /* unmap remaining buffers */
1165                 while (tx_desc != eop_desc) {
1166                         tx_buffer++;
1167                         tx_desc++;
1168                         i++;
1169                         if (unlikely(!i)) {
1170                                 i -= tx_ring->count;
1171                                 tx_buffer = tx_ring->tx_buffer_info;
1172                                 tx_desc = IXGBE_TX_DESC(tx_ring, 0);
1173                         }
1174
1175                         /* unmap any remaining paged data */
1176                         if (dma_unmap_len(tx_buffer, len)) {
1177                                 dma_unmap_page(tx_ring->dev,
1178                                                dma_unmap_addr(tx_buffer, dma),
1179                                                dma_unmap_len(tx_buffer, len),
1180                                                DMA_TO_DEVICE);
1181                                 dma_unmap_len_set(tx_buffer, len, 0);
1182                         }
1183                 }
1184
1185                 /* move us one more past the eop_desc for start of next pkt */
1186                 tx_buffer++;
1187                 tx_desc++;
1188                 i++;
1189                 if (unlikely(!i)) {
1190                         i -= tx_ring->count;
1191                         tx_buffer = tx_ring->tx_buffer_info;
1192                         tx_desc = IXGBE_TX_DESC(tx_ring, 0);
1193                 }
1194
1195                 /* issue prefetch for next Tx descriptor */
1196                 prefetch(tx_desc);
1197
1198                 /* update budget accounting */
1199                 budget--;
1200         } while (likely(budget));
1201
1202         i += tx_ring->count;
1203         tx_ring->next_to_clean = i;
1204         u64_stats_update_begin(&tx_ring->syncp);
1205         tx_ring->stats.bytes += total_bytes;
1206         tx_ring->stats.packets += total_packets;
1207         u64_stats_update_end(&tx_ring->syncp);
1208         q_vector->tx.total_bytes += total_bytes;
1209         q_vector->tx.total_packets += total_packets;
1210         adapter->tx_ipsec += total_ipsec;
1211
1212         if (check_for_tx_hang(tx_ring) && ixgbe_check_tx_hang(tx_ring)) {
1213                 /* schedule immediate reset if we believe we hung */
1214                 struct ixgbe_hw *hw = &adapter->hw;
1215                 e_err(drv, "Detected Tx Unit Hang %s\n"
1216                         "  Tx Queue             <%d>\n"
1217                         "  TDH, TDT             <%x>, <%x>\n"
1218                         "  next_to_use          <%x>\n"
1219                         "  next_to_clean        <%x>\n"
1220                         "tx_buffer_info[next_to_clean]\n"
1221                         "  time_stamp           <%lx>\n"
1222                         "  jiffies              <%lx>\n",
1223                         ring_is_xdp(tx_ring) ? "(XDP)" : "",
1224                         tx_ring->queue_index,
1225                         IXGBE_READ_REG(hw, IXGBE_TDH(tx_ring->reg_idx)),
1226                         IXGBE_READ_REG(hw, IXGBE_TDT(tx_ring->reg_idx)),
1227                         tx_ring->next_to_use, i,
1228                         tx_ring->tx_buffer_info[i].time_stamp, jiffies);
1229
1230                 if (!ring_is_xdp(tx_ring))
1231                         netif_stop_subqueue(tx_ring->netdev,
1232                                             tx_ring->queue_index);
1233
1234                 e_info(probe,
1235                        "tx hang %d detected on queue %d, resetting adapter\n",
1236                         adapter->tx_timeout_count + 1, tx_ring->queue_index);
1237
1238                 /* schedule immediate reset if we believe we hung */
1239                 ixgbe_tx_timeout_reset(adapter);
1240
1241                 /* the adapter is about to reset, no point in enabling stuff */
1242                 return true;
1243         }
1244
1245         if (ring_is_xdp(tx_ring))
1246                 return !!budget;
1247
1248         netdev_tx_completed_queue(txring_txq(tx_ring),
1249                                   total_packets, total_bytes);
1250
1251 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
1252         if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
1253                      (ixgbe_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) {
1254                 /* Make sure that anybody stopping the queue after this
1255                  * sees the new next_to_clean.
1256                  */
1257                 smp_mb();
1258                 if (__netif_subqueue_stopped(tx_ring->netdev,
1259                                              tx_ring->queue_index)
1260                     && !test_bit(__IXGBE_DOWN, &adapter->state)) {
1261                         netif_wake_subqueue(tx_ring->netdev,
1262                                             tx_ring->queue_index);
1263                         ++tx_ring->tx_stats.restart_queue;
1264                 }
1265         }
1266
1267         return !!budget;
1268 }
1269
1270 #ifdef CONFIG_IXGBE_DCA
1271 static void ixgbe_update_tx_dca(struct ixgbe_adapter *adapter,
1272                                 struct ixgbe_ring *tx_ring,
1273                                 int cpu)
1274 {
1275         struct ixgbe_hw *hw = &adapter->hw;
1276         u32 txctrl = 0;
1277         u16 reg_offset;
1278
1279         if (adapter->flags & IXGBE_FLAG_DCA_ENABLED)
1280                 txctrl = dca3_get_tag(tx_ring->dev, cpu);
1281
1282         switch (hw->mac.type) {
1283         case ixgbe_mac_82598EB:
1284                 reg_offset = IXGBE_DCA_TXCTRL(tx_ring->reg_idx);
1285                 break;
1286         case ixgbe_mac_82599EB:
1287         case ixgbe_mac_X540:
1288                 reg_offset = IXGBE_DCA_TXCTRL_82599(tx_ring->reg_idx);
1289                 txctrl <<= IXGBE_DCA_TXCTRL_CPUID_SHIFT_82599;
1290                 break;
1291         default:
1292                 /* for unknown hardware do not write register */
1293                 return;
1294         }
1295
1296         /*
1297          * We can enable relaxed ordering for reads, but not writes when
1298          * DCA is enabled.  This is due to a known issue in some chipsets
1299          * which will cause the DCA tag to be cleared.
1300          */
1301         txctrl |= IXGBE_DCA_TXCTRL_DESC_RRO_EN |
1302                   IXGBE_DCA_TXCTRL_DATA_RRO_EN |
1303                   IXGBE_DCA_TXCTRL_DESC_DCA_EN;
1304
1305         IXGBE_WRITE_REG(hw, reg_offset, txctrl);
1306 }
1307
1308 static void ixgbe_update_rx_dca(struct ixgbe_adapter *adapter,
1309                                 struct ixgbe_ring *rx_ring,
1310                                 int cpu)
1311 {
1312         struct ixgbe_hw *hw = &adapter->hw;
1313         u32 rxctrl = 0;
1314         u8 reg_idx = rx_ring->reg_idx;
1315
1316         if (adapter->flags & IXGBE_FLAG_DCA_ENABLED)
1317                 rxctrl = dca3_get_tag(rx_ring->dev, cpu);
1318
1319         switch (hw->mac.type) {
1320         case ixgbe_mac_82599EB:
1321         case ixgbe_mac_X540:
1322                 rxctrl <<= IXGBE_DCA_RXCTRL_CPUID_SHIFT_82599;
1323                 break;
1324         default:
1325                 break;
1326         }
1327
1328         /*
1329          * We can enable relaxed ordering for reads, but not writes when
1330          * DCA is enabled.  This is due to a known issue in some chipsets
1331          * which will cause the DCA tag to be cleared.
1332          */
1333         rxctrl |= IXGBE_DCA_RXCTRL_DESC_RRO_EN |
1334                   IXGBE_DCA_RXCTRL_DATA_DCA_EN |
1335                   IXGBE_DCA_RXCTRL_DESC_DCA_EN;
1336
1337         IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(reg_idx), rxctrl);
1338 }
1339
1340 static void ixgbe_update_dca(struct ixgbe_q_vector *q_vector)
1341 {
1342         struct ixgbe_adapter *adapter = q_vector->adapter;
1343         struct ixgbe_ring *ring;
1344         int cpu = get_cpu();
1345
1346         if (q_vector->cpu == cpu)
1347                 goto out_no_update;
1348
1349         ixgbe_for_each_ring(ring, q_vector->tx)
1350                 ixgbe_update_tx_dca(adapter, ring, cpu);
1351
1352         ixgbe_for_each_ring(ring, q_vector->rx)
1353                 ixgbe_update_rx_dca(adapter, ring, cpu);
1354
1355         q_vector->cpu = cpu;
1356 out_no_update:
1357         put_cpu();
1358 }
1359
1360 static void ixgbe_setup_dca(struct ixgbe_adapter *adapter)
1361 {
1362         int i;
1363
1364         /* always use CB2 mode, difference is masked in the CB driver */
1365         if (adapter->flags & IXGBE_FLAG_DCA_ENABLED)
1366                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_DCA_CTRL,
1367                                 IXGBE_DCA_CTRL_DCA_MODE_CB2);
1368         else
1369                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_DCA_CTRL,
1370                                 IXGBE_DCA_CTRL_DCA_DISABLE);
1371
1372         for (i = 0; i < adapter->num_q_vectors; i++) {
1373                 adapter->q_vector[i]->cpu = -1;
1374                 ixgbe_update_dca(adapter->q_vector[i]);
1375         }
1376 }
1377
1378 static int __ixgbe_notify_dca(struct device *dev, void *data)
1379 {
1380         struct ixgbe_adapter *adapter = dev_get_drvdata(dev);
1381         unsigned long event = *(unsigned long *)data;
1382
1383         if (!(adapter->flags & IXGBE_FLAG_DCA_CAPABLE))
1384                 return 0;
1385
1386         switch (event) {
1387         case DCA_PROVIDER_ADD:
1388                 /* if we're already enabled, don't do it again */
1389                 if (adapter->flags & IXGBE_FLAG_DCA_ENABLED)
1390                         break;
1391                 if (dca_add_requester(dev) == 0) {
1392                         adapter->flags |= IXGBE_FLAG_DCA_ENABLED;
1393                         IXGBE_WRITE_REG(&adapter->hw, IXGBE_DCA_CTRL,
1394                                         IXGBE_DCA_CTRL_DCA_MODE_CB2);
1395                         break;
1396                 }
1397                 /* fall through - DCA is disabled. */
1398         case DCA_PROVIDER_REMOVE:
1399                 if (adapter->flags & IXGBE_FLAG_DCA_ENABLED) {
1400                         dca_remove_requester(dev);
1401                         adapter->flags &= ~IXGBE_FLAG_DCA_ENABLED;
1402                         IXGBE_WRITE_REG(&adapter->hw, IXGBE_DCA_CTRL,
1403                                         IXGBE_DCA_CTRL_DCA_DISABLE);
1404                 }
1405                 break;
1406         }
1407
1408         return 0;
1409 }
1410
1411 #endif /* CONFIG_IXGBE_DCA */
1412
1413 #define IXGBE_RSS_L4_TYPES_MASK \
1414         ((1ul << IXGBE_RXDADV_RSSTYPE_IPV4_TCP) | \
1415          (1ul << IXGBE_RXDADV_RSSTYPE_IPV4_UDP) | \
1416          (1ul << IXGBE_RXDADV_RSSTYPE_IPV6_TCP) | \
1417          (1ul << IXGBE_RXDADV_RSSTYPE_IPV6_UDP))
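/* Illustrative note (editor's sketch, not part of the original source): for a
 * descriptor whose RSS type is, say, IXGBE_RXDADV_RSSTYPE_IPV4_TCP, the bit
 * (1ul << rss_type) falls inside the mask above, so ixgbe_rx_hash() below
 * reports the hash as PKT_HASH_TYPE_L4; any non-zero RSS type outside the
 * mask is reported as PKT_HASH_TYPE_L3 instead.
 */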
1418
1419 static inline void ixgbe_rx_hash(struct ixgbe_ring *ring,
1420                                  union ixgbe_adv_rx_desc *rx_desc,
1421                                  struct sk_buff *skb)
1422 {
1423         u16 rss_type;
1424
1425         if (!(ring->netdev->features & NETIF_F_RXHASH))
1426                 return;
1427
1428         rss_type = le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.pkt_info) &
1429                    IXGBE_RXDADV_RSSTYPE_MASK;
1430
1431         if (!rss_type)
1432                 return;
1433
1434         skb_set_hash(skb, le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
1435                      (IXGBE_RSS_L4_TYPES_MASK & (1ul << rss_type)) ?
1436                      PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
1437 }
1438
1439 #ifdef IXGBE_FCOE
1440 /**
1441  * ixgbe_rx_is_fcoe - check the rx desc for incoming pkt type
1442  * @ring: structure containing ring specific data
1443  * @rx_desc: advanced rx descriptor
1444  *
 1445  * Returns: true if it is an FCoE packet
1446  */
1447 static inline bool ixgbe_rx_is_fcoe(struct ixgbe_ring *ring,
1448                                     union ixgbe_adv_rx_desc *rx_desc)
1449 {
1450         __le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info;
1451
1452         return test_bit(__IXGBE_RX_FCOE, &ring->state) &&
1453                ((pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_ETQF_MASK)) ==
1454                 (cpu_to_le16(IXGBE_ETQF_FILTER_FCOE <<
1455                              IXGBE_RXDADV_PKTTYPE_ETQF_SHIFT)));
1456 }
1457
1458 #endif /* IXGBE_FCOE */
1459 /**
1460  * ixgbe_rx_checksum - indicate in skb if hw indicated a good cksum
1461  * @ring: structure containing ring specific data
1462  * @rx_desc: current Rx descriptor being processed
1463  * @skb: skb currently being received and modified
1464  **/
1465 static inline void ixgbe_rx_checksum(struct ixgbe_ring *ring,
1466                                      union ixgbe_adv_rx_desc *rx_desc,
1467                                      struct sk_buff *skb)
1468 {
1469         __le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info;
1470         bool encap_pkt = false;
1471
1472         skb_checksum_none_assert(skb);
1473
1474         /* Rx csum disabled */
1475         if (!(ring->netdev->features & NETIF_F_RXCSUM))
1476                 return;
1477
1478         /* check for VXLAN and Geneve packets */
1479         if (pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_VXLAN)) {
1480                 encap_pkt = true;
1481                 skb->encapsulation = 1;
1482         }
1483
1484         /* if IP and error */
1485         if (ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_IPCS) &&
1486             ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_ERR_IPE)) {
1487                 ring->rx_stats.csum_err++;
1488                 return;
1489         }
1490
1491         if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_L4CS))
1492                 return;
1493
1494         if (ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_ERR_TCPE)) {
1495                 /*
1496                  * 82599 errata, UDP frames with a 0 checksum can be marked as
1497                  * checksum errors.
1498                  */
1499                 if ((pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_UDP)) &&
1500                     test_bit(__IXGBE_RX_CSUM_UDP_ZERO_ERR, &ring->state))
1501                         return;
1502
1503                 ring->rx_stats.csum_err++;
1504                 return;
1505         }
1506
1507         /* It must be a TCP or UDP packet with a valid checksum */
1508         skb->ip_summed = CHECKSUM_UNNECESSARY;
1509         if (encap_pkt) {
1510                 if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_OUTERIPCS))
1511                         return;
1512
1513                 if (ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_ERR_OUTERIPER)) {
1514                         skb->ip_summed = CHECKSUM_NONE;
1515                         return;
1516                 }
1517                 /* If we checked the outer header let the stack know */
1518                 skb->csum_level = 1;
1519         }
1520 }
1521
1522 static inline unsigned int ixgbe_rx_offset(struct ixgbe_ring *rx_ring)
1523 {
1524         return ring_uses_build_skb(rx_ring) ? IXGBE_SKB_PAD : 0;
1525 }
1526
1527 static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring,
1528                                     struct ixgbe_rx_buffer *bi)
1529 {
1530         struct page *page = bi->page;
1531         dma_addr_t dma;
1532
1533         /* since we are recycling buffers we should seldom need to alloc */
1534         if (likely(page))
1535                 return true;
1536
1537         /* alloc new page for storage */
1538         page = dev_alloc_pages(ixgbe_rx_pg_order(rx_ring));
1539         if (unlikely(!page)) {
1540                 rx_ring->rx_stats.alloc_rx_page_failed++;
1541                 return false;
1542         }
1543
1544         /* map page for use */
1545         dma = dma_map_page_attrs(rx_ring->dev, page, 0,
1546                                  ixgbe_rx_pg_size(rx_ring),
1547                                  DMA_FROM_DEVICE,
1548                                  IXGBE_RX_DMA_ATTR);
1549
1550         /*
1551          * if mapping failed free memory back to system since
1552          * there isn't much point in holding memory we can't use
1553          */
1554         if (dma_mapping_error(rx_ring->dev, dma)) {
1555                 __free_pages(page, ixgbe_rx_pg_order(rx_ring));
1556
1557                 rx_ring->rx_stats.alloc_rx_page_failed++;
1558                 return false;
1559         }
1560
1561         bi->dma = dma;
1562         bi->page = page;
1563         bi->page_offset = ixgbe_rx_offset(rx_ring);
1564         page_ref_add(page, USHRT_MAX - 1);
1565         bi->pagecnt_bias = USHRT_MAX;
1566         rx_ring->rx_stats.alloc_rx_page++;
1567
1568         return true;
1569 }
1570
1571 /**
1572  * ixgbe_alloc_rx_buffers - Replace used receive buffers
1573  * @rx_ring: ring to place buffers on
1574  * @cleaned_count: number of buffers to replace
1575  **/
1576 void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count)
1577 {
1578         union ixgbe_adv_rx_desc *rx_desc;
1579         struct ixgbe_rx_buffer *bi;
1580         u16 i = rx_ring->next_to_use;
1581         u16 bufsz;
1582
1583         /* nothing to do */
1584         if (!cleaned_count)
1585                 return;
1586
1587         rx_desc = IXGBE_RX_DESC(rx_ring, i);
1588         bi = &rx_ring->rx_buffer_info[i];
1589         i -= rx_ring->count;
1590
1591         bufsz = ixgbe_rx_bufsz(rx_ring);
1592
1593         do {
1594                 if (!ixgbe_alloc_mapped_page(rx_ring, bi))
1595                         break;
1596
1597                 /* sync the buffer for use by the device */
1598                 dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
1599                                                  bi->page_offset, bufsz,
1600                                                  DMA_FROM_DEVICE);
1601
1602                 /*
1603                  * Refresh the desc even if buffer_addrs didn't change
1604                  * because each write-back erases this info.
1605                  */
1606                 rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
1607
1608                 rx_desc++;
1609                 bi++;
1610                 i++;
1611                 if (unlikely(!i)) {
1612                         rx_desc = IXGBE_RX_DESC(rx_ring, 0);
1613                         bi = rx_ring->rx_buffer_info;
1614                         i -= rx_ring->count;
1615                 }
1616
1617                 /* clear the length for the next_to_use descriptor */
1618                 rx_desc->wb.upper.length = 0;
1619
1620                 cleaned_count--;
1621         } while (cleaned_count);
1622
1623         i += rx_ring->count;
1624
1625         if (rx_ring->next_to_use != i) {
1626                 rx_ring->next_to_use = i;
1627
1628                 /* update next to alloc since we have filled the ring */
1629                 rx_ring->next_to_alloc = i;
1630
1631                 /* Force memory writes to complete before letting h/w
1632                  * know there are new descriptors to fetch.  (Only
1633                  * applicable for weak-ordered memory model archs,
1634                  * such as IA-64).
1635                  */
1636                 wmb();
1637                 writel(i, rx_ring->tail);
1638         }
1639 }
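/* Usage sketch (editor's illustration, mirroring the Rx clean path later in
 * this file): callers refill in batches rather than replacing one buffer at
 * a time, e.g.
 *
 *	if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) {
 *		ixgbe_alloc_rx_buffers(rx_ring, cleaned_count);
 *		cleaned_count = 0;
 *	}
 *
 * which is how ixgbe_clean_rx_irq() below returns buffers to hardware.
 */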
1640
1641 static void ixgbe_set_rsc_gso_size(struct ixgbe_ring *ring,
1642                                    struct sk_buff *skb)
1643 {
1644         u16 hdr_len = skb_headlen(skb);
1645
1646         /* set gso_size to avoid messing up TCP MSS */
1647         skb_shinfo(skb)->gso_size = DIV_ROUND_UP((skb->len - hdr_len),
1648                                                  IXGBE_CB(skb)->append_cnt);
1649         skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
1650 }
1651
1652 static void ixgbe_update_rsc_stats(struct ixgbe_ring *rx_ring,
1653                                    struct sk_buff *skb)
1654 {
1655         /* if append_cnt is 0 then frame is not RSC */
1656         if (!IXGBE_CB(skb)->append_cnt)
1657                 return;
1658
1659         rx_ring->rx_stats.rsc_count += IXGBE_CB(skb)->append_cnt;
1660         rx_ring->rx_stats.rsc_flush++;
1661
1662         ixgbe_set_rsc_gso_size(rx_ring, skb);
1663
1664         /* gso_size is computed using append_cnt so always clear it last */
1665         IXGBE_CB(skb)->append_cnt = 0;
1666 }
1667
1668 /**
1669  * ixgbe_process_skb_fields - Populate skb header fields from Rx descriptor
1670  * @rx_ring: rx descriptor ring packet is being transacted on
1671  * @rx_desc: pointer to the EOP Rx descriptor
1672  * @skb: pointer to current skb being populated
1673  *
1674  * This function checks the ring, descriptor, and packet information in
1675  * order to populate the hash, checksum, VLAN, timestamp, protocol, and
1676  * other fields within the skb.
1677  **/
1678 void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring,
1679                               union ixgbe_adv_rx_desc *rx_desc,
1680                               struct sk_buff *skb)
1681 {
1682         struct net_device *dev = rx_ring->netdev;
1683         u32 flags = rx_ring->q_vector->adapter->flags;
1684
1685         ixgbe_update_rsc_stats(rx_ring, skb);
1686
1687         ixgbe_rx_hash(rx_ring, rx_desc, skb);
1688
1689         ixgbe_rx_checksum(rx_ring, rx_desc, skb);
1690
1691         if (unlikely(flags & IXGBE_FLAG_RX_HWTSTAMP_ENABLED))
1692                 ixgbe_ptp_rx_hwtstamp(rx_ring, rx_desc, skb);
1693
1694         if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
1695             ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) {
1696                 u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
1697                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
1698         }
1699
1700         if (ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_SECP))
1701                 ixgbe_ipsec_rx(rx_ring, rx_desc, skb);
1702
1703         /* record Rx queue, or update MACVLAN statistics */
1704         if (netif_is_ixgbe(dev))
1705                 skb_record_rx_queue(skb, rx_ring->queue_index);
1706         else
1707                 macvlan_count_rx(netdev_priv(dev), skb->len + ETH_HLEN, true,
1708                                  false);
1709
1710         skb->protocol = eth_type_trans(skb, dev);
1711 }
1712
1713 void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector,
1714                   struct sk_buff *skb)
1715 {
1716         napi_gro_receive(&q_vector->napi, skb);
1717 }
1718
1719 /**
1720  * ixgbe_is_non_eop - process handling of non-EOP buffers
1721  * @rx_ring: Rx ring being processed
1722  * @rx_desc: Rx descriptor for current buffer
1723  * @skb: Current socket buffer containing buffer in progress
1724  *
1725  * This function updates next to clean.  If the buffer is an EOP buffer
1726  * this function exits returning false, otherwise it will place the
1727  * sk_buff in the next buffer to be chained and return true indicating
1728  * that this is in fact a non-EOP buffer.
1729  **/
1730 static bool ixgbe_is_non_eop(struct ixgbe_ring *rx_ring,
1731                              union ixgbe_adv_rx_desc *rx_desc,
1732                              struct sk_buff *skb)
1733 {
1734         u32 ntc = rx_ring->next_to_clean + 1;
1735
1736         /* fetch, update, and store next to clean */
1737         ntc = (ntc < rx_ring->count) ? ntc : 0;
1738         rx_ring->next_to_clean = ntc;
1739
1740         prefetch(IXGBE_RX_DESC(rx_ring, ntc));
1741
1742         /* update RSC append count if present */
1743         if (ring_is_rsc_enabled(rx_ring)) {
1744                 __le32 rsc_enabled = rx_desc->wb.lower.lo_dword.data &
1745                                      cpu_to_le32(IXGBE_RXDADV_RSCCNT_MASK);
1746
1747                 if (unlikely(rsc_enabled)) {
1748                         u32 rsc_cnt = le32_to_cpu(rsc_enabled);
1749
1750                         rsc_cnt >>= IXGBE_RXDADV_RSCCNT_SHIFT;
1751                         IXGBE_CB(skb)->append_cnt += rsc_cnt - 1;
1752
1753                         /* update ntc based on RSC value */
1754                         ntc = le32_to_cpu(rx_desc->wb.upper.status_error);
1755                         ntc &= IXGBE_RXDADV_NEXTP_MASK;
1756                         ntc >>= IXGBE_RXDADV_NEXTP_SHIFT;
1757                 }
1758         }
1759
1760         /* if we are the last buffer then there is nothing else to do */
1761         if (likely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)))
1762                 return false;
1763
1764         /* place skb in next buffer to be received */
1765         rx_ring->rx_buffer_info[ntc].skb = skb;
1766         rx_ring->rx_stats.non_eop_descs++;
1767
1768         return true;
1769 }
1770
1771 /**
1772  * ixgbe_pull_tail - ixgbe specific version of skb_pull_tail
1773  * @rx_ring: rx descriptor ring packet is being transacted on
1774  * @skb: pointer to current skb being adjusted
1775  *
1776  * This function is an ixgbe specific version of __pskb_pull_tail.  The
1777  * main difference between this version and the original function is that
1778  * this function can make several assumptions about the state of things
1779  * that allow for significant optimizations versus the standard function.
1780  * As a result we can do things like drop a frag and maintain an accurate
1781  * truesize for the skb.
1782  */
1783 static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring,
1784                             struct sk_buff *skb)
1785 {
1786         struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
1787         unsigned char *va;
1788         unsigned int pull_len;
1789
1790         /*
1791          * it is valid to use page_address instead of kmap since we are
 1792          * working with pages allocated out of the lowmem pool per
1793          * alloc_page(GFP_ATOMIC)
1794          */
1795         va = skb_frag_address(frag);
1796
1797         /*
1798          * we need the header to contain the greater of either ETH_HLEN or
1799          * 60 bytes if the skb->len is less than 60 for skb_pad.
1800          */
1801         pull_len = eth_get_headlen(va, IXGBE_RX_HDR_SIZE);
1802
1803         /* align pull length to size of long to optimize memcpy performance */
1804         skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
1805
1806         /* update all of the pointers */
1807         skb_frag_size_sub(frag, pull_len);
1808         frag->page_offset += pull_len;
1809         skb->data_len -= pull_len;
1810         skb->tail += pull_len;
1811 }
1812
1813 /**
1814  * ixgbe_dma_sync_frag - perform DMA sync for first frag of SKB
1815  * @rx_ring: rx descriptor ring packet is being transacted on
1816  * @skb: pointer to current skb being updated
1817  *
1818  * This function provides a basic DMA sync up for the first fragment of an
1819  * skb.  The reason for doing this is that the first fragment cannot be
1820  * unmapped until we have reached the end of packet descriptor for a buffer
1821  * chain.
1822  */
1823 static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring,
1824                                 struct sk_buff *skb)
1825 {
1826         /* if the page was released unmap it, else just sync our portion */
1827         if (unlikely(IXGBE_CB(skb)->page_released)) {
1828                 dma_unmap_page_attrs(rx_ring->dev, IXGBE_CB(skb)->dma,
1829                                      ixgbe_rx_pg_size(rx_ring),
1830                                      DMA_FROM_DEVICE,
1831                                      IXGBE_RX_DMA_ATTR);
1832         } else if (ring_uses_build_skb(rx_ring)) {
1833                 unsigned long offset = (unsigned long)(skb->data) & ~PAGE_MASK;
1834
1835                 dma_sync_single_range_for_cpu(rx_ring->dev,
1836                                               IXGBE_CB(skb)->dma,
1837                                               offset,
1838                                               skb_headlen(skb),
1839                                               DMA_FROM_DEVICE);
1840         } else {
1841                 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
1842
1843                 dma_sync_single_range_for_cpu(rx_ring->dev,
1844                                               IXGBE_CB(skb)->dma,
1845                                               frag->page_offset,
1846                                               skb_frag_size(frag),
1847                                               DMA_FROM_DEVICE);
1848         }
1849 }
1850
1851 /**
1852  * ixgbe_cleanup_headers - Correct corrupted or empty headers
1853  * @rx_ring: rx descriptor ring packet is being transacted on
1854  * @rx_desc: pointer to the EOP Rx descriptor
1855  * @skb: pointer to current skb being fixed
1856  *
 1857  * Check if the skb is valid: in the XDP case it will be an error pointer,
 1858  * so return true to abort processing and advance to the next
 1859  * descriptor.
1860  *
1861  * Check for corrupted packet headers caused by senders on the local L2
 1862  * embedded NIC switch not setting up their Tx descriptors correctly.  These
1863  * should be very rare.
1864  *
1865  * Also address the case where we are pulling data in on pages only
1866  * and as such no data is present in the skb header.
1867  *
1868  * In addition if skb is not at least 60 bytes we need to pad it so that
1869  * it is large enough to qualify as a valid Ethernet frame.
1870  *
1871  * Returns true if an error was encountered and skb was freed.
1872  **/
1873 bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring,
1874                            union ixgbe_adv_rx_desc *rx_desc,
1875                            struct sk_buff *skb)
1876 {
1877         struct net_device *netdev = rx_ring->netdev;
1878
1879         /* XDP packets use error pointer so abort at this point */
1880         if (IS_ERR(skb))
1881                 return true;
1882
1883         /* Verify netdev is present, and that packet does not have any
1884          * errors that would be unacceptable to the netdev.
1885          */
1886         if (!netdev ||
1887             (unlikely(ixgbe_test_staterr(rx_desc,
1888                                          IXGBE_RXDADV_ERR_FRAME_ERR_MASK) &&
1889              !(netdev->features & NETIF_F_RXALL)))) {
1890                 dev_kfree_skb_any(skb);
1891                 return true;
1892         }
1893
1894         /* place header in linear portion of buffer */
1895         if (!skb_headlen(skb))
1896                 ixgbe_pull_tail(rx_ring, skb);
1897
1898 #ifdef IXGBE_FCOE
1899         /* do not attempt to pad FCoE Frames as this will disrupt DDP */
1900         if (ixgbe_rx_is_fcoe(rx_ring, rx_desc))
1901                 return false;
1902
1903 #endif
1904         /* if eth_skb_pad returns an error the skb was freed */
1905         if (eth_skb_pad(skb))
1906                 return true;
1907
1908         return false;
1909 }
1910
1911 /**
1912  * ixgbe_reuse_rx_page - page flip buffer and store it back on the ring
1913  * @rx_ring: rx descriptor ring to store buffers on
1914  * @old_buff: donor buffer to have page reused
1915  *
1916  * Synchronizes page for reuse by the adapter
1917  **/
1918 static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring,
1919                                 struct ixgbe_rx_buffer *old_buff)
1920 {
1921         struct ixgbe_rx_buffer *new_buff;
1922         u16 nta = rx_ring->next_to_alloc;
1923
1924         new_buff = &rx_ring->rx_buffer_info[nta];
1925
1926         /* update, and store next to alloc */
1927         nta++;
1928         rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
1929
1930         /* Transfer page from old buffer to new buffer.
1931          * Move each member individually to avoid possible store
1932          * forwarding stalls and unnecessary copy of skb.
1933          */
1934         new_buff->dma           = old_buff->dma;
1935         new_buff->page          = old_buff->page;
1936         new_buff->page_offset   = old_buff->page_offset;
1937         new_buff->pagecnt_bias  = old_buff->pagecnt_bias;
1938 }
1939
1940 static inline bool ixgbe_page_is_reserved(struct page *page)
1941 {
1942         return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
1943 }
1944
1945 static bool ixgbe_can_reuse_rx_page(struct ixgbe_rx_buffer *rx_buffer)
1946 {
1947         unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
1948         struct page *page = rx_buffer->page;
1949
1950         /* avoid re-using remote pages */
1951         if (unlikely(ixgbe_page_is_reserved(page)))
1952                 return false;
1953
1954 #if (PAGE_SIZE < 8192)
 1955         /* if we are the only owner of the page we can reuse it */
1956         if (unlikely((page_ref_count(page) - pagecnt_bias) > 1))
1957                 return false;
1958 #else
1959         /* The last offset is a bit aggressive in that we assume the
1960          * worst case of FCoE being enabled and using a 3K buffer.
1961          * However this should have minimal impact as the 1K extra is
1962          * still less than one buffer in size.
1963          */
1964 #define IXGBE_LAST_OFFSET \
1965         (SKB_WITH_OVERHEAD(PAGE_SIZE) - IXGBE_RXBUFFER_3K)
1966         if (rx_buffer->page_offset > IXGBE_LAST_OFFSET)
1967                 return false;
1968 #endif
1969
1970         /* If we have drained the page fragment pool we need to update
1971          * the pagecnt_bias and page count so that we fully restock the
1972          * number of references the driver holds.
1973          */
1974         if (unlikely(pagecnt_bias == 1)) {
1975                 page_ref_add(page, USHRT_MAX - 1);
1976                 rx_buffer->pagecnt_bias = USHRT_MAX;
1977         }
1978
1979         return true;
1980 }
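/* Editor's note on the bias scheme (an illustrative, hedged summary of the
 * code above and below): ixgbe_alloc_mapped_page() charges the page with
 * USHRT_MAX - 1 extra references and sets pagecnt_bias = USHRT_MAX, so
 * page_ref_count(page) - pagecnt_bias starts at zero.  Each time a buffer is
 * handed out, pagecnt_bias is decremented (see ixgbe_get_rx_buffer() below),
 * so the difference roughly counts frags still outstanding; if more than the
 * one currently being processed is outstanding, the page is not reused, and
 * any leftover bias is dropped via __page_frag_cache_drain() in
 * ixgbe_put_rx_buffer().
 */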
1981
1982 /**
1983  * ixgbe_add_rx_frag - Add contents of Rx buffer to sk_buff
1984  * @rx_ring: rx descriptor ring to transact packets on
1985  * @rx_buffer: buffer containing page to add
1986  * @skb: sk_buff to place the data into
1987  * @size: size of data in rx_buffer
1988  *
1989  * This function will add the data contained in rx_buffer->page to the skb.
1990  * This is done either through a direct copy if the data in the buffer is
1991  * less than the skb header size, otherwise it will just attach the page as
1992  * a frag to the skb.
1993  *
 1994  * The function will then update the page offset; whether the buffer can
 1995  * actually be reused is decided separately by ixgbe_can_reuse_rx_page().
1996  **/
1997 static void ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring,
1998                               struct ixgbe_rx_buffer *rx_buffer,
1999                               struct sk_buff *skb,
2000                               unsigned int size)
2001 {
2002 #if (PAGE_SIZE < 8192)
2003         unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2;
2004 #else
2005         unsigned int truesize = ring_uses_build_skb(rx_ring) ?
2006                                 SKB_DATA_ALIGN(IXGBE_SKB_PAD + size) :
2007                                 SKB_DATA_ALIGN(size);
2008 #endif
2009         skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
2010                         rx_buffer->page_offset, size, truesize);
2011 #if (PAGE_SIZE < 8192)
2012         rx_buffer->page_offset ^= truesize;
2013 #else
2014         rx_buffer->page_offset += truesize;
2015 #endif
2016 }
2017
2018 static struct ixgbe_rx_buffer *ixgbe_get_rx_buffer(struct ixgbe_ring *rx_ring,
2019                                                    union ixgbe_adv_rx_desc *rx_desc,
2020                                                    struct sk_buff **skb,
2021                                                    const unsigned int size)
2022 {
2023         struct ixgbe_rx_buffer *rx_buffer;
2024
2025         rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
2026         prefetchw(rx_buffer->page);
2027         *skb = rx_buffer->skb;
2028
2029         /* Delay unmapping of the first packet. It carries the header
 2030          * information, and HW may still access the header after the writeback,
 2031          * so only unmap it when EOP is reached.
2032          */
2033         if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)) {
2034                 if (!*skb)
2035                         goto skip_sync;
2036         } else {
2037                 if (*skb)
2038                         ixgbe_dma_sync_frag(rx_ring, *skb);
2039         }
2040
2041         /* we are reusing so sync this buffer for CPU use */
2042         dma_sync_single_range_for_cpu(rx_ring->dev,
2043                                       rx_buffer->dma,
2044                                       rx_buffer->page_offset,
2045                                       size,
2046                                       DMA_FROM_DEVICE);
2047 skip_sync:
2048         rx_buffer->pagecnt_bias--;
2049
2050         return rx_buffer;
2051 }
2052
2053 static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring,
2054                                 struct ixgbe_rx_buffer *rx_buffer,
2055                                 struct sk_buff *skb)
2056 {
2057         if (ixgbe_can_reuse_rx_page(rx_buffer)) {
2058                 /* hand second half of page back to the ring */
2059                 ixgbe_reuse_rx_page(rx_ring, rx_buffer);
2060         } else {
2061                 if (!IS_ERR(skb) && IXGBE_CB(skb)->dma == rx_buffer->dma) {
2062                         /* the page has been released from the ring */
2063                         IXGBE_CB(skb)->page_released = true;
2064                 } else {
2065                         /* we are not reusing the buffer so unmap it */
2066                         dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
2067                                              ixgbe_rx_pg_size(rx_ring),
2068                                              DMA_FROM_DEVICE,
2069                                              IXGBE_RX_DMA_ATTR);
2070                 }
2071                 __page_frag_cache_drain(rx_buffer->page,
2072                                         rx_buffer->pagecnt_bias);
2073         }
2074
2075         /* clear contents of rx_buffer */
2076         rx_buffer->page = NULL;
2077         rx_buffer->skb = NULL;
2078 }
2079
2080 static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
2081                                            struct ixgbe_rx_buffer *rx_buffer,
2082                                            struct xdp_buff *xdp,
2083                                            union ixgbe_adv_rx_desc *rx_desc)
2084 {
2085         unsigned int size = xdp->data_end - xdp->data;
2086 #if (PAGE_SIZE < 8192)
2087         unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2;
2088 #else
2089         unsigned int truesize = SKB_DATA_ALIGN(xdp->data_end -
2090                                                xdp->data_hard_start);
2091 #endif
2092         struct sk_buff *skb;
2093
2094         /* prefetch first cache line of first page */
2095         prefetch(xdp->data);
2096 #if L1_CACHE_BYTES < 128
2097         prefetch(xdp->data + L1_CACHE_BYTES);
2098 #endif
2099         /* Note, we get here by enabling legacy-rx via:
2100          *
2101          *    ethtool --set-priv-flags <dev> legacy-rx on
2102          *
2103          * In this mode, we currently get 0 extra XDP headroom as
2104          * opposed to having legacy-rx off, where we process XDP
2105          * packets going to stack via ixgbe_build_skb(). The latter
2106          * provides us currently with 192 bytes of headroom.
2107          *
2108          * For ixgbe_construct_skb() mode it means that the
2109          * xdp->data_meta will always point to xdp->data, since
2110          * the helper cannot expand the head. Should this ever
2111          * change in future for legacy-rx mode on, then lets also
2112          * add xdp->data_meta handling here.
2113          */
2114
2115         /* allocate a skb to store the frags */
2116         skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBE_RX_HDR_SIZE);
2117         if (unlikely(!skb))
2118                 return NULL;
2119
2120         if (size > IXGBE_RX_HDR_SIZE) {
2121                 if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))
2122                         IXGBE_CB(skb)->dma = rx_buffer->dma;
2123
2124                 skb_add_rx_frag(skb, 0, rx_buffer->page,
2125                                 xdp->data - page_address(rx_buffer->page),
2126                                 size, truesize);
2127 #if (PAGE_SIZE < 8192)
2128                 rx_buffer->page_offset ^= truesize;
2129 #else
2130                 rx_buffer->page_offset += truesize;
2131 #endif
2132         } else {
2133                 memcpy(__skb_put(skb, size),
2134                        xdp->data, ALIGN(size, sizeof(long)));
2135                 rx_buffer->pagecnt_bias++;
2136         }
2137
2138         return skb;
2139 }
2140
2141 static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
2142                                        struct ixgbe_rx_buffer *rx_buffer,
2143                                        struct xdp_buff *xdp,
2144                                        union ixgbe_adv_rx_desc *rx_desc)
2145 {
2146         unsigned int metasize = xdp->data - xdp->data_meta;
2147 #if (PAGE_SIZE < 8192)
2148         unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2;
2149 #else
2150         unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
2151                                 SKB_DATA_ALIGN(xdp->data_end -
2152                                                xdp->data_hard_start);
2153 #endif
2154         struct sk_buff *skb;
2155
2156         /* Prefetch first cache line of first page. If xdp->data_meta
 2157          * is unused, this points exactly at xdp->data, otherwise we
 2158          * likely have a consumer accessing the first few bytes of meta
 2159          * data, and then the actual data.
2160          */
2161         prefetch(xdp->data_meta);
2162 #if L1_CACHE_BYTES < 128
2163         prefetch(xdp->data_meta + L1_CACHE_BYTES);
2164 #endif
2165
 2166         /* build an skb around the page buffer */
2167         skb = build_skb(xdp->data_hard_start, truesize);
2168         if (unlikely(!skb))
2169                 return NULL;
2170
2171         /* update pointers within the skb to store the data */
2172         skb_reserve(skb, xdp->data - xdp->data_hard_start);
2173         __skb_put(skb, xdp->data_end - xdp->data);
2174         if (metasize)
2175                 skb_metadata_set(skb, metasize);
2176
2177         /* record DMA address if this is the start of a chain of buffers */
2178         if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))
2179                 IXGBE_CB(skb)->dma = rx_buffer->dma;
2180
2181         /* update buffer offset */
2182 #if (PAGE_SIZE < 8192)
2183         rx_buffer->page_offset ^= truesize;
2184 #else
2185         rx_buffer->page_offset += truesize;
2186 #endif
2187
2188         return skb;
2189 }
2190
2191 static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
2192                                      struct ixgbe_ring *rx_ring,
2193                                      struct xdp_buff *xdp)
2194 {
2195         int err, result = IXGBE_XDP_PASS;
2196         struct bpf_prog *xdp_prog;
2197         struct xdp_frame *xdpf;
2198         u32 act;
2199
2200         rcu_read_lock();
2201         xdp_prog = READ_ONCE(rx_ring->xdp_prog);
2202
2203         if (!xdp_prog)
2204                 goto xdp_out;
2205
2206         prefetchw(xdp->data_hard_start); /* xdp_frame write */
2207
2208         act = bpf_prog_run_xdp(xdp_prog, xdp);
2209         switch (act) {
2210         case XDP_PASS:
2211                 break;
2212         case XDP_TX:
2213                 xdpf = convert_to_xdp_frame(xdp);
2214                 if (unlikely(!xdpf)) {
2215                         result = IXGBE_XDP_CONSUMED;
2216                         break;
2217                 }
2218                 result = ixgbe_xmit_xdp_ring(adapter, xdpf);
2219                 break;
2220         case XDP_REDIRECT:
2221                 err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog);
2222                 if (!err)
2223                         result = IXGBE_XDP_REDIR;
2224                 else
2225                         result = IXGBE_XDP_CONSUMED;
2226                 break;
2227         default:
2228                 bpf_warn_invalid_xdp_action(act);
2229                 /* fallthrough */
2230         case XDP_ABORTED:
2231                 trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
2232                 /* fallthrough -- handle aborts by dropping packet */
2233         case XDP_DROP:
2234                 result = IXGBE_XDP_CONSUMED;
2235                 break;
2236         }
2237 xdp_out:
2238         rcu_read_unlock();
2239         return ERR_PTR(-result);
2240 }
2241
2242 static void ixgbe_rx_buffer_flip(struct ixgbe_ring *rx_ring,
2243                                  struct ixgbe_rx_buffer *rx_buffer,
2244                                  unsigned int size)
2245 {
2246 #if (PAGE_SIZE < 8192)
2247         unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2;
2248
2249         rx_buffer->page_offset ^= truesize;
2250 #else
2251         unsigned int truesize = ring_uses_build_skb(rx_ring) ?
2252                                 SKB_DATA_ALIGN(IXGBE_SKB_PAD + size) :
2253                                 SKB_DATA_ALIGN(size);
2254
2255         rx_buffer->page_offset += truesize;
2256 #endif
2257 }
2258
2259 /**
2260  * ixgbe_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
2261  * @q_vector: structure containing interrupt and ring information
2262  * @rx_ring: rx descriptor ring to transact packets on
2263  * @budget: Total limit on number of packets to process
2264  *
2265  * This function provides a "bounce buffer" approach to Rx interrupt
2266  * processing.  The advantage to this is that on systems that have
2267  * expensive overhead for IOMMU access this provides a means of avoiding
 2268  * it by maintaining the mapping of the page to the system.
2269  *
2270  * Returns amount of work completed
2271  **/
2272 static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
2273                                struct ixgbe_ring *rx_ring,
2274                                const int budget)
2275 {
2276         unsigned int total_rx_bytes = 0, total_rx_packets = 0;
2277         struct ixgbe_adapter *adapter = q_vector->adapter;
2278 #ifdef IXGBE_FCOE
2279         int ddp_bytes;
2280         unsigned int mss = 0;
2281 #endif /* IXGBE_FCOE */
2282         u16 cleaned_count = ixgbe_desc_unused(rx_ring);
2283         unsigned int xdp_xmit = 0;
2284         struct xdp_buff xdp;
2285
2286         xdp.rxq = &rx_ring->xdp_rxq;
2287
2288         while (likely(total_rx_packets < budget)) {
2289                 union ixgbe_adv_rx_desc *rx_desc;
2290                 struct ixgbe_rx_buffer *rx_buffer;
2291                 struct sk_buff *skb;
2292                 unsigned int size;
2293
2294                 /* return some buffers to hardware, one at a time is too slow */
2295                 if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) {
2296                         ixgbe_alloc_rx_buffers(rx_ring, cleaned_count);
2297                         cleaned_count = 0;
2298                 }
2299
2300                 rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean);
2301                 size = le16_to_cpu(rx_desc->wb.upper.length);
2302                 if (!size)
2303                         break;
2304
2305                 /* This memory barrier is needed to keep us from reading
2306                  * any other fields out of the rx_desc until we know the
2307                  * descriptor has been written back
2308                  */
2309                 dma_rmb();
2310
2311                 rx_buffer = ixgbe_get_rx_buffer(rx_ring, rx_desc, &skb, size);
2312
2313                 /* retrieve a buffer from the ring */
2314                 if (!skb) {
2315                         xdp.data = page_address(rx_buffer->page) +
2316                                    rx_buffer->page_offset;
2317                         xdp.data_meta = xdp.data;
2318                         xdp.data_hard_start = xdp.data -
2319                                               ixgbe_rx_offset(rx_ring);
2320                         xdp.data_end = xdp.data + size;
2321
2322                         skb = ixgbe_run_xdp(adapter, rx_ring, &xdp);
2323                 }
2324
2325                 if (IS_ERR(skb)) {
2326                         unsigned int xdp_res = -PTR_ERR(skb);
2327
2328                         if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR)) {
2329                                 xdp_xmit |= xdp_res;
2330                                 ixgbe_rx_buffer_flip(rx_ring, rx_buffer, size);
2331                         } else {
2332                                 rx_buffer->pagecnt_bias++;
2333                         }
2334                         total_rx_packets++;
2335                         total_rx_bytes += size;
2336                 } else if (skb) {
2337                         ixgbe_add_rx_frag(rx_ring, rx_buffer, skb, size);
2338                 } else if (ring_uses_build_skb(rx_ring)) {
2339                         skb = ixgbe_build_skb(rx_ring, rx_buffer,
2340                                               &xdp, rx_desc);
2341                 } else {
2342                         skb = ixgbe_construct_skb(rx_ring, rx_buffer,
2343                                                   &xdp, rx_desc);
2344                 }
2345
2346                 /* exit if we failed to retrieve a buffer */
2347                 if (!skb) {
2348                         rx_ring->rx_stats.alloc_rx_buff_failed++;
2349                         rx_buffer->pagecnt_bias++;
2350                         break;
2351                 }
2352
2353                 ixgbe_put_rx_buffer(rx_ring, rx_buffer, skb);
2354                 cleaned_count++;
2355
2356                 /* place incomplete frames back on ring for completion */
2357                 if (ixgbe_is_non_eop(rx_ring, rx_desc, skb))
2358                         continue;
2359
2360                 /* verify the packet layout is correct */
2361                 if (ixgbe_cleanup_headers(rx_ring, rx_desc, skb))
2362                         continue;
2363
2364                 /* probably a little skewed due to removing CRC */
2365                 total_rx_bytes += skb->len;
2366
2367                 /* populate checksum, timestamp, VLAN, and protocol */
2368                 ixgbe_process_skb_fields(rx_ring, rx_desc, skb);
2369
2370 #ifdef IXGBE_FCOE
2371                 /* if ddp, not passing to ULD unless for FCP_RSP or error */
2372                 if (ixgbe_rx_is_fcoe(rx_ring, rx_desc)) {
2373                         ddp_bytes = ixgbe_fcoe_ddp(adapter, rx_desc, skb);
2374                         /* include DDPed FCoE data */
2375                         if (ddp_bytes > 0) {
2376                                 if (!mss) {
2377                                         mss = rx_ring->netdev->mtu -
2378                                                 sizeof(struct fcoe_hdr) -
2379                                                 sizeof(struct fc_frame_header) -
2380                                                 sizeof(struct fcoe_crc_eof);
2381                                         if (mss > 512)
2382                                                 mss &= ~511;
2383                                 }
2384                                 total_rx_bytes += ddp_bytes;
2385                                 total_rx_packets += DIV_ROUND_UP(ddp_bytes,
2386                                                                  mss);
2387                         }
2388                         if (!ddp_bytes) {
2389                                 dev_kfree_skb_any(skb);
2390                                 continue;
2391                         }
2392                 }
2393
2394 #endif /* IXGBE_FCOE */
2395                 ixgbe_rx_skb(q_vector, skb);
2396
2397                 /* update budget accounting */
2398                 total_rx_packets++;
2399         }
2400
2401         if (xdp_xmit & IXGBE_XDP_REDIR)
2402                 xdp_do_flush_map();
2403
2404         if (xdp_xmit & IXGBE_XDP_TX) {
2405                 struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()];
2406
2407                 /* Force memory writes to complete before letting h/w
2408                  * know there are new descriptors to fetch.
2409                  */
2410                 wmb();
2411                 writel(ring->next_to_use, ring->tail);
2412         }
2413
2414         u64_stats_update_begin(&rx_ring->syncp);
2415         rx_ring->stats.packets += total_rx_packets;
2416         rx_ring->stats.bytes += total_rx_bytes;
2417         u64_stats_update_end(&rx_ring->syncp);
2418         q_vector->rx.total_packets += total_rx_packets;
2419         q_vector->rx.total_bytes += total_rx_bytes;
2420
2421         return total_rx_packets;
2422 }
2423
2424 /**
2425  * ixgbe_configure_msix - Configure MSI-X hardware
2426  * @adapter: board private structure
2427  *
2428  * ixgbe_configure_msix sets up the hardware to properly generate MSI-X
2429  * interrupts.
2430  **/
2431 static void ixgbe_configure_msix(struct ixgbe_adapter *adapter)
2432 {
2433         struct ixgbe_q_vector *q_vector;
2434         int v_idx;
2435         u32 mask;
2436
2437         /* Populate MSIX to EITR Select */
2438         if (adapter->num_vfs > 32) {
2439                 u32 eitrsel = BIT(adapter->num_vfs - 32) - 1;
2440                 IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITRSEL, eitrsel);
2441         }
2442
2443         /*
2444          * Populate the IVAR table and set the ITR values to the
2445          * corresponding register.
2446          */
2447         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
2448                 struct ixgbe_ring *ring;
2449                 q_vector = adapter->q_vector[v_idx];
2450
2451                 ixgbe_for_each_ring(ring, q_vector->rx)
2452                         ixgbe_set_ivar(adapter, 0, ring->reg_idx, v_idx);
2453
2454                 ixgbe_for_each_ring(ring, q_vector->tx)
2455                         ixgbe_set_ivar(adapter, 1, ring->reg_idx, v_idx);
2456
2457                 ixgbe_write_eitr(q_vector);
2458         }
2459
2460         switch (adapter->hw.mac.type) {
2461         case ixgbe_mac_82598EB:
2462                 ixgbe_set_ivar(adapter, -1, IXGBE_IVAR_OTHER_CAUSES_INDEX,
2463                                v_idx);
2464                 break;
2465         case ixgbe_mac_82599EB:
2466         case ixgbe_mac_X540:
2467         case ixgbe_mac_X550:
2468         case ixgbe_mac_X550EM_x:
2469         case ixgbe_mac_x550em_a:
2470                 ixgbe_set_ivar(adapter, -1, 1, v_idx);
2471                 break;
2472         default:
2473                 break;
2474         }
2475         IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(v_idx), 1950);
2476
2477         /* set up to autoclear timer, and the vectors */
2478         mask = IXGBE_EIMS_ENABLE_MASK;
2479         mask &= ~(IXGBE_EIMS_OTHER |
2480                   IXGBE_EIMS_MAILBOX |
2481                   IXGBE_EIMS_LSC);
2482
2483         IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, mask);
2484 }
2485
2486 /**
2487  * ixgbe_update_itr - update the dynamic ITR value based on statistics
2488  * @q_vector: structure containing interrupt and ring information
2489  * @ring_container: structure containing ring performance data
2490  *
2491  *      Stores a new ITR value based on packets and byte
2492  *      counts during the last interrupt.  The advantage of per interrupt
2493  *      computation is faster updates and more accurate ITR for the current
2494  *      traffic pattern.  Constants in this function were computed
2495  *      based on theoretical maximum wire speed and thresholds were set based
2496  *      on testing data as well as attempting to minimize response time
2497  *      while increasing bulk throughput.
2498  **/
2499 static void ixgbe_update_itr(struct ixgbe_q_vector *q_vector,
2500                              struct ixgbe_ring_container *ring_container)
2501 {
2502         unsigned int itr = IXGBE_ITR_ADAPTIVE_MIN_USECS |
2503                            IXGBE_ITR_ADAPTIVE_LATENCY;
2504         unsigned int avg_wire_size, packets, bytes;
2505         unsigned long next_update = jiffies;
2506
2507         /* If we don't have any rings just leave ourselves set for maximum
2508          * possible latency so we take ourselves out of the equation.
2509          */
2510         if (!ring_container->ring)
2511                 return;
2512
 2513         /* If we haven't updated within the last 1 - 2 jiffies we can assume
 2514          * that either packets are coming in so slowly that there hasn't been
2515          * any work, or that there is so much work that NAPI is dealing
2516          * with interrupt moderation and we don't need to do anything.
2517          */
2518         if (time_after(next_update, ring_container->next_update))
2519                 goto clear_counts;
2520
2521         packets = ring_container->total_packets;
2522
2523         /* We have no packets to actually measure against. This means
2524          * either one of the other queues on this vector is active or
2525          * we are a Tx queue doing TSO with too high of an interrupt rate.
2526          *
2527          * When this occurs just tick up our delay by the minimum value
2528          * and hope that this extra delay will prevent us from being called
2529          * without any work on our queue.
2530          */
2531         if (!packets) {
2532                 itr = (q_vector->itr >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC;
2533                 if (itr > IXGBE_ITR_ADAPTIVE_MAX_USECS)
2534                         itr = IXGBE_ITR_ADAPTIVE_MAX_USECS;
2535                 itr += ring_container->itr & IXGBE_ITR_ADAPTIVE_LATENCY;
2536                 goto clear_counts;
2537         }
2538
2539         bytes = ring_container->total_bytes;
2540
2541         /* If packets are less than 4 or bytes are less than 9000 assume
2542          * insufficient data to use bulk rate limiting approach. We are
2543          * likely latency driven.
2544          */
2545         if (packets < 4 && bytes < 9000) {
2546                 itr = IXGBE_ITR_ADAPTIVE_LATENCY;
2547                 goto adjust_by_size;
2548         }
2549
2550         /* Between 4 and 48 we can assume that our current interrupt delay
2551          * is only slightly too low. As such we should increase it by a small
2552          * fixed amount.
2553          */
2554         if (packets < 48) {
2555                 itr = (q_vector->itr >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC;
2556                 if (itr > IXGBE_ITR_ADAPTIVE_MAX_USECS)
2557                         itr = IXGBE_ITR_ADAPTIVE_MAX_USECS;
2558                 goto clear_counts;
2559         }
2560
2561         /* Between 48 and 96 is our "goldilocks" zone where we are working
2562          * out "just right". Just report that our current ITR is good for us.
2563          */
2564         if (packets < 96) {
2565                 itr = q_vector->itr >> 2;
2566                 goto clear_counts;
2567         }
2568
2569         /* If packet count is 96 or greater we are likely looking at a slight
2570          * overrun of the delay we want. Try halving our delay to see if that
2571          * will cut the number of packets in half per interrupt.
2572          */
2573         if (packets < 256) {
2574                 itr = q_vector->itr >> 3;
2575                 if (itr < IXGBE_ITR_ADAPTIVE_MIN_USECS)
2576                         itr = IXGBE_ITR_ADAPTIVE_MIN_USECS;
2577                 goto clear_counts;
2578         }
2579
 2580         /* The paths below assume we are dealing with a bulk ITR since the number
2581          * of packets is 256 or greater. We are just going to have to compute
2582          * a value and try to bring the count under control, though for smaller
2583          * packet sizes there isn't much we can do as NAPI polling will likely
2584          * be kicking in sooner rather than later.
2585          */
2586         itr = IXGBE_ITR_ADAPTIVE_BULK;
2587
2588 adjust_by_size:
2589         /* If packet counts are 256 or greater we can assume we have a gross
2590          * overestimation of what the rate should be. Instead of trying to fine
2591          * tune it just use the formula below to try and dial in an exact value
 2592          * given the current packet size of the frame.
2593          */
2594         avg_wire_size = bytes / packets;
2595
2596         /* The following is a crude approximation of:
2597          *  wmem_default / (size + overhead) = desired_pkts_per_int
2598          *  rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
2599          *  (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
2600          *
2601          * Assuming wmem_default is 212992 and overhead is 640 bytes per
2602          * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
2603          * formula down to
2604          *
2605          *  (170 * (size + 24)) / (size + 640) = ITR
2606          *
2607          * We first do some math on the packet size and then finally bitshift
2608          * by 8 after rounding up. We also have to account for PCIe link speed
2609          * difference as ITR scales based on this.
2610          */
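        /* Worked example (editor's illustration, not from the original
         * source): for avg_wire_size = 316 the exact formula gives
         * 170 * (316 + 24) / (316 + 640) ~= 60, while the piecewise
         * approximation below yields 316 * 40 + 2720 = 15360, i.e. 60
         * once the divide-by-256 scaling applied further down (for the
         * 10G link-speed case) is taken into account.  Both agree on
         * roughly 60 usecs, or about 16K interrupts per second, at that
         * packet size.
         */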
2611         if (avg_wire_size <= 60) {
2612                 /* Start at 50k ints/sec */
2613                 avg_wire_size = 5120;
2614         } else if (avg_wire_size <= 316) {
2615                 /* 50K ints/sec to 16K ints/sec */
2616                 avg_wire_size *= 40;
2617                 avg_wire_size += 2720;
2618         } else if (avg_wire_size <= 1084) {
2619                 /* 16K ints/sec to 9.2K ints/sec */
2620                 avg_wire_size *= 15;
2621                 avg_wire_size += 11452;
2622         } else if (avg_wire_size <= 1980) {
2623                 /* 9.2K ints/sec to 8K ints/sec */
2624                 avg_wire_size *= 5;
2625                 avg_wire_size += 22420;
2626         } else {
2627                 /* plateau at a limit of 8K ints/sec */
2628                 avg_wire_size = 32256;
2629         }
2630
 2631         /* If we are in low latency mode, halve our delay, which doubles the rate
 2632          * to somewhere between 100K and 16K ints/sec
2633          */
2634         if (itr & IXGBE_ITR_ADAPTIVE_LATENCY)
2635                 avg_wire_size >>= 1;
2636
2637         /* Resultant value is 256 times larger than it needs to be. This
 2638          * gives us room to adjust the value as needed, either increasing
 2639          * or decreasing it based on link speeds of 10G, 2.5G, 1G, etc.
2640          *
2641          * Use addition as we have already recorded the new latency flag
2642          * for the ITR value.
2643          */
2644         switch (q_vector->adapter->link_speed) {
2645         case IXGBE_LINK_SPEED_10GB_FULL:
2646         case IXGBE_LINK_SPEED_100_FULL:
2647         default:
2648                 itr += DIV_ROUND_UP(avg_wire_size,
2649                                     IXGBE_ITR_ADAPTIVE_MIN_INC * 256) *
2650                        IXGBE_ITR_ADAPTIVE_MIN_INC;
2651                 break;
2652         case IXGBE_LINK_SPEED_2_5GB_FULL:
2653         case IXGBE_LINK_SPEED_1GB_FULL:
2654         case IXGBE_LINK_SPEED_10_FULL:
2655                 itr += DIV_ROUND_UP(avg_wire_size,
2656                                     IXGBE_ITR_ADAPTIVE_MIN_INC * 64) *
2657                        IXGBE_ITR_ADAPTIVE_MIN_INC;
2658                 break;
2659         }
2660
2661 clear_counts:
2662         /* write back value */
2663         ring_container->itr = itr;
2664
2665         /* next update should occur within next jiffy */
2666         ring_container->next_update = next_update + 1;
2667
2668         ring_container->total_bytes = 0;
2669         ring_container->total_packets = 0;
2670 }
2671
2672 /**
2673  * ixgbe_write_eitr - write EITR register in hardware specific way
2674  * @q_vector: structure containing interrupt and ring information
2675  *
2676  * This function is meant to be called by ethtool and by the driver
2677  * when it needs to update the EITR registers at runtime.  Hardware-
2678  * specific quirks/differences are taken care of here.
2679  */
2680 void ixgbe_write_eitr(struct ixgbe_q_vector *q_vector)
2681 {
2682         struct ixgbe_adapter *adapter = q_vector->adapter;
2683         struct ixgbe_hw *hw = &adapter->hw;
2684         int v_idx = q_vector->v_idx;
2685         u32 itr_reg = q_vector->itr & IXGBE_MAX_EITR;
2686
2687         switch (adapter->hw.mac.type) {
2688         case ixgbe_mac_82598EB:
2689                 /* must write high and low 16 bits to reset counter */
2690                 itr_reg |= (itr_reg << 16);
2691                 break;
2692         case ixgbe_mac_82599EB:
2693         case ixgbe_mac_X540:
2694         case ixgbe_mac_X550:
2695         case ixgbe_mac_X550EM_x:
2696         case ixgbe_mac_x550em_a:
2697                 /*
2698                  * set the WDIS bit so that the write does not clear the timer
2699                  * bits and cause an immediate assertion of the interrupt
2700                  */
2701                 itr_reg |= IXGBE_EITR_CNT_WDIS;
2702                 break;
2703         default:
2704                 break;
2705         }
2706         IXGBE_WRITE_REG(hw, IXGBE_EITR(v_idx), itr_reg);
2707 }
2708
2709 static void ixgbe_set_itr(struct ixgbe_q_vector *q_vector)
2710 {
2711         u32 new_itr;
2712
2713         ixgbe_update_itr(q_vector, &q_vector->tx);
2714         ixgbe_update_itr(q_vector, &q_vector->rx);
2715
2716         /* use the smallest value of new ITR delay calculations */
2717         new_itr = min(q_vector->rx.itr, q_vector->tx.itr);
2718
2719         /* Clear latency flag if set, shift into correct position */
2720         new_itr &= ~IXGBE_ITR_ADAPTIVE_LATENCY;
2721         new_itr <<= 2;
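        /* Note: the shift by 2 converts the usec-based value into the format
         * ixgbe_write_eitr() programs into EITR.  On 82599 and later parts the
         * interval field appears to start at bit 3 with 2-usec granularity,
         * so usecs << 2 lands the value in the right place (an inference from
         * the register layout, not stated explicitly in the code).
         */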
2722
2723         if (new_itr != q_vector->itr) {
2724                 /* save the algorithm value here */
2725                 q_vector->itr = new_itr;
2726
2727                 ixgbe_write_eitr(q_vector);
2728         }
2729 }
2730
2731 /**
2732  * ixgbe_check_overtemp_subtask - check for over temperature
2733  * @adapter: pointer to adapter
2734  **/
2735 static void ixgbe_check_overtemp_subtask(struct ixgbe_adapter *adapter)
2736 {
2737         struct ixgbe_hw *hw = &adapter->hw;
2738         u32 eicr = adapter->interrupt_event;
2739         s32 rc;
2740
2741         if (test_bit(__IXGBE_DOWN, &adapter->state))
2742                 return;
2743
2744         if (!(adapter->flags2 & IXGBE_FLAG2_TEMP_SENSOR_EVENT))
2745                 return;
2746
2747         adapter->flags2 &= ~IXGBE_FLAG2_TEMP_SENSOR_EVENT;
2748
2749         switch (hw->device_id) {
2750         case IXGBE_DEV_ID_82599_T3_LOM:
2751                 /*
2752                  * Since the warning interrupt is for both ports
2753                  * we don't have to check whether this interrupt
2754                  * was for our port.  We also may have missed the
2755                  * interrupt, so we always have to check whether
2756                  * we got an LSC.
2757                  */
2758                 if (!(eicr & IXGBE_EICR_GPI_SDP0_8259X) &&
2759                     !(eicr & IXGBE_EICR_LSC))
2760                         return;
2761
2762                 if (!(eicr & IXGBE_EICR_LSC) && hw->mac.ops.check_link) {
2763                         u32 speed;
2764                         bool link_up = false;
2765
2766                         hw->mac.ops.check_link(hw, &speed, &link_up, false);
2767
2768                         if (link_up)
2769                                 return;
2770                 }
2771
2772                 /* Bail out if this is not due to overtemp */
2773                 if (hw->phy.ops.check_overtemp(hw) != IXGBE_ERR_OVERTEMP)
2774                         return;
2775
2776                 break;
2777         case IXGBE_DEV_ID_X550EM_A_1G_T:
2778         case IXGBE_DEV_ID_X550EM_A_1G_T_L:
2779                 rc = hw->phy.ops.check_overtemp(hw);
2780                 if (rc != IXGBE_ERR_OVERTEMP)
2781                         return;
2782                 break;
2783         default:
2784                 if (adapter->hw.mac.type >= ixgbe_mac_X540)
2785                         return;
2786                 if (!(eicr & IXGBE_EICR_GPI_SDP0(hw)))
2787                         return;
2788                 break;
2789         }
2790         e_crit(drv, "%s\n", ixgbe_overheat_msg);
2791
2792         adapter->interrupt_event = 0;
2793 }
2794
2795 static void ixgbe_check_fan_failure(struct ixgbe_adapter *adapter, u32 eicr)
2796 {
2797         struct ixgbe_hw *hw = &adapter->hw;
2798
2799         if ((adapter->flags & IXGBE_FLAG_FAN_FAIL_CAPABLE) &&
2800             (eicr & IXGBE_EICR_GPI_SDP1(hw))) {
2801                 e_crit(probe, "Fan has stopped, replace the adapter\n");
2802                 /* write to clear the interrupt */
2803                 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1(hw));
2804         }
2805 }
2806
2807 static void ixgbe_check_overtemp_event(struct ixgbe_adapter *adapter, u32 eicr)
2808 {
2809         struct ixgbe_hw *hw = &adapter->hw;
2810
2811         if (!(adapter->flags2 & IXGBE_FLAG2_TEMP_SENSOR_CAPABLE))
2812                 return;
2813
2814         switch (adapter->hw.mac.type) {
2815         case ixgbe_mac_82599EB:
2816                 /*
2817                  * Need to check link state, so complete the overtemp check
2818                  * in the service task
2819                  */
2820                 if (((eicr & IXGBE_EICR_GPI_SDP0(hw)) ||
2821                      (eicr & IXGBE_EICR_LSC)) &&
2822                     (!test_bit(__IXGBE_DOWN, &adapter->state))) {
2823                         adapter->interrupt_event = eicr;
2824                         adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_EVENT;
2825                         ixgbe_service_event_schedule(adapter);
2826                         return;
2827                 }
2828                 return;
2829         case ixgbe_mac_x550em_a:
2830                 if (eicr & IXGBE_EICR_GPI_SDP0_X550EM_a) {
2831                         adapter->interrupt_event = eicr;
2832                         adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_EVENT;
2833                         ixgbe_service_event_schedule(adapter);
2834                         IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC,
2835                                         IXGBE_EICR_GPI_SDP0_X550EM_a);
2836                         IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICR,
2837                                         IXGBE_EICR_GPI_SDP0_X550EM_a);
2838                 }
2839                 return;
2840         case ixgbe_mac_X550:
2841         case ixgbe_mac_X540:
2842                 if (!(eicr & IXGBE_EICR_TS))
2843                         return;
2844                 break;
2845         default:
2846                 return;
2847         }
2848
2849         e_crit(drv, "%s\n", ixgbe_overheat_msg);
2850 }
2851
2852 static inline bool ixgbe_is_sfp(struct ixgbe_hw *hw)
2853 {
2854         switch (hw->mac.type) {
2855         case ixgbe_mac_82598EB:
2856                 if (hw->phy.type == ixgbe_phy_nl)
2857                         return true;
2858                 return false;
2859         case ixgbe_mac_82599EB:
2860         case ixgbe_mac_X550EM_x:
2861         case ixgbe_mac_x550em_a:
2862                 switch (hw->mac.ops.get_media_type(hw)) {
2863                 case ixgbe_media_type_fiber:
2864                 case ixgbe_media_type_fiber_qsfp:
2865                         return true;
2866                 default:
2867                         return false;
2868                 }
2869         default:
2870                 return false;
2871         }
2872 }
2873
2874 static void ixgbe_check_sfp_event(struct ixgbe_adapter *adapter, u32 eicr)
2875 {
2876         struct ixgbe_hw *hw = &adapter->hw;
2877         u32 eicr_mask = IXGBE_EICR_GPI_SDP2(hw);
2878
2879         if (!ixgbe_is_sfp(hw))
2880                 return;
2881
2882         /* Later MACs use a different SDP */
2883         if (hw->mac.type >= ixgbe_mac_X540)
2884                 eicr_mask = IXGBE_EICR_GPI_SDP0_X540;
2885
2886         if (eicr & eicr_mask) {
2887                 /* Clear the interrupt */
2888                 IXGBE_WRITE_REG(hw, IXGBE_EICR, eicr_mask);
2889                 if (!test_bit(__IXGBE_DOWN, &adapter->state)) {
2890                         adapter->flags2 |= IXGBE_FLAG2_SFP_NEEDS_RESET;
2891                         adapter->sfp_poll_time = 0;
2892                         ixgbe_service_event_schedule(adapter);
2893                 }
2894         }
2895
2896         if (adapter->hw.mac.type == ixgbe_mac_82599EB &&
2897             (eicr & IXGBE_EICR_GPI_SDP1(hw))) {
2898                 /* Clear the interrupt */
2899                 IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1(hw));
2900                 if (!test_bit(__IXGBE_DOWN, &adapter->state)) {
2901                         adapter->flags |= IXGBE_FLAG_NEED_LINK_CONFIG;
2902                         ixgbe_service_event_schedule(adapter);
2903                 }
2904         }
2905 }
2906
2907 static void ixgbe_check_lsc(struct ixgbe_adapter *adapter)
2908 {
2909         struct ixgbe_hw *hw = &adapter->hw;
2910
2911         adapter->lsc_int++;
2912         adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE;
2913         adapter->link_check_timeout = jiffies;
2914         if (!test_bit(__IXGBE_DOWN, &adapter->state)) {
2915                 IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_LSC);
2916                 IXGBE_WRITE_FLUSH(hw);
2917                 ixgbe_service_event_schedule(adapter);
2918         }
2919 }
2920
2921 static inline void ixgbe_irq_enable_queues(struct ixgbe_adapter *adapter,
2922                                            u64 qmask)
2923 {
2924         u32 mask;
2925         struct ixgbe_hw *hw = &adapter->hw;
2926
2927         switch (hw->mac.type) {
2928         case ixgbe_mac_82598EB:
2929                 mask = (IXGBE_EIMS_RTX_QUEUE & qmask);
2930                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask);
2931                 break;
2932         case ixgbe_mac_82599EB:
2933         case ixgbe_mac_X540:
2934         case ixgbe_mac_X550:
2935         case ixgbe_mac_X550EM_x:
2936         case ixgbe_mac_x550em_a:
2937                 mask = (qmask & 0xFFFFFFFF);
2938                 if (mask)
2939                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
2940                 mask = (qmask >> 32);
2941                 if (mask)
2942                         IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask);
2943                 break;
2944         default:
2945                 break;
2946         }
2947         /* skip the flush */
2948 }
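/* Usage sketch (illustrative): callers typically pass a single per-vector
 * bit, e.g.
 *
 *	ixgbe_irq_enable_queues(adapter, BIT_ULL(q_vector->v_idx));
 *
 * and the 64-bit qmask is split across EIMS_EX(0)/EIMS_EX(1) above on MACs
 * newer than 82598.
 */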
2949
2950 static inline void ixgbe_irq_disable_queues(struct ixgbe_adapter *adapter,
2951                                             u64 qmask)
2952 {
2953         u32 mask;
2954         struct ixgbe_hw *hw = &adapter->hw;
2955
2956         switch (hw->mac.type) {
2957         case ixgbe_mac_82598EB:
2958                 mask = (IXGBE_EIMS_RTX_QUEUE & qmask);
2959                 IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
2960                 break;
2961         case ixgbe_mac_82599EB:
2962         case ixgbe_mac_X540:
2963         case ixgbe_mac_X550:
2964         case ixgbe_mac_X550EM_x:
2965         case ixgbe_mac_x550em_a:
2966                 mask = (qmask & 0xFFFFFFFF);
2967                 if (mask)
2968                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
2969                 mask = (qmask >> 32);
2970                 if (mask)
2971                         IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
2972                 break;
2973         default:
2974                 break;
2975         }
2976         /* skip the flush */
2977 }
2978
2979 /**
2980  * ixgbe_irq_enable - Enable default interrupt generation settings
2981  * @adapter: board private structure
2982  * @queues: enable irqs for queues
2983  * @flush: flush register write
2984  **/
2985 static inline void ixgbe_irq_enable(struct ixgbe_adapter *adapter, bool queues,
2986                                     bool flush)
2987 {
2988         struct ixgbe_hw *hw = &adapter->hw;
2989         u32 mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE);
2990
2991         /* don't reenable LSC while waiting for link */
2992         if (adapter->flags & IXGBE_FLAG_NEED_LINK_UPDATE)
2993                 mask &= ~IXGBE_EIMS_LSC;
2994
2995         if (adapter->flags2 & IXGBE_FLAG2_TEMP_SENSOR_CAPABLE)
2996                 switch (adapter->hw.mac.type) {
2997                 case ixgbe_mac_82599EB:
2998                         mask |= IXGBE_EIMS_GPI_SDP0(hw);
2999                         break;
3000                 case ixgbe_mac_X540:
3001                 case ixgbe_mac_X550:
3002                 case ixgbe_mac_X550EM_x:
3003                 case ixgbe_mac_x550em_a:
3004                         mask |= IXGBE_EIMS_TS;
3005                         break;
3006                 default:
3007                         break;
3008                 }
3009         if (adapter->flags & IXGBE_FLAG_FAN_FAIL_CAPABLE)
3010                 mask |= IXGBE_EIMS_GPI_SDP1(hw);
3011         switch (adapter->hw.mac.type) {
3012         case ixgbe_mac_82599EB:
3013                 mask |= IXGBE_EIMS_GPI_SDP1(hw);
3014                 mask |= IXGBE_EIMS_GPI_SDP2(hw);
3015                 /* fall through */
3016         case ixgbe_mac_X540:
3017         case ixgbe_mac_X550:
3018         case ixgbe_mac_X550EM_x:
3019         case ixgbe_mac_x550em_a:
3020                 if (adapter->hw.device_id == IXGBE_DEV_ID_X550EM_X_SFP ||
3021                     adapter->hw.device_id == IXGBE_DEV_ID_X550EM_A_SFP ||
3022                     adapter->hw.device_id == IXGBE_DEV_ID_X550EM_A_SFP_N)
3023                         mask |= IXGBE_EIMS_GPI_SDP0(&adapter->hw);
3024                 if (adapter->hw.phy.type == ixgbe_phy_x550em_ext_t)
3025                         mask |= IXGBE_EICR_GPI_SDP0_X540;
3026                 mask |= IXGBE_EIMS_ECC;
3027                 mask |= IXGBE_EIMS_MAILBOX;
3028                 break;
3029         default:
3030                 break;
3031         }
3032
3033         if ((adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) &&
3034             !(adapter->flags2 & IXGBE_FLAG2_FDIR_REQUIRES_REINIT))
3035                 mask |= IXGBE_EIMS_FLOW_DIR;
3036
3037         IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, mask);
3038         if (queues)
3039                 ixgbe_irq_enable_queues(adapter, ~0);
3040         if (flush)
3041                 IXGBE_WRITE_FLUSH(&adapter->hw);
3042 }
3043
3044 static irqreturn_t ixgbe_msix_other(int irq, void *data)
3045 {
3046         struct ixgbe_adapter *adapter = data;
3047         struct ixgbe_hw *hw = &adapter->hw;
3048         u32 eicr;
3049
3050         /*
3051          * Workaround for silicon errata.  Use clear-by-write instead
3052          * of clear-by-read.  Reading with EICS will return the
3053          * interrupt causes without clearing, which will later be done
3054          * with the write to EICR.
3055          */
3056         eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
3057
3058         /* The lower 16 bits of the EICR register are for the queue interrupts
3059          * which should be masked here to avoid accidentally clearing them if
3060          * the bits are high when ixgbe_msix_other is called. Otherwise there is
3061          * a race condition which results in possible performance loss,
3062          * especially if the ixgbe_msix_other interrupt is triggering
3063          * consistently (as it would when PPS is turned on for the X540 device)
3064          */
3065         eicr &= 0xFFFF0000;
3066
3067         IXGBE_WRITE_REG(hw, IXGBE_EICR, eicr);
3068
3069         if (eicr & IXGBE_EICR_LSC)
3070                 ixgbe_check_lsc(adapter);
3071
3072         if (eicr & IXGBE_EICR_MAILBOX)
3073                 ixgbe_msg_task(adapter);
3074
3075         switch (hw->mac.type) {
3076         case ixgbe_mac_82599EB:
3077         case ixgbe_mac_X540:
3078         case ixgbe_mac_X550:
3079         case ixgbe_mac_X550EM_x:
3080