drivers/net/ethernet/emulex/benet/be_main.c
1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, 0444);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, 0444);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48
49 static const struct pci_device_id be_dev_ids[] = {
50 #ifdef CONFIG_BE2NET_BE2
51         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
52         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53 #endif /* CONFIG_BE2NET_BE2 */
54 #ifdef CONFIG_BE2NET_BE3
55         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
56         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
57 #endif /* CONFIG_BE2NET_BE3 */
58 #ifdef CONFIG_BE2NET_LANCER
59         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
60         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
61 #endif /* CONFIG_BE2NET_LANCER */
62 #ifdef CONFIG_BE2NET_SKYHAWK
63         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
64         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
65 #endif /* CONFIG_BE2NET_SKYHAWK */
66         { 0 }
67 };
68 MODULE_DEVICE_TABLE(pci, be_dev_ids);
69
70 /* Workqueue used by all functions for deferring cmd calls to the adapter */
71 static struct workqueue_struct *be_wq;
72
73 /* UE Status Low CSR */
74 static const char * const ue_status_low_desc[] = {
75         "CEV",
76         "CTX",
77         "DBUF",
78         "ERX",
79         "Host",
80         "MPU",
81         "NDMA",
82         "PTC ",
83         "RDMA ",
84         "RXF ",
85         "RXIPS ",
86         "RXULP0 ",
87         "RXULP1 ",
88         "RXULP2 ",
89         "TIM ",
90         "TPOST ",
91         "TPRE ",
92         "TXIPS ",
93         "TXULP0 ",
94         "TXULP1 ",
95         "UC ",
96         "WDMA ",
97         "TXULP2 ",
98         "HOST1 ",
99         "P0_OB_LINK ",
100         "P1_OB_LINK ",
101         "HOST_GPIO ",
102         "MBOX ",
103         "ERX2 ",
104         "SPARE ",
105         "JTAG ",
106         "MPU_INTPEND "
107 };
108
109 /* UE Status High CSR */
110 static const char * const ue_status_hi_desc[] = {
111         "LPCMEMHOST",
112         "MGMT_MAC",
113         "PCS0ONLINE",
114         "MPU_IRAM",
115         "PCS1ONLINE",
116         "PCTL0",
117         "PCTL1",
118         "PMEM",
119         "RR",
120         "TXPB",
121         "RXPP",
122         "XAUI",
123         "TXP",
124         "ARM",
125         "IPC",
126         "HOST2",
127         "HOST3",
128         "HOST4",
129         "HOST5",
130         "HOST6",
131         "HOST7",
132         "ECRC",
133         "Poison TLP",
134         "NETC",
135         "PERIPH",
136         "LLTXULP",
137         "D2P",
138         "RCON",
139         "LDMA",
140         "LLTXP",
141         "LLTXPB",
142         "Unknown"
143 };
144
145 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
146                                  BE_IF_FLAGS_BROADCAST | \
147                                  BE_IF_FLAGS_MULTICAST | \
148                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
149
150 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
151 {
152         struct be_dma_mem *mem = &q->dma_mem;
153
154         if (mem->va) {
155                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
156                                   mem->dma);
157                 mem->va = NULL;
158         }
159 }
160
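/* Allocate zeroed, DMA-coherent ring memory for a queue of 'len' entries,
 * each 'entry_size' bytes long.
 */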
161 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
162                           u16 len, u16 entry_size)
163 {
164         struct be_dma_mem *mem = &q->dma_mem;
165
166         memset(q, 0, sizeof(*q));
167         q->len = len;
168         q->entry_size = entry_size;
169         mem->size = len * entry_size;
170         mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
171                                       GFP_KERNEL);
172         if (!mem->va)
173                 return -ENOMEM;
174         return 0;
175 }
176
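/* Enable/disable host interrupts by toggling the HOSTINTR bit in PCI config
 * space; the register is written only when the state actually changes.
 */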
177 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
178 {
179         u32 reg, enabled;
180
181         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
182                               &reg);
183         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
184
185         if (!enabled && enable)
186                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
187         else if (enabled && !enable)
188                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
189         else
190                 return;
191
192         pci_write_config_dword(adapter->pdev,
193                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
194 }
195
196 static void be_intr_set(struct be_adapter *adapter, bool enable)
197 {
198         int status = 0;
199
200         /* On lancer, interrupts can't be controlled via this register */
201         if (lancer_chip(adapter))
202                 return;
203
204         if (be_check_error(adapter, BE_ERROR_EEH))
205                 return;
206
207         status = be_cmd_intr_set(adapter, enable);
208         if (status)
209                 be_reg_intr_set(adapter, enable);
210 }
211
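/* Ring the RX-queue doorbell to tell HW how many rx buffers were posted */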
212 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
213 {
214         u32 val = 0;
215
216         if (be_check_error(adapter, BE_ERROR_HW))
217                 return;
218
219         val |= qid & DB_RQ_RING_ID_MASK;
220         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
221
222         wmb();
223         iowrite32(val, adapter->db + DB_RQ_OFFSET);
224 }
225
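/* Ring the TX doorbell with the number of WRBs posted on this TX queue */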
226 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
227                           u16 posted)
228 {
229         u32 val = 0;
230
231         if (be_check_error(adapter, BE_ERROR_HW))
232                 return;
233
234         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
235         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
236
237         wmb();
238         iowrite32(val, adapter->db + txo->db_offset);
239 }
240
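/* Ring the EQ doorbell: ack num_popped events and optionally re-arm the EQ,
 * clear the interrupt and program the interrupt delay multiplier.
 */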
241 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
242                          bool arm, bool clear_int, u16 num_popped,
243                          u32 eq_delay_mult_enc)
244 {
245         u32 val = 0;
246
247         val |= qid & DB_EQ_RING_ID_MASK;
248         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
249
250         if (be_check_error(adapter, BE_ERROR_HW))
251                 return;
252
253         if (arm)
254                 val |= 1 << DB_EQ_REARM_SHIFT;
255         if (clear_int)
256                 val |= 1 << DB_EQ_CLR_SHIFT;
257         val |= 1 << DB_EQ_EVNT_SHIFT;
258         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
259         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
260         iowrite32(val, adapter->db + DB_EQ_OFFSET);
261 }
262
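/* Ring the CQ doorbell: ack num_popped completions and optionally re-arm */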
263 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
264 {
265         u32 val = 0;
266
267         val |= qid & DB_CQ_RING_ID_MASK;
268         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
269                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
270
271         if (be_check_error(adapter, BE_ERROR_HW))
272                 return;
273
274         if (arm)
275                 val |= 1 << DB_CQ_REARM_SHIFT;
276         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
277         iowrite32(val, adapter->db + DB_CQ_OFFSET);
278 }
279
280 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
281 {
282         int i;
283
284         /* Check if mac has already been added as part of uc-list */
285         for (i = 0; i < adapter->uc_macs; i++) {
286                 if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
287                         /* mac already added, skip addition */
288                         adapter->pmac_id[0] = adapter->pmac_id[i + 1];
289                         return 0;
290                 }
291         }
292
293         return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
294                                &adapter->pmac_id[0], 0);
295 }
296
297 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
298 {
299         int i;
300
301         /* Skip deletion if the programmed mac is
302          * being used in uc-list
303          */
304         for (i = 0; i < adapter->uc_macs; i++) {
305                 if (adapter->pmac_id[i + 1] == pmac_id)
306                         return;
307         }
308         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
309 }
310
311 static int be_mac_addr_set(struct net_device *netdev, void *p)
312 {
313         struct be_adapter *adapter = netdev_priv(netdev);
314         struct device *dev = &adapter->pdev->dev;
315         struct sockaddr *addr = p;
316         int status;
317         u8 mac[ETH_ALEN];
318         u32 old_pmac_id = adapter->pmac_id[0];
319
320         if (!is_valid_ether_addr(addr->sa_data))
321                 return -EADDRNOTAVAIL;
322
323         /* Proceed further only if the user-provided MAC is different
324          * from the active MAC
325          */
326         if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
327                 return 0;
328
329         /* BE3 VFs without FILTMGMT privilege are not allowed to set their
330          * MAC address
331          */
332         if (BEx_chip(adapter) && be_virtfn(adapter) &&
333             !check_privilege(adapter, BE_PRIV_FILTMGMT))
334                 return -EPERM;
335
336         /* if device is not running, copy MAC to netdev->dev_addr */
337         if (!netif_running(netdev))
338                 goto done;
339
340         /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
341          * privilege or if PF did not provision the new MAC address.
342          * On BE3, this cmd will always fail if the VF doesn't have the
343          * FILTMGMT privilege. This failure is OK, only if the PF programmed
344          * the MAC for the VF.
345          */
346         mutex_lock(&adapter->rx_filter_lock);
347         status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
348         if (!status) {
349
350                 /* Delete the old programmed MAC. This call may fail if the
351                  * old MAC was already deleted by the PF driver.
352                  */
353                 if (adapter->pmac_id[0] != old_pmac_id)
354                         be_dev_mac_del(adapter, old_pmac_id);
355         }
356
357         mutex_unlock(&adapter->rx_filter_lock);
358         /* Decide if the new MAC is successfully activated only after
359          * querying the FW
360          */
361         status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
362                                        adapter->if_handle, true, 0);
363         if (status)
364                 goto err;
365
366         /* The MAC change did not happen, either due to lack of privilege
367          * or because the PF didn't pre-provision the new MAC.
368          */
369         if (!ether_addr_equal(addr->sa_data, mac)) {
370                 status = -EPERM;
371                 goto err;
372         }
373
374         /* Remember currently programmed MAC */
375         ether_addr_copy(adapter->dev_mac, addr->sa_data);
376 done:
377         ether_addr_copy(netdev->dev_addr, addr->sa_data);
378         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
379         return 0;
380 err:
381         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
382         return status;
383 }
384
385 /* BE2 supports only v0 cmd */
386 static void *hw_stats_from_cmd(struct be_adapter *adapter)
387 {
388         if (BE2_chip(adapter)) {
389                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
390
391                 return &cmd->hw_stats;
392         } else if (BE3_chip(adapter)) {
393                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
394
395                 return &cmd->hw_stats;
396         } else {
397                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
398
399                 return &cmd->hw_stats;
400         }
401 }
402
403 /* BE2 supports only v0 cmd */
404 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
405 {
406         if (BE2_chip(adapter)) {
407                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
408
409                 return &hw_stats->erx;
410         } else if (BE3_chip(adapter)) {
411                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
412
413                 return &hw_stats->erx;
414         } else {
415                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
416
417                 return &hw_stats->erx;
418         }
419 }
420
421 static void populate_be_v0_stats(struct be_adapter *adapter)
422 {
423         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
424         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
425         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
426         struct be_port_rxf_stats_v0 *port_stats =
427                                         &rxf_stats->port[adapter->port_num];
428         struct be_drv_stats *drvs = &adapter->drv_stats;
429
430         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
431         drvs->rx_pause_frames = port_stats->rx_pause_frames;
432         drvs->rx_crc_errors = port_stats->rx_crc_errors;
433         drvs->rx_control_frames = port_stats->rx_control_frames;
434         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
435         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
436         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
437         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
438         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
439         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
440         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
441         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
442         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
443         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
444         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
445         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
446         drvs->rx_dropped_header_too_small =
447                 port_stats->rx_dropped_header_too_small;
448         drvs->rx_address_filtered =
449                                         port_stats->rx_address_filtered +
450                                         port_stats->rx_vlan_filtered;
451         drvs->rx_alignment_symbol_errors =
452                 port_stats->rx_alignment_symbol_errors;
453
454         drvs->tx_pauseframes = port_stats->tx_pauseframes;
455         drvs->tx_controlframes = port_stats->tx_controlframes;
456
457         if (adapter->port_num)
458                 drvs->jabber_events = rxf_stats->port1_jabber_events;
459         else
460                 drvs->jabber_events = rxf_stats->port0_jabber_events;
461         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
462         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
463         drvs->forwarded_packets = rxf_stats->forwarded_packets;
464         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
465         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
466         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
467         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
468 }
469
470 static void populate_be_v1_stats(struct be_adapter *adapter)
471 {
472         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
473         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
474         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
475         struct be_port_rxf_stats_v1 *port_stats =
476                                         &rxf_stats->port[adapter->port_num];
477         struct be_drv_stats *drvs = &adapter->drv_stats;
478
479         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
480         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
481         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
482         drvs->rx_pause_frames = port_stats->rx_pause_frames;
483         drvs->rx_crc_errors = port_stats->rx_crc_errors;
484         drvs->rx_control_frames = port_stats->rx_control_frames;
485         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
486         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
487         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
488         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
489         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
490         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
491         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
492         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
493         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
494         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
495         drvs->rx_dropped_header_too_small =
496                 port_stats->rx_dropped_header_too_small;
497         drvs->rx_input_fifo_overflow_drop =
498                 port_stats->rx_input_fifo_overflow_drop;
499         drvs->rx_address_filtered = port_stats->rx_address_filtered;
500         drvs->rx_alignment_symbol_errors =
501                 port_stats->rx_alignment_symbol_errors;
502         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
503         drvs->tx_pauseframes = port_stats->tx_pauseframes;
504         drvs->tx_controlframes = port_stats->tx_controlframes;
505         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
506         drvs->jabber_events = port_stats->jabber_events;
507         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
508         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
509         drvs->forwarded_packets = rxf_stats->forwarded_packets;
510         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
511         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
512         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
513         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
514 }
515
516 static void populate_be_v2_stats(struct be_adapter *adapter)
517 {
518         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
519         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
520         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
521         struct be_port_rxf_stats_v2 *port_stats =
522                                         &rxf_stats->port[adapter->port_num];
523         struct be_drv_stats *drvs = &adapter->drv_stats;
524
525         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
526         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
527         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
528         drvs->rx_pause_frames = port_stats->rx_pause_frames;
529         drvs->rx_crc_errors = port_stats->rx_crc_errors;
530         drvs->rx_control_frames = port_stats->rx_control_frames;
531         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
532         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
533         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
534         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
535         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
536         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
537         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
538         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
539         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
540         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
541         drvs->rx_dropped_header_too_small =
542                 port_stats->rx_dropped_header_too_small;
543         drvs->rx_input_fifo_overflow_drop =
544                 port_stats->rx_input_fifo_overflow_drop;
545         drvs->rx_address_filtered = port_stats->rx_address_filtered;
546         drvs->rx_alignment_symbol_errors =
547                 port_stats->rx_alignment_symbol_errors;
548         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
549         drvs->tx_pauseframes = port_stats->tx_pauseframes;
550         drvs->tx_controlframes = port_stats->tx_controlframes;
551         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
552         drvs->jabber_events = port_stats->jabber_events;
553         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
554         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
555         drvs->forwarded_packets = rxf_stats->forwarded_packets;
556         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
557         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
558         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
559         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
560         if (be_roce_supported(adapter)) {
561                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
562                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
563                 drvs->rx_roce_frames = port_stats->roce_frames_received;
564                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
565                 drvs->roce_drops_payload_len =
566                         port_stats->roce_drops_payload_len;
567         }
568 }
569
570 static void populate_lancer_stats(struct be_adapter *adapter)
571 {
572         struct be_drv_stats *drvs = &adapter->drv_stats;
573         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
574
575         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
576         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
577         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
578         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
579         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
580         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
581         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
582         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
583         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
584         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
585         drvs->rx_dropped_tcp_length =
586                                 pport_stats->rx_dropped_invalid_tcp_length;
587         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
588         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
589         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
590         drvs->rx_dropped_header_too_small =
591                                 pport_stats->rx_dropped_header_too_small;
592         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
593         drvs->rx_address_filtered =
594                                         pport_stats->rx_address_filtered +
595                                         pport_stats->rx_vlan_filtered;
596         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
597         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
598         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
599         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
600         drvs->jabber_events = pport_stats->rx_jabbers;
601         drvs->forwarded_packets = pport_stats->num_forwards_lo;
602         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
603         drvs->rx_drops_too_many_frags =
604                                 pport_stats->rx_drops_too_many_frags_lo;
605 }
606
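/* Fold a 16-bit HW counter into a 32-bit accumulator: the low word mirrors
 * the last HW value and the high word advances by 64K on each wrap-around.
 */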
607 static void accumulate_16bit_val(u32 *acc, u16 val)
608 {
609 #define lo(x)                   (x & 0xFFFF)
610 #define hi(x)                   (x & 0xFFFF0000)
611         bool wrapped = val < lo(*acc);
612         u32 newacc = hi(*acc) + val;
613
614         if (wrapped)
615                 newacc += 65536;
616         WRITE_ONCE(*acc, newacc);
617 }
618
619 static void populate_erx_stats(struct be_adapter *adapter,
620                                struct be_rx_obj *rxo, u32 erx_stat)
621 {
622         if (!BEx_chip(adapter))
623                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
624         else
625                 /* below erx HW counter can actually wrap around after
626                  * 65535. Driver accumulates a 32-bit value
627                  */
628                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
629                                      (u16)erx_stat);
630 }
631
632 void be_parse_stats(struct be_adapter *adapter)
633 {
634         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
635         struct be_rx_obj *rxo;
636         int i;
637         u32 erx_stat;
638
639         if (lancer_chip(adapter)) {
640                 populate_lancer_stats(adapter);
641         } else {
642                 if (BE2_chip(adapter))
643                         populate_be_v0_stats(adapter);
644                 else if (BE3_chip(adapter))
645                         /* for BE3 */
646                         populate_be_v1_stats(adapter);
647                 else
648                         populate_be_v2_stats(adapter);
649
650                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
651                 for_all_rx_queues(adapter, rxo, i) {
652                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
653                         populate_erx_stats(adapter, rxo, erx_stat);
654                 }
655         }
656 }
657
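/* ndo_get_stats64: sum the per-queue SW counters (read under the u64_stats
 * seqcount) and derive the error counters from the driver's parsed HW stats.
 */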
658 static void be_get_stats64(struct net_device *netdev,
659                            struct rtnl_link_stats64 *stats)
660 {
661         struct be_adapter *adapter = netdev_priv(netdev);
662         struct be_drv_stats *drvs = &adapter->drv_stats;
663         struct be_rx_obj *rxo;
664         struct be_tx_obj *txo;
665         u64 pkts, bytes;
666         unsigned int start;
667         int i;
668
669         for_all_rx_queues(adapter, rxo, i) {
670                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
671
672                 do {
673                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
674                         pkts = rx_stats(rxo)->rx_pkts;
675                         bytes = rx_stats(rxo)->rx_bytes;
676                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
677                 stats->rx_packets += pkts;
678                 stats->rx_bytes += bytes;
679                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
680                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
681                                         rx_stats(rxo)->rx_drops_no_frags;
682         }
683
684         for_all_tx_queues(adapter, txo, i) {
685                 const struct be_tx_stats *tx_stats = tx_stats(txo);
686
687                 do {
688                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
689                         pkts = tx_stats(txo)->tx_pkts;
690                         bytes = tx_stats(txo)->tx_bytes;
691                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
692                 stats->tx_packets += pkts;
693                 stats->tx_bytes += bytes;
694         }
695
696         /* bad pkts received */
697         stats->rx_errors = drvs->rx_crc_errors +
698                 drvs->rx_alignment_symbol_errors +
699                 drvs->rx_in_range_errors +
700                 drvs->rx_out_range_errors +
701                 drvs->rx_frame_too_long +
702                 drvs->rx_dropped_too_small +
703                 drvs->rx_dropped_too_short +
704                 drvs->rx_dropped_header_too_small +
705                 drvs->rx_dropped_tcp_length +
706                 drvs->rx_dropped_runt;
707
708         /* detailed rx errors */
709         stats->rx_length_errors = drvs->rx_in_range_errors +
710                 drvs->rx_out_range_errors +
711                 drvs->rx_frame_too_long;
712
713         stats->rx_crc_errors = drvs->rx_crc_errors;
714
715         /* frame alignment errors */
716         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
717
718         /* receiver fifo overrun */
719         /* drops_no_pbuf is not per i/f, it's per BE card */
720         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
721                                 drvs->rx_input_fifo_overflow_drop +
722                                 drvs->rx_drops_no_pbuf;
723 }
724
725 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
726 {
727         struct net_device *netdev = adapter->netdev;
728
729         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
730                 netif_carrier_off(netdev);
731                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
732         }
733
734         if (link_status)
735                 netif_carrier_on(netdev);
736         else
737                 netif_carrier_off(netdev);
738
739         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
740 }
741
742 static int be_gso_hdr_len(struct sk_buff *skb)
743 {
744         if (skb->encapsulation)
745                 return skb_inner_transport_offset(skb) +
746                        inner_tcp_hdrlen(skb);
747         return skb_transport_offset(skb) + tcp_hdrlen(skb);
748 }
749
750 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
751 {
752         struct be_tx_stats *stats = tx_stats(txo);
753         u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
754         /* Account for headers which get duplicated in TSO pkt */
755         u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
756
757         u64_stats_update_begin(&stats->sync);
758         stats->tx_reqs++;
759         stats->tx_bytes += skb->len + dup_hdr_len;
760         stats->tx_pkts += tx_pkts;
761         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
762                 stats->tx_vxlan_offload_pkts += tx_pkts;
763         u64_stats_update_end(&stats->sync);
764 }
765
766 /* Returns number of WRBs needed for the skb */
767 static u32 skb_wrb_cnt(struct sk_buff *skb)
768 {
769         /* +1 for the header wrb */
770         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
771 }
772
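/* Program a WRB fragment with the upper/lower halves of the DMA address and
 * the fragment length.
 */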
773 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
774 {
775         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
776         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
777         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
778         wrb->rsvd0 = 0;
779 }
780
781 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
782  * to avoid the swap and shift/mask operations in wrb_fill().
783  */
784 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
785 {
786         wrb->frag_pa_hi = 0;
787         wrb->frag_pa_lo = 0;
788         wrb->frag_len = 0;
789         wrb->rsvd0 = 0;
790 }
791
792 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
793                                      struct sk_buff *skb)
794 {
795         u8 vlan_prio;
796         u16 vlan_tag;
797
798         vlan_tag = skb_vlan_tag_get(skb);
799         vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
800         /* If vlan priority provided by OS is NOT in available bmap */
801         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
802                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
803                                 adapter->recommended_prio_bits;
804
805         return vlan_tag;
806 }
807
808 /* Used only for IP tunnel packets */
809 static u16 skb_inner_ip_proto(struct sk_buff *skb)
810 {
811         return (inner_ip_hdr(skb)->version == 4) ?
812                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
813 }
814
815 static u16 skb_ip_proto(struct sk_buff *skb)
816 {
817         return (ip_hdr(skb)->version == 4) ?
818                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
819 }
820
821 static inline bool be_is_txq_full(struct be_tx_obj *txo)
822 {
823         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
824 }
825
826 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
827 {
828         return atomic_read(&txo->q.used) < txo->q.len / 2;
829 }
830
831 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
832 {
833         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
834 }
835
836 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
837                                        struct sk_buff *skb,
838                                        struct be_wrb_params *wrb_params)
839 {
840         u16 proto;
841
842         if (skb_is_gso(skb)) {
843                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
844                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
845                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
846                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
847         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
848                 if (skb->encapsulation) {
849                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
850                         proto = skb_inner_ip_proto(skb);
851                 } else {
852                         proto = skb_ip_proto(skb);
853                 }
854                 if (proto == IPPROTO_TCP)
855                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
856                 else if (proto == IPPROTO_UDP)
857                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
858         }
859
860         if (skb_vlan_tag_present(skb)) {
861                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
862                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
863         }
864
865         BE_WRB_F_SET(wrb_params->features, CRC, 1);
866 }
867
868 static void wrb_fill_hdr(struct be_adapter *adapter,
869                          struct be_eth_hdr_wrb *hdr,
870                          struct be_wrb_params *wrb_params,
871                          struct sk_buff *skb)
872 {
873         memset(hdr, 0, sizeof(*hdr));
874
875         SET_TX_WRB_HDR_BITS(crc, hdr,
876                             BE_WRB_F_GET(wrb_params->features, CRC));
877         SET_TX_WRB_HDR_BITS(ipcs, hdr,
878                             BE_WRB_F_GET(wrb_params->features, IPCS));
879         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
880                             BE_WRB_F_GET(wrb_params->features, TCPCS));
881         SET_TX_WRB_HDR_BITS(udpcs, hdr,
882                             BE_WRB_F_GET(wrb_params->features, UDPCS));
883
884         SET_TX_WRB_HDR_BITS(lso, hdr,
885                             BE_WRB_F_GET(wrb_params->features, LSO));
886         SET_TX_WRB_HDR_BITS(lso6, hdr,
887                             BE_WRB_F_GET(wrb_params->features, LSO6));
888         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
889
890         /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
891          * hack is not needed, the evt bit is set while ringing DB.
892          */
893         SET_TX_WRB_HDR_BITS(event, hdr,
894                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
895         SET_TX_WRB_HDR_BITS(vlan, hdr,
896                             BE_WRB_F_GET(wrb_params->features, VLAN));
897         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
898
899         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
900         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
901         SET_TX_WRB_HDR_BITS(mgmt, hdr,
902                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
903 }
904
905 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
906                           bool unmap_single)
907 {
908         dma_addr_t dma;
909         u32 frag_len = le32_to_cpu(wrb->frag_len);
910
911
912         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
913                 (u64)le32_to_cpu(wrb->frag_pa_lo);
914         if (frag_len) {
915                 if (unmap_single)
916                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
917                 else
918                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
919         }
920 }
921
922 /* Grab a WRB header for xmit */
923 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
924 {
925         u32 head = txo->q.head;
926
927         queue_head_inc(&txo->q);
928         return head;
929 }
930
931 /* Set up the WRB header for xmit */
932 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
933                                 struct be_tx_obj *txo,
934                                 struct be_wrb_params *wrb_params,
935                                 struct sk_buff *skb, u16 head)
936 {
937         u32 num_frags = skb_wrb_cnt(skb);
938         struct be_queue_info *txq = &txo->q;
939         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
940
941         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
942         be_dws_cpu_to_le(hdr, sizeof(*hdr));
943
944         BUG_ON(txo->sent_skb_list[head]);
945         txo->sent_skb_list[head] = skb;
946         txo->last_req_hdr = head;
947         atomic_add(num_frags, &txq->used);
948         txo->last_req_wrb_cnt = num_frags;
949         txo->pend_wrb_cnt += num_frags;
950 }
951
952 /* Setup a WRB fragment (buffer descriptor) for xmit */
953 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
954                                  int len)
955 {
956         struct be_eth_wrb *wrb;
957         struct be_queue_info *txq = &txo->q;
958
959         wrb = queue_head_node(txq);
960         wrb_fill(wrb, busaddr, len);
961         queue_head_inc(txq);
962 }
963
964 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
965  * was invoked. The producer index is restored to the previous packet and the
966  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
967  */
968 static void be_xmit_restore(struct be_adapter *adapter,
969                             struct be_tx_obj *txo, u32 head, bool map_single,
970                             u32 copied)
971 {
972         struct device *dev;
973         struct be_eth_wrb *wrb;
974         struct be_queue_info *txq = &txo->q;
975
976         dev = &adapter->pdev->dev;
977         txq->head = head;
978
979         /* skip the first wrb (hdr); it's not mapped */
980         queue_head_inc(txq);
981         while (copied) {
982                 wrb = queue_head_node(txq);
983                 unmap_tx_frag(dev, wrb, map_single);
984                 map_single = false;
985                 copied -= le32_to_cpu(wrb->frag_len);
986                 queue_head_inc(txq);
987         }
988
989         txq->head = head;
990 }
991
992 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
993  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
994  * of WRBs used up by the packet.
995  */
996 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
997                            struct sk_buff *skb,
998                            struct be_wrb_params *wrb_params)
999 {
1000         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
1001         struct device *dev = &adapter->pdev->dev;
1002         bool map_single = false;
1003         u32 head;
1004         dma_addr_t busaddr;
1005         int len;
1006
1007         head = be_tx_get_wrb_hdr(txo);
1008
1009         if (skb->len > skb->data_len) {
1010                 len = skb_headlen(skb);
1011
1012                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1013                 if (dma_mapping_error(dev, busaddr))
1014                         goto dma_err;
1015                 map_single = true;
1016                 be_tx_setup_wrb_frag(txo, busaddr, len);
1017                 copied += len;
1018         }
1019
1020         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1021                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1022                 len = skb_frag_size(frag);
1023
1024                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1025                 if (dma_mapping_error(dev, busaddr))
1026                         goto dma_err;
1027                 be_tx_setup_wrb_frag(txo, busaddr, len);
1028                 copied += len;
1029         }
1030
1031         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1032
1033         be_tx_stats_update(txo, skb);
1034         return wrb_cnt;
1035
1036 dma_err:
1037         adapter->drv_stats.dma_map_errors++;
1038         be_xmit_restore(adapter, txo, head, map_single, copied);
1039         return 0;
1040 }
1041
1042 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1043 {
1044         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1045 }
1046
1047 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1048                                              struct sk_buff *skb,
1049                                              struct be_wrb_params
1050                                              *wrb_params)
1051 {
1052         u16 vlan_tag = 0;
1053
1054         skb = skb_share_check(skb, GFP_ATOMIC);
1055         if (unlikely(!skb))
1056                 return skb;
1057
1058         if (skb_vlan_tag_present(skb))
1059                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1060
1061         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1062                 if (!vlan_tag)
1063                         vlan_tag = adapter->pvid;
1064                 /* f/w workaround: setting skip_hw_vlan = 1 informs the F/W
1065                  * to skip VLAN insertion
1066                  */
1067                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1068         }
1069
1070         if (vlan_tag) {
1071                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1072                                                 vlan_tag);
1073                 if (unlikely(!skb))
1074                         return skb;
1075                 skb->vlan_tci = 0;
1076         }
1077
1078         /* Insert the outer VLAN, if any */
1079         if (adapter->qnq_vid) {
1080                 vlan_tag = adapter->qnq_vid;
1081                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1082                                                 vlan_tag);
1083                 if (unlikely(!skb))
1084                         return skb;
1085                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1086         }
1087
1088         return skb;
1089 }
1090
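/* Detect IPv6 pkts whose first extension header has hdrlen == 0xff; such
 * pkts are subject to the BE3 TX-stall workaround (be_ipv6_tx_stall_chk()).
 */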
1091 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1092 {
1093         struct ethhdr *eh = (struct ethhdr *)skb->data;
1094         u16 offset = ETH_HLEN;
1095
1096         if (eh->h_proto == htons(ETH_P_IPV6)) {
1097                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1098
1099                 offset += sizeof(struct ipv6hdr);
1100                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1101                     ip6h->nexthdr != NEXTHDR_UDP) {
1102                         struct ipv6_opt_hdr *ehdr =
1103                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1104
1105                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1106                         if (ehdr->hdrlen == 0xff)
1107                                 return true;
1108                 }
1109         }
1110         return false;
1111 }
1112
1113 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1114 {
1115         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1116 }
1117
1118 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1119 {
1120         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1121 }
1122
1123 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1124                                                   struct sk_buff *skb,
1125                                                   struct be_wrb_params
1126                                                   *wrb_params)
1127 {
1128         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1129         unsigned int eth_hdr_len;
1130         struct iphdr *ip;
1131
1132         /* For padded packets, BE HW modifies tot_len field in IP header
1133          * incorrectly when VLAN tag is inserted by HW.
1134          * For padded packets, Lancer computes incorrect checksum.
1135          */
1136         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1137                                                 VLAN_ETH_HLEN : ETH_HLEN;
1138         if (skb->len <= 60 &&
1139             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1140             is_ipv4_pkt(skb)) {
1141                 ip = (struct iphdr *)ip_hdr(skb);
1142                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1143         }
1144
1145         /* If vlan tag is already inlined in the packet, skip HW VLAN
1146          * tagging in pvid-tagging mode
1147          */
1148         if (be_pvid_tagging_enabled(adapter) &&
1149             veh->h_vlan_proto == htons(ETH_P_8021Q))
1150                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1151
1152         /* HW has a bug wherein it will calculate CSUM for VLAN
1153          * pkts even though it is disabled.
1154          * Manually insert VLAN in pkt.
1155          */
1156         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1157             skb_vlan_tag_present(skb)) {
1158                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1159                 if (unlikely(!skb))
1160                         goto err;
1161         }
1162
1163         /* HW may lockup when VLAN HW tagging is requested on
1164          * certain ipv6 packets. Drop such pkts if the HW workaround to
1165          * skip HW tagging is not enabled by FW.
1166          */
1167         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1168                      (adapter->pvid || adapter->qnq_vid) &&
1169                      !qnq_async_evt_rcvd(adapter)))
1170                 goto tx_drop;
1171
1172         /* Manual VLAN tag insertion to prevent:
1173          * ASIC lockup when the ASIC inserts VLAN tag into
1174          * certain ipv6 packets. Insert VLAN tags in driver,
1175          * and set event, completion, vlan bits accordingly
1176          * in the Tx WRB.
1177          */
1178         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1179             be_vlan_tag_tx_chk(adapter, skb)) {
1180                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1181                 if (unlikely(!skb))
1182                         goto err;
1183         }
1184
1185         return skb;
1186 tx_drop:
1187         dev_kfree_skb_any(skb);
1188 err:
1189         return NULL;
1190 }
1191
1192 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1193                                            struct sk_buff *skb,
1194                                            struct be_wrb_params *wrb_params)
1195 {
1196         int err;
1197
1198         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1199          * packets that are 32 bytes or less may cause a transmit stall
1200          * on that port. The workaround is to pad such packets
1201          * (len <= 32 bytes) to a minimum length of 36 bytes.
1202          */
1203         if (skb->len <= 32) {
1204                 if (skb_put_padto(skb, 36))
1205                         return NULL;
1206         }
1207
1208         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1209                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1210                 if (!skb)
1211                         return NULL;
1212         }
1213
1214         /* The stack can send us skbs with length greater than
1215          * what the HW can handle. Trim the extra bytes.
1216          */
1217         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1218         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1219         WARN_ON(err);
1220
1221         return skb;
1222 }
1223
1224 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1225 {
1226         struct be_queue_info *txq = &txo->q;
1227         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1228
1229         /* Mark the last request eventable if it hasn't been marked already */
1230         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1231                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1232
1233         /* compose a dummy wrb if there is an odd number of wrbs to notify */
1234         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1235                 wrb_fill_dummy(queue_head_node(txq));
1236                 queue_head_inc(txq);
1237                 atomic_inc(&txq->used);
1238                 txo->pend_wrb_cnt++;
1239                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1240                                            TX_HDR_WRB_NUM_SHIFT);
1241                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1242                                           TX_HDR_WRB_NUM_SHIFT);
1243         }
1244         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1245         txo->pend_wrb_cnt = 0;
1246 }
1247
1248 /* OS2BMC related */
1249
1250 #define DHCP_CLIENT_PORT        68
1251 #define DHCP_SERVER_PORT        67
1252 #define NET_BIOS_PORT1          137
1253 #define NET_BIOS_PORT2          138
1254 #define DHCPV6_RAS_PORT         547
1255
1256 #define is_mc_allowed_on_bmc(adapter, eh)       \
1257         (!is_multicast_filt_enabled(adapter) && \
1258          is_multicast_ether_addr(eh->h_dest) && \
1259          !is_broadcast_ether_addr(eh->h_dest))
1260
1261 #define is_bc_allowed_on_bmc(adapter, eh)       \
1262         (!is_broadcast_filt_enabled(adapter) && \
1263          is_broadcast_ether_addr(eh->h_dest))
1264
1265 #define is_arp_allowed_on_bmc(adapter, skb)     \
1266         (is_arp(skb) && is_arp_filt_enabled(adapter))
1267
1268 #define is_broadcast_packet(eh, adapter)        \
1269                 (is_multicast_ether_addr(eh->h_dest) && \
1270                 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1271
1272 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1273
1274 #define is_arp_filt_enabled(adapter)    \
1275                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1276
1277 #define is_dhcp_client_filt_enabled(adapter)    \
1278                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1279
1280 #define is_dhcp_srvr_filt_enabled(adapter)      \
1281                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1282
1283 #define is_nbios_filt_enabled(adapter)  \
1284                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1285
1286 #define is_ipv6_na_filt_enabled(adapter)        \
1287                 (adapter->bmc_filt_mask &       \
1288                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1289
1290 #define is_ipv6_ra_filt_enabled(adapter)        \
1291                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1292
1293 #define is_ipv6_ras_filt_enabled(adapter)       \
1294                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1295
1296 #define is_broadcast_filt_enabled(adapter)      \
1297                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1298
1299 #define is_multicast_filt_enabled(adapter)      \
1300                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1301
1302 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1303                                struct sk_buff **skb)
1304 {
1305         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1306         bool os2bmc = false;
1307
1308         if (!be_is_os2bmc_enabled(adapter))
1309                 goto done;
1310
1311         if (!is_multicast_ether_addr(eh->h_dest))
1312                 goto done;
1313
1314         if (is_mc_allowed_on_bmc(adapter, eh) ||
1315             is_bc_allowed_on_bmc(adapter, eh) ||
1316             is_arp_allowed_on_bmc(adapter, (*skb))) {
1317                 os2bmc = true;
1318                 goto done;
1319         }
1320
1321         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1322                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1323                 u8 nexthdr = hdr->nexthdr;
1324
1325                 if (nexthdr == IPPROTO_ICMPV6) {
1326                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1327
1328                         switch (icmp6->icmp6_type) {
1329                         case NDISC_ROUTER_ADVERTISEMENT:
1330                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1331                                 goto done;
1332                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1333                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1334                                 goto done;
1335                         default:
1336                                 break;
1337                         }
1338                 }
1339         }
1340
1341         if (is_udp_pkt((*skb))) {
1342                 struct udphdr *udp = udp_hdr((*skb));
1343
1344                 switch (ntohs(udp->dest)) {
1345                 case DHCP_CLIENT_PORT:
1346                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1347                         goto done;
1348                 case DHCP_SERVER_PORT:
1349                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1350                         goto done;
1351                 case NET_BIOS_PORT1:
1352                 case NET_BIOS_PORT2:
1353                         os2bmc = is_nbios_filt_enabled(adapter);
1354                         goto done;
1355                 case DHCPV6_RAS_PORT:
1356                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1357                         goto done;
1358                 default:
1359                         break;
1360                 }
1361         }
1362 done:
1363         /* For vlan packets destined to the BMC, the asic expects the
1364          * vlan tag to be inline in the packet.
1365          */
1366         if (os2bmc)
1367                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1368
1369         return os2bmc;
1370 }
1371
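/* ndo_start_xmit: apply chip-specific workarounds, map the skb into WRBs
 * (and enqueue it a second time with the mgmt bit when it must also reach
 * the BMC), then ring the TX doorbell; the flush is deferred while
 * xmit_more is set and the subqueue is not stopped.
 */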
1372 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1373 {
1374         struct be_adapter *adapter = netdev_priv(netdev);
1375         u16 q_idx = skb_get_queue_mapping(skb);
1376         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1377         struct be_wrb_params wrb_params = { 0 };
1378         bool flush = !skb->xmit_more;
1379         u16 wrb_cnt;
1380
1381         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1382         if (unlikely(!skb))
1383                 goto drop;
1384
1385         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1386
1387         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1388         if (unlikely(!wrb_cnt)) {
1389                 dev_kfree_skb_any(skb);
1390                 goto drop;
1391         }
1392
1393         /* If OS2BMC is enabled and the pkt is destined to the BMC,
1394          * enqueue the pkt a 2nd time with the mgmt bit set.
1395          */
1396         if (be_send_pkt_to_bmc(adapter, &skb)) {
1397                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1398                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1399                 if (unlikely(!wrb_cnt))
1400                         goto drop;
1401                 else
1402                         skb_get(skb);
1403         }
1404
1405         if (be_is_txq_full(txo)) {
1406                 netif_stop_subqueue(netdev, q_idx);
1407                 tx_stats(txo)->tx_stops++;
1408         }
1409
1410         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1411                 be_xmit_flush(adapter, txo);
1412
1413         return NETDEV_TX_OK;
1414 drop:
1415         tx_stats(txo)->tx_drv_drops++;
1416         /* Flush the already enqueued tx requests */
1417         if (flush && txo->pend_wrb_cnt)
1418                 be_xmit_flush(adapter, txo);
1419
1420         return NETDEV_TX_OK;
1421 }
1422
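/* Called by the stack when a Tx queue appears hung: dumps the non-zero
 * TXQ/TX-CQ descriptors and the pending skbs of every Tx queue for
 * debugging (the skb header dump assumes IPv4 TCP/UDP packets), and on
 * Lancer chips initiates a firmware reset.
 */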
1423 static void be_tx_timeout(struct net_device *netdev)
1424 {
1425         struct be_adapter *adapter = netdev_priv(netdev);
1426         struct device *dev = &adapter->pdev->dev;
1427         struct be_tx_obj *txo;
1428         struct sk_buff *skb;
1429         struct tcphdr *tcphdr;
1430         struct udphdr *udphdr;
1431         u32 *entry;
1432         int status;
1433         int i, j;
1434
1435         for_all_tx_queues(adapter, txo, i) {
1436                 dev_info(dev, "TXQ Dump: %d H: %d T: %d used: %d, qid: 0x%x\n",
1437                          i, txo->q.head, txo->q.tail,
1438                          atomic_read(&txo->q.used), txo->q.id);
1439
1440                 entry = txo->q.dma_mem.va;
1441                 for (j = 0; j < TX_Q_LEN * 4; j += 4) {
1442                         if (entry[j] != 0 || entry[j + 1] != 0 ||
1443                             entry[j + 2] != 0 || entry[j + 3] != 0) {
1444                                 dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1445                                          j, entry[j], entry[j + 1],
1446                                          entry[j + 2], entry[j + 3]);
1447                         }
1448                 }
1449
1450                 entry = txo->cq.dma_mem.va;
1451                 dev_info(dev, "TXCQ Dump: %d  H: %d T: %d used: %d\n",
1452                          i, txo->cq.head, txo->cq.tail,
1453                          atomic_read(&txo->cq.used));
1454                 for (j = 0; j < TX_CQ_LEN * 4; j += 4) {
1455                         if (entry[j] != 0 || entry[j + 1] != 0 ||
1456                             entry[j + 2] != 0 || entry[j + 3] != 0) {
1457                                 dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1458                                          j, entry[j], entry[j + 1],
1459                                          entry[j + 2], entry[j + 3]);
1460                         }
1461                 }
1462
1463                 for (j = 0; j < TX_Q_LEN; j++) {
1464                         if (txo->sent_skb_list[j]) {
1465                                 skb = txo->sent_skb_list[j];
1466                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
1467                                         tcphdr = tcp_hdr(skb);
1468                                         dev_info(dev, "TCP source port %d\n",
1469                                                  ntohs(tcphdr->source));
1470                                         dev_info(dev, "TCP dest port %d\n",
1471                                                  ntohs(tcphdr->dest));
1472                                         dev_info(dev, "TCP sequence num %u\n",
1473                                                  ntohl(tcphdr->seq));
1474                                         dev_info(dev, "TCP ack_seq %u\n",
1475                                                  ntohl(tcphdr->ack_seq));
1476                                 } else if (ip_hdr(skb)->protocol ==
1477                                            IPPROTO_UDP) {
1478                                         udphdr = udp_hdr(skb);
1479                                         dev_info(dev, "UDP source port %d\n",
1480                                                  ntohs(udphdr->source));
1481                                         dev_info(dev, "UDP dest port %d\n",
1482                                                  ntohs(udphdr->dest));
1483                                 }
1484                                 dev_info(dev, "skb[%d] %p len %d proto 0x%x\n",
1485                                          j, skb, skb->len, skb->protocol);
1486                         }
1487                 }
1488         }
1489
1490         if (lancer_chip(adapter)) {
1491                 dev_info(dev, "Initiating reset due to tx timeout\n");
1492                 dev_info(dev, "Resetting adapter\n");
1493                 status = lancer_physdev_ctrl(adapter,
1494                                              PHYSDEV_CONTROL_FW_RESET_MASK);
1495                 if (status)
1496                         dev_err(dev, "Reset failed; please reboot the server\n");
1497         }
1498 }
1499
1500 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1501 {
1502         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1503                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1504 }
1505
1506 static int be_set_vlan_promisc(struct be_adapter *adapter)
1507 {
1508         struct device *dev = &adapter->pdev->dev;
1509         int status;
1510
1511         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1512                 return 0;
1513
1514         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1515         if (!status) {
1516                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1517                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1518         } else {
1519                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1520         }
1521         return status;
1522 }
1523
1524 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1525 {
1526         struct device *dev = &adapter->pdev->dev;
1527         int status;
1528
1529         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1530         if (!status) {
1531                 dev_info(dev, "Disabled VLAN promiscuous mode\n");
1532                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1533         }
1534         return status;
1535 }
1536
1537 /*
1538  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1539  * If the user configures more, place BE in vlan promiscuous mode.
1540  */
1541 static int be_vid_config(struct be_adapter *adapter)
1542 {
1543         struct device *dev = &adapter->pdev->dev;
1544         u16 vids[BE_NUM_VLANS_SUPPORTED];
1545         u16 num = 0, i = 0;
1546         int status = 0;
1547
1548         /* No need to change the VLAN state if the I/F is in promiscuous mode */
1549         if (adapter->netdev->flags & IFF_PROMISC)
1550                 return 0;
1551
1552         if (adapter->vlans_added > be_max_vlans(adapter))
1553                 return be_set_vlan_promisc(adapter);
1554
1555         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1556                 status = be_clear_vlan_promisc(adapter);
1557                 if (status)
1558                         return status;
1559         }
1560         /* Construct VLAN Table to give to HW */
1561         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1562                 vids[num++] = cpu_to_le16(i);
1563
1564         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1565         if (status) {
1566                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1567                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1568                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1569                     addl_status(status) ==
1570                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1571                         return be_set_vlan_promisc(adapter);
1572         }
1573         return status;
1574 }
1575
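/* ndo_vlan_rx_add_vid handler: records the VID in adapter->vids and
 * reprograms the HW VLAN filter table (or VLAN promisc mode) through
 * be_vid_config(). be_vlan_rem_vid() below is its counterpart.
 */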
1576 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1577 {
1578         struct be_adapter *adapter = netdev_priv(netdev);
1579         int status = 0;
1580
1581         mutex_lock(&adapter->rx_filter_lock);
1582
1583         /* Packets with VID 0 are always received by Lancer by default */
1584         if (lancer_chip(adapter) && vid == 0)
1585                 goto done;
1586
1587         if (test_bit(vid, adapter->vids))
1588                 goto done;
1589
1590         set_bit(vid, adapter->vids);
1591         adapter->vlans_added++;
1592
1593         status = be_vid_config(adapter);
1594 done:
1595         mutex_unlock(&adapter->rx_filter_lock);
1596         return status;
1597 }
1598
1599 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1600 {
1601         struct be_adapter *adapter = netdev_priv(netdev);
1602         int status = 0;
1603
1604         mutex_lock(&adapter->rx_filter_lock);
1605
1606         /* Packets with VID 0 are always received by Lancer by default */
1607         if (lancer_chip(adapter) && vid == 0)
1608                 goto done;
1609
1610         if (!test_bit(vid, adapter->vids))
1611                 goto done;
1612
1613         clear_bit(vid, adapter->vids);
1614         adapter->vlans_added--;
1615
1616         status = be_vid_config(adapter);
1617 done:
1618         mutex_unlock(&adapter->rx_filter_lock);
1619         return status;
1620 }
1621
1622 static void be_set_all_promisc(struct be_adapter *adapter)
1623 {
1624         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1625         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1626 }
1627
1628 static void be_set_mc_promisc(struct be_adapter *adapter)
1629 {
1630         int status;
1631
1632         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1633                 return;
1634
1635         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1636         if (!status)
1637                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1638 }
1639
1640 static void be_set_uc_promisc(struct be_adapter *adapter)
1641 {
1642         int status;
1643
1644         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1645                 return;
1646
1647         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1648         if (!status)
1649                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1650 }
1651
1652 static void be_clear_uc_promisc(struct be_adapter *adapter)
1653 {
1654         int status;
1655
1656         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1657                 return;
1658
1659         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1660         if (!status)
1661                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1662 }
1663
1664 /* The two functions below are the callback args for __dev_mc_sync() and
1665  * __dev_uc_sync(). A single callback is used for both sync and unsync; it
1666  * doesn't really add/remove addresses, but only flags that the uc/mc list
1667  * has changed. The entire uc/mc list is programmed in be_set_rx_mode().
1668  */
1669 static int be_uc_list_update(struct net_device *netdev,
1670                              const unsigned char *addr)
1671 {
1672         struct be_adapter *adapter = netdev_priv(netdev);
1673
1674         adapter->update_uc_list = true;
1675         return 0;
1676 }
1677
1678 static int be_mc_list_update(struct net_device *netdev,
1679                              const unsigned char *addr)
1680 {
1681         struct be_adapter *adapter = netdev_priv(netdev);
1682
1683         adapter->update_mc_list = true;
1684         return 0;
1685 }
1686
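/* Programs the HW multicast filter from the netdev mc-list. The list is
 * cached in adapter->mc_list under netif_addr_lock_bh(); multicast-promisc
 * mode is used instead when IFF_ALLMULTI is set or the list exceeds
 * be_max_mc().
 */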
1687 static void be_set_mc_list(struct be_adapter *adapter)
1688 {
1689         struct net_device *netdev = adapter->netdev;
1690         struct netdev_hw_addr *ha;
1691         bool mc_promisc = false;
1692         int status;
1693
1694         netif_addr_lock_bh(netdev);
1695         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1696
1697         if (netdev->flags & IFF_PROMISC) {
1698                 adapter->update_mc_list = false;
1699         } else if (netdev->flags & IFF_ALLMULTI ||
1700                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1701                 /* Enable multicast promisc if num configured exceeds
1702                  * what we support
1703                  */
1704                 mc_promisc = true;
1705                 adapter->update_mc_list = false;
1706         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1707                 /* Update mc-list unconditionally if the iface was previously
1708                  * in mc-promisc mode and now is out of that mode.
1709                  */
1710                 adapter->update_mc_list = true;
1711         }
1712
1713         if (adapter->update_mc_list) {
1714                 int i = 0;
1715
1716                 /* cache the mc-list in adapter */
1717                 netdev_for_each_mc_addr(ha, netdev) {
1718                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1719                         i++;
1720                 }
1721                 adapter->mc_count = netdev_mc_count(netdev);
1722         }
1723         netif_addr_unlock_bh(netdev);
1724
1725         if (mc_promisc) {
1726                 be_set_mc_promisc(adapter);
1727         } else if (adapter->update_mc_list) {
1728                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1729                 if (!status)
1730                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1731                 else
1732                         be_set_mc_promisc(adapter);
1733
1734                 adapter->update_mc_list = false;
1735         }
1736 }
1737
1738 static void be_clear_mc_list(struct be_adapter *adapter)
1739 {
1740         struct net_device *netdev = adapter->netdev;
1741
1742         __dev_mc_unsync(netdev, NULL);
1743         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1744         adapter->mc_count = 0;
1745 }
1746
1747 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1748 {
1749         if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1750                 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1751                 return 0;
1752         }
1753
1754         return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1755                                adapter->if_handle,
1756                                &adapter->pmac_id[uc_idx + 1], 0);
1757 }
1758
1759 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1760 {
1761         if (pmac_id == adapter->pmac_id[0])
1762                 return;
1763
1764         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1765 }
1766
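/* Programs the HW unicast MAC filters from the netdev uc-list. pmac slot 0
 * is reserved for the primary MAC, so only be_max_uc() - 1 entries are
 * available; unicast-promisc mode is used instead when the list does not
 * fit.
 */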
1767 static void be_set_uc_list(struct be_adapter *adapter)
1768 {
1769         struct net_device *netdev = adapter->netdev;
1770         struct netdev_hw_addr *ha;
1771         bool uc_promisc = false;
1772         int curr_uc_macs = 0, i;
1773
1774         netif_addr_lock_bh(netdev);
1775         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1776
1777         if (netdev->flags & IFF_PROMISC) {
1778                 adapter->update_uc_list = false;
1779         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1780                 uc_promisc = true;
1781                 adapter->update_uc_list = false;
1782         } else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1783                 /* Update uc-list unconditionally if the iface was previously
1784                  * in uc-promisc mode and now is out of that mode.
1785                  */
1786                 adapter->update_uc_list = true;
1787         }
1788
1789         if (adapter->update_uc_list) {
1790                 /* cache the uc-list in adapter array */
1791                 i = 0;
1792                 netdev_for_each_uc_addr(ha, netdev) {
1793                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1794                         i++;
1795                 }
1796                 curr_uc_macs = netdev_uc_count(netdev);
1797         }
1798         netif_addr_unlock_bh(netdev);
1799
1800         if (uc_promisc) {
1801                 be_set_uc_promisc(adapter);
1802         } else if (adapter->update_uc_list) {
1803                 be_clear_uc_promisc(adapter);
1804
1805                 for (i = 0; i < adapter->uc_macs; i++)
1806                         be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1807
1808                 for (i = 0; i < curr_uc_macs; i++)
1809                         be_uc_mac_add(adapter, i);
1810                 adapter->uc_macs = curr_uc_macs;
1811                 adapter->update_uc_list = false;
1812         }
1813 }
1814
1815 static void be_clear_uc_list(struct be_adapter *adapter)
1816 {
1817         struct net_device *netdev = adapter->netdev;
1818         int i;
1819
1820         __dev_uc_unsync(netdev, NULL);
1821         for (i = 0; i < adapter->uc_macs; i++)
1822                 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1823
1824         adapter->uc_macs = 0;
1825 }
1826
1827 static void __be_set_rx_mode(struct be_adapter *adapter)
1828 {
1829         struct net_device *netdev = adapter->netdev;
1830
1831         mutex_lock(&adapter->rx_filter_lock);
1832
1833         if (netdev->flags & IFF_PROMISC) {
1834                 if (!be_in_all_promisc(adapter))
1835                         be_set_all_promisc(adapter);
1836         } else if (be_in_all_promisc(adapter)) {
1837                 /* We need to re-program the vlan-list or clear
1838                  * vlan-promisc mode (if needed) when the interface
1839                  * comes out of promisc mode.
1840                  */
1841                 be_vid_config(adapter);
1842         }
1843
1844         be_set_uc_list(adapter);
1845         be_set_mc_list(adapter);
1846
1847         mutex_unlock(&adapter->rx_filter_lock);
1848 }
1849
1850 static void be_work_set_rx_mode(struct work_struct *work)
1851 {
1852         struct be_cmd_work *cmd_work =
1853                                 container_of(work, struct be_cmd_work, work);
1854
1855         __be_set_rx_mode(cmd_work->adapter);
1856         kfree(cmd_work);
1857 }
1858
1859 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1860 {
1861         struct be_adapter *adapter = netdev_priv(netdev);
1862         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1863         int status;
1864
1865         if (!sriov_enabled(adapter))
1866                 return -EPERM;
1867
1868         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1869                 return -EINVAL;
1870
1871         /* Proceed further only if the user-provided MAC is different
1872          * from the active MAC
1873          */
1874         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1875                 return 0;
1876
1877         if (BEx_chip(adapter)) {
1878                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1879                                 vf + 1);
1880
1881                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1882                                          &vf_cfg->pmac_id, vf + 1);
1883         } else {
1884                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1885                                         vf + 1);
1886         }
1887
1888         if (status) {
1889                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1890                         mac, vf, status);
1891                 return be_cmd_status(status);
1892         }
1893
1894         ether_addr_copy(vf_cfg->mac_addr, mac);
1895
1896         return 0;
1897 }
1898
1899 static int be_get_vf_config(struct net_device *netdev, int vf,
1900                             struct ifla_vf_info *vi)
1901 {
1902         struct be_adapter *adapter = netdev_priv(netdev);
1903         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1904
1905         if (!sriov_enabled(adapter))
1906                 return -EPERM;
1907
1908         if (vf >= adapter->num_vfs)
1909                 return -EINVAL;
1910
1911         vi->vf = vf;
1912         vi->max_tx_rate = vf_cfg->tx_rate;
1913         vi->min_tx_rate = 0;
1914         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1915         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1916         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1917         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1918         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1919
1920         return 0;
1921 }
1922
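/* Enables Transparent VLAN Tagging (TVT) for a VF: programs the given VLAN
 * via be_cmd_set_hsw_config(), clears any guest-programmed VLAN filters and
 * revokes the VF's FILTMGMT privilege so the guest cannot override TVT.
 */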
1923 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1924 {
1925         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1926         u16 vids[BE_NUM_VLANS_SUPPORTED];
1927         int vf_if_id = vf_cfg->if_handle;
1928         int status;
1929
1930         /* Enable Transparent VLAN Tagging */
1931         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1932         if (status)
1933                 return status;
1934
1935         /* With TVT enabled, clear any pre-programmed VLAN filters on the VF */
1936         vids[0] = 0;
1937         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1938         if (!status)
1939                 dev_info(&adapter->pdev->dev,
1940                          "Cleared guest VLANs on VF%d", vf);
1941
1942         /* After TVT is enabled, disallow VFs to program VLAN filters */
1943         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1944                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1945                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1946                 if (!status)
1947                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1948         }
1949         return 0;
1950 }
1951
1952 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1953 {
1954         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1955         struct device *dev = &adapter->pdev->dev;
1956         int status;
1957
1958         /* Reset Transparent VLAN Tagging. */
1959         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1960                                        vf_cfg->if_handle, 0, 0);
1961         if (status)
1962                 return status;
1963
1964         /* Allow VFs to program VLAN filtering */
1965         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1966                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1967                                                   BE_PRIV_FILTMGMT, vf + 1);
1968                 if (!status) {
1969                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1970                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1971                 }
1972         }
1973
1974         dev_info(dev,
1975                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1976         return 0;
1977 }
1978
1979 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1980                           __be16 vlan_proto)
1981 {
1982         struct be_adapter *adapter = netdev_priv(netdev);
1983         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1984         int status;
1985
1986         if (!sriov_enabled(adapter))
1987                 return -EPERM;
1988
1989         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1990                 return -EINVAL;
1991
1992         if (vlan_proto != htons(ETH_P_8021Q))
1993                 return -EPROTONOSUPPORT;
1994
1995         if (vlan || qos) {
1996                 vlan |= qos << VLAN_PRIO_SHIFT;
1997                 status = be_set_vf_tvt(adapter, vf, vlan);
1998         } else {
1999                 status = be_clear_vf_tvt(adapter, vf);
2000         }
2001
2002         if (status) {
2003                 dev_err(&adapter->pdev->dev,
2004                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
2005                         status);
2006                 return be_cmd_status(status);
2007         }
2008
2009         vf_cfg->vlan_tag = vlan;
2010         return 0;
2011 }
2012
2013 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
2014                              int min_tx_rate, int max_tx_rate)
2015 {
2016         struct be_adapter *adapter = netdev_priv(netdev);
2017         struct device *dev = &adapter->pdev->dev;
2018         int percent_rate, status = 0;
2019         u16 link_speed = 0;
2020         u8 link_status;
2021
2022         if (!sriov_enabled(adapter))
2023                 return -EPERM;
2024
2025         if (vf >= adapter->num_vfs)
2026                 return -EINVAL;
2027
2028         if (min_tx_rate)
2029                 return -EINVAL;
2030
2031         if (!max_tx_rate)
2032                 goto config_qos;
2033
2034         status = be_cmd_link_status_query(adapter, &link_speed,
2035                                           &link_status, 0);
2036         if (status)
2037                 goto err;
2038
2039         if (!link_status) {
2040                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
2041                 status = -ENETDOWN;
2042                 goto err;
2043         }
2044
2045         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
2046                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
2047                         link_speed);
2048                 status = -EINVAL;
2049                 goto err;
2050         }
2051
2052         /* On Skyhawk the QoS setting must be a % of the link speed */
2053         percent_rate = link_speed / 100;
2054         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
2055                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
2056                         percent_rate);
2057                 status = -EINVAL;
2058                 goto err;
2059         }
2060
2061 config_qos:
2062         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
2063         if (status)
2064                 goto err;
2065
2066         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
2067         return 0;
2068
2069 err:
2070         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
2071                 max_tx_rate, vf);
2072         return be_cmd_status(status);
2073 }
2074
2075 static int be_set_vf_link_state(struct net_device *netdev, int vf,
2076                                 int link_state)
2077 {
2078         struct be_adapter *adapter = netdev_priv(netdev);
2079         int status;
2080
2081         if (!sriov_enabled(adapter))
2082                 return -EPERM;
2083
2084         if (vf >= adapter->num_vfs)
2085                 return -EINVAL;
2086
2087         status = be_cmd_set_logical_link_config(adapter, link_state, vf + 1);
2088         if (status) {
2089                 dev_err(&adapter->pdev->dev,
2090                         "Link state change on VF %d failed: %#x\n", vf, status);
2091                 return be_cmd_status(status);
2092         }
2093
2094         adapter->vf_cfg[vf].plink_tracking = link_state;
2095
2096         return 0;
2097 }
2098
2099 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2100 {
2101         struct be_adapter *adapter = netdev_priv(netdev);
2102         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2103         u8 spoofchk;
2104         int status;
2105
2106         if (!sriov_enabled(adapter))
2107                 return -EPERM;
2108
2109         if (vf >= adapter->num_vfs)
2110                 return -EINVAL;
2111
2112         if (BEx_chip(adapter))
2113                 return -EOPNOTSUPP;
2114
2115         if (enable == vf_cfg->spoofchk)
2116                 return 0;
2117
2118         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2119
2120         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2121                                        0, spoofchk);
2122         if (status) {
2123                 dev_err(&adapter->pdev->dev,
2124                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
2125                 return be_cmd_status(status);
2126         }
2127
2128         vf_cfg->spoofchk = enable;
2129         return 0;
2130 }
2131
2132 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2133                           ulong now)
2134 {
2135         aic->rx_pkts_prev = rx_pkts;
2136         aic->tx_reqs_prev = tx_pkts;
2137         aic->jiffies = now;
2138 }
2139
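/* Adaptive interrupt coalescing: derives a new EQ delay from the aggregate
 * Rx + Tx packet rate seen on this EQ since the last sample, using
 * eqd = (pps / 15000) << 2; values below 8 are zeroed and the result is
 * clamped to [aic->min_eqd, aic->max_eqd]. For example, at 150,000 pkts/s,
 * eqd = (150000 / 15000) << 2 = 40.
 */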
2140 static int be_get_new_eqd(struct be_eq_obj *eqo)
2141 {
2142         struct be_adapter *adapter = eqo->adapter;
2143         int eqd, start;
2144         struct be_aic_obj *aic;
2145         struct be_rx_obj *rxo;
2146         struct be_tx_obj *txo;
2147         u64 rx_pkts = 0, tx_pkts = 0;
2148         ulong now;
2149         u32 pps, delta;
2150         int i;
2151
2152         aic = &adapter->aic_obj[eqo->idx];
2153         if (!aic->enable) {
2154                 if (aic->jiffies)
2155                         aic->jiffies = 0;
2156                 eqd = aic->et_eqd;
2157                 return eqd;
2158         }
2159
2160         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2161                 do {
2162                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2163                         rx_pkts += rxo->stats.rx_pkts;
2164                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2165         }
2166
2167         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2168                 do {
2169                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2170                         tx_pkts += txo->stats.tx_reqs;
2171                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2172         }
2173
2174         /* Skip if counters wrapped around or this is the first sample */
2175         now = jiffies;
2176         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2177             rx_pkts < aic->rx_pkts_prev ||
2178             tx_pkts < aic->tx_reqs_prev) {
2179                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2180                 return aic->prev_eqd;
2181         }
2182
2183         delta = jiffies_to_msecs(now - aic->jiffies);
2184         if (delta == 0)
2185                 return aic->prev_eqd;
2186
2187         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2188                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2189         eqd = (pps / 15000) << 2;
2190
2191         if (eqd < 8)
2192                 eqd = 0;
2193         eqd = min_t(u32, eqd, aic->max_eqd);
2194         eqd = max_t(u32, eqd, aic->min_eqd);
2195
2196         be_aic_update(aic, rx_pkts, tx_pkts, now);
2197
2198         return eqd;
2199 }
2200
2201 /* For Skyhawk-R only */
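/* Maps the EQ delay computed by be_get_new_eqd() to one of the coarse R2I
 * delay-multiplier encodings (R2I_DLY_ENC_0..3) based on simple thresholds
 * (eqd > 100, > 60, > 20).
 */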
2202 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2203 {
2204         struct be_adapter *adapter = eqo->adapter;
2205         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2206         ulong now = jiffies;
2207         int eqd;
2208         u32 mult_enc;
2209
2210         if (!aic->enable)
2211                 return 0;
2212
2213         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2214                 eqd = aic->prev_eqd;
2215         else
2216                 eqd = be_get_new_eqd(eqo);
2217
2218         if (eqd > 100)
2219                 mult_enc = R2I_DLY_ENC_1;
2220         else if (eqd > 60)
2221                 mult_enc = R2I_DLY_ENC_2;
2222         else if (eqd > 20)
2223                 mult_enc = R2I_DLY_ENC_3;
2224         else
2225                 mult_enc = R2I_DLY_ENC_0;
2226
2227         aic->prev_eqd = eqd;
2228
2229         return mult_enc;
2230 }
2231
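/* Recomputes the EQ delay for every event queue and issues a MODIFY_EQD
 * command for those that changed. The delay is passed to FW as a
 * multiplier computed as (eqd * 65) / 100, e.g. eqd 40 -> multiplier 26.
 */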
2232 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2233 {
2234         struct be_set_eqd set_eqd[MAX_EVT_QS];
2235         struct be_aic_obj *aic;
2236         struct be_eq_obj *eqo;
2237         int i, num = 0, eqd;
2238
2239         for_all_evt_queues(adapter, eqo, i) {
2240                 aic = &adapter->aic_obj[eqo->idx];
2241                 eqd = be_get_new_eqd(eqo);
2242                 if (force_update || eqd != aic->prev_eqd) {
2243                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
2244                         set_eqd[num].eq_id = eqo->q.id;
2245                         aic->prev_eqd = eqd;
2246                         num++;
2247                 }
2248         }
2249
2250         if (num)
2251                 be_cmd_modify_eqd(adapter, set_eqd, num);
2252 }
2253
2254 static void be_rx_stats_update(struct be_rx_obj *rxo,
2255                                struct be_rx_compl_info *rxcp)
2256 {
2257         struct be_rx_stats *stats = rx_stats(rxo);
2258
2259         u64_stats_update_begin(&stats->sync);
2260         stats->rx_compl++;
2261         stats->rx_bytes += rxcp->pkt_size;
2262         stats->rx_pkts++;
2263         if (rxcp->tunneled)
2264                 stats->rx_vxlan_offload_pkts++;
2265         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2266                 stats->rx_mcast_pkts++;
2267         if (rxcp->err)
2268                 stats->rx_compl_err++;
2269         u64_stats_update_end(&stats->sync);
2270 }
2271
2272 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2273 {
2274         /* L4 checksum is not reliable for non TCP/UDP packets.
2275          * Also ignore ipcksm for ipv6 pkts
2276          */
2277         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2278                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2279 }
2280
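/* Returns the page_info entry at the RXQ tail and advances the tail.
 * The last fragment of a big page unmaps the whole page; any other
 * fragment is only synced for CPU access.
 */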
2281 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2282 {
2283         struct be_adapter *adapter = rxo->adapter;
2284         struct be_rx_page_info *rx_page_info;
2285         struct be_queue_info *rxq = &rxo->q;
2286         u32 frag_idx = rxq->tail;
2287
2288         rx_page_info = &rxo->page_info_tbl[frag_idx];
2289         BUG_ON(!rx_page_info->page);
2290
2291         if (rx_page_info->last_frag) {
2292                 dma_unmap_page(&adapter->pdev->dev,
2293                                dma_unmap_addr(rx_page_info, bus),
2294                                adapter->big_page_size, DMA_FROM_DEVICE);
2295                 rx_page_info->last_frag = false;
2296         } else {
2297                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2298                                         dma_unmap_addr(rx_page_info, bus),
2299                                         rx_frag_size, DMA_FROM_DEVICE);
2300         }
2301
2302         queue_tail_inc(rxq);
2303         atomic_dec(&rxq->used);
2304         return rx_page_info;
2305 }
2306
2307 /* Throw away the data in the Rx completion */
2308 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2309                                 struct be_rx_compl_info *rxcp)
2310 {
2311         struct be_rx_page_info *page_info;
2312         u16 i, num_rcvd = rxcp->num_rcvd;
2313
2314         for (i = 0; i < num_rcvd; i++) {
2315                 page_info = get_rx_page_info(rxo);
2316                 put_page(page_info->page);
2317                 memset(page_info, 0, sizeof(*page_info));
2318         }
2319 }
2320
2321 /*
2322  * skb_fill_rx_data forms a complete skb for an ether frame
2323  * indicated by rxcp.
2324  */
2325 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2326                              struct be_rx_compl_info *rxcp)
2327 {
2328         struct be_rx_page_info *page_info;
2329         u16 i, j;
2330         u16 hdr_len, curr_frag_len, remaining;
2331         u8 *start;
2332
2333         page_info = get_rx_page_info(rxo);
2334         start = page_address(page_info->page) + page_info->page_offset;
2335         prefetch(start);
2336
2337         /* Copy data in the first descriptor of this completion */
2338         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2339
2340         skb->len = curr_frag_len;
2341         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2342                 memcpy(skb->data, start, curr_frag_len);
2343                 /* Complete packet has now been moved to data */
2344                 put_page(page_info->page);
2345                 skb->data_len = 0;
2346                 skb->tail += curr_frag_len;
2347         } else {
2348                 hdr_len = ETH_HLEN;
2349                 memcpy(skb->data, start, hdr_len);
2350                 skb_shinfo(skb)->nr_frags = 1;
2351                 skb_frag_set_page(skb, 0, page_info->page);
2352                 skb_shinfo(skb)->frags[0].page_offset =
2353                                         page_info->page_offset + hdr_len;
2354                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2355                                   curr_frag_len - hdr_len);
2356                 skb->data_len = curr_frag_len - hdr_len;
2357                 skb->truesize += rx_frag_size;
2358                 skb->tail += hdr_len;
2359         }
2360         page_info->page = NULL;
2361
2362         if (rxcp->pkt_size <= rx_frag_size) {
2363                 BUG_ON(rxcp->num_rcvd != 1);
2364                 return;
2365         }
2366
2367         /* More frags present for this completion */
2368         remaining = rxcp->pkt_size - curr_frag_len;
2369         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2370                 page_info = get_rx_page_info(rxo);
2371                 curr_frag_len = min(remaining, rx_frag_size);
2372
2373                 /* Coalesce all frags from the same physical page in one slot */
2374                 if (page_info->page_offset == 0) {
2375                         /* Fresh page */
2376                         j++;
2377                         skb_frag_set_page(skb, j, page_info->page);
2378                         skb_shinfo(skb)->frags[j].page_offset =
2379                                                         page_info->page_offset;
2380                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2381                         skb_shinfo(skb)->nr_frags++;
2382                 } else {
2383                         put_page(page_info->page);
2384                 }
2385
2386                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2387                 skb->len += curr_frag_len;
2388                 skb->data_len += curr_frag_len;
2389                 skb->truesize += rx_frag_size;
2390                 remaining -= curr_frag_len;
2391                 page_info->page = NULL;
2392         }
2393         BUG_ON(j > MAX_SKB_FRAGS);
2394 }
2395
2396 /* Process the RX completion indicated by rxcp when GRO is disabled */
2397 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2398                                 struct be_rx_compl_info *rxcp)
2399 {
2400         struct be_adapter *adapter = rxo->adapter;
2401         struct net_device *netdev = adapter->netdev;
2402         struct sk_buff *skb;
2403
2404         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2405         if (unlikely(!skb)) {
2406                 rx_stats(rxo)->rx_drops_no_skbs++;
2407                 be_rx_compl_discard(rxo, rxcp);
2408                 return;
2409         }
2410
2411         skb_fill_rx_data(rxo, skb, rxcp);
2412
2413         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2414                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2415         else
2416                 skb_checksum_none_assert(skb);
2417
2418         skb->protocol = eth_type_trans(skb, netdev);
2419         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2420         if (netdev->features & NETIF_F_RXHASH)
2421                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2422
2423         skb->csum_level = rxcp->tunneled;
2424         skb_mark_napi_id(skb, napi);
2425
2426         if (rxcp->vlanf)
2427                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2428
2429         netif_receive_skb(skb);
2430 }
2431
2432 /* Process the RX completion indicated by rxcp when GRO is enabled */
2433 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2434                                     struct napi_struct *napi,
2435                                     struct be_rx_compl_info *rxcp)
2436 {
2437         struct be_adapter *adapter = rxo->adapter;
2438         struct be_rx_page_info *page_info;
2439         struct sk_buff *skb = NULL;
2440         u16 remaining, curr_frag_len;
2441         u16 i, j;
2442
2443         skb = napi_get_frags(napi);
2444         if (!skb) {
2445                 be_rx_compl_discard(rxo, rxcp);
2446                 return;
2447         }
2448
2449         remaining = rxcp->pkt_size;
2450         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2451                 page_info = get_rx_page_info(rxo);
2452
2453                 curr_frag_len = min(remaining, rx_frag_size);
2454
2455                 /* Coalesce all frags from the same physical page in one slot */
2456                 if (i == 0 || page_info->page_offset == 0) {
2457                         /* First frag or Fresh page */
2458                         j++;
2459                         skb_frag_set_page(skb, j, page_info->page);
2460                         skb_shinfo(skb)->frags[j].page_offset =
2461                                                         page_info->page_offset;
2462                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2463                 } else {
2464                         put_page(page_info->page);
2465                 }
2466                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2467                 skb->truesize += rx_frag_size;
2468                 remaining -= curr_frag_len;
2469                 memset(page_info, 0, sizeof(*page_info));
2470         }
2471         BUG_ON(j > MAX_SKB_FRAGS);
2472
2473         skb_shinfo(skb)->nr_frags = j + 1;
2474         skb->len = rxcp->pkt_size;
2475         skb->data_len = rxcp->pkt_size;
2476         skb->ip_summed = CHECKSUM_UNNECESSARY;
2477         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2478         if (adapter->netdev->features & NETIF_F_RXHASH)
2479                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2480
2481         skb->csum_level = rxcp->tunneled;
2482
2483         if (rxcp->vlanf)
2484                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2485
2486         napi_gro_frags(napi);
2487 }
2488
2489 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2490                                  struct be_rx_compl_info *rxcp)
2491 {
2492         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2493         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2494         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2495         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2496         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2497         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2498         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2499         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2500         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2501         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2502         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2503         if (rxcp->vlanf) {
2504                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2505                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2506         }
2507         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2508         rxcp->tunneled =
2509                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2510 }
2511
2512 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2513                                  struct be_rx_compl_info *rxcp)
2514 {
2515         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2516         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2517         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2518         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2519         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2520         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2521         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2522         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2523         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2524         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2525         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2526         if (rxcp->vlanf) {
2527                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2528                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2529         }
2530         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2531         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2532 }
2533
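/* Fetches the next valid Rx completion from the CQ (NULL if none), parses
 * it into rxo->rxcp using the v1 or v0 layout depending on be3_native mode
 * and normalizes the vlan fields before returning it.
 */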
2534 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2535 {
2536         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2537         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2538         struct be_adapter *adapter = rxo->adapter;
2539
2540         /* For checking the valid bit it is Ok to use either definition as the
2541          * valid bit is at the same position in both v0 and v1 Rx compl */
2542         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2543                 return NULL;
2544
2545         rmb();
2546         be_dws_le_to_cpu(compl, sizeof(*compl));
2547
2548         if (adapter->be3_native)
2549                 be_parse_rx_compl_v1(compl, rxcp);
2550         else
2551                 be_parse_rx_compl_v0(compl, rxcp);
2552
2553         if (rxcp->ip_frag)
2554                 rxcp->l4_csum = 0;
2555
2556         if (rxcp->vlanf) {
2557                 /* In QNQ modes, if qnq bit is not set, then the packet was
2558                  * tagged only with the transparent outer vlan-tag and must
2559                  * not be treated as a vlan packet by host
2560                  */
2561                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2562                         rxcp->vlanf = 0;
2563
2564                 if (!lancer_chip(adapter))
2565                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2566
2567                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2568                     !test_bit(rxcp->vlan_tag, adapter->vids))
2569                         rxcp->vlanf = 0;
2570         }
2571
2572         /* As the compl has been parsed, reset it; we won't touch it again */
2573         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2574
2575         queue_tail_inc(&rxo->cq);
2576         return rxcp;
2577 }
2578
2579 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2580 {
2581         u32 order = get_order(size);
2582
2583         if (order > 0)
2584                 gfp |= __GFP_COMP;
2585         return alloc_pages(gfp, order);
2586 }
2587
2588 /*
2589  * Allocate a page, split it to fragments of size rx_frag_size and post as
2590  * receive buffers to BE
2591  */
2592 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2593 {
2594         struct be_adapter *adapter = rxo->adapter;
2595         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2596         struct be_queue_info *rxq = &rxo->q;
2597         struct page *pagep = NULL;
2598         struct device *dev = &adapter->pdev->dev;
2599         struct be_eth_rx_d *rxd;
2600         u64 page_dmaaddr = 0, frag_dmaaddr;
2601         u32 posted, page_offset = 0, notify = 0;
2602
2603         page_info = &rxo->page_info_tbl[rxq->head];
2604         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2605                 if (!pagep) {
2606                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2607                         if (unlikely(!pagep)) {
2608                                 rx_stats(rxo)->rx_post_fail++;
2609                                 break;
2610                         }
2611                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2612                                                     adapter->big_page_size,
2613                                                     DMA_FROM_DEVICE);
2614                         if (dma_mapping_error(dev, page_dmaaddr)) {
2615                                 put_page(pagep);
2616                                 pagep = NULL;
2617                                 adapter->drv_stats.dma_map_errors++;
2618                                 break;
2619                         }
2620                         page_offset = 0;
2621                 } else {
2622                         get_page(pagep);
2623                         page_offset += rx_frag_size;
2624                 }
2625                 page_info->page_offset = page_offset;
2626                 page_info->page = pagep;
2627
2628                 rxd = queue_head_node(rxq);
2629                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2630                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2631                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2632
2633                 /* Any space left in the current big page for another frag? */
2634                 if ((page_offset + rx_frag_size + rx_frag_size) >
2635                                         adapter->big_page_size) {
2636                         pagep = NULL;
2637                         page_info->last_frag = true;
2638                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2639                 } else {
2640                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2641                 }
2642
2643                 prev_page_info = page_info;
2644                 queue_head_inc(rxq);
2645                 page_info = &rxo->page_info_tbl[rxq->head];
2646         }
2647
2648         /* Mark the last frag of a page when we break out of the above loop
2649          * with no more slots available in the RXQ
2650          */
2651         if (pagep) {
2652                 prev_page_info->last_frag = true;
2653                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2654         }
2655
2656         if (posted) {
2657                 atomic_add(posted, &rxq->used);
2658                 if (rxo->rx_post_starved)
2659                         rxo->rx_post_starved = false;
2660                 do {
2661                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2662                         be_rxq_notify(adapter, rxq->id, notify);
2663                         posted -= notify;
2664                 } while (posted);
2665         } else if (atomic_read(&rxq->used) == 0) {
2666                 /* Let be_worker replenish when memory is available */
2667                 rxo->rx_post_starved = true;
2668         }
2669 }
2670
2671 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2672 {
2673         switch (status) {
2674         case BE_TX_COMP_HDR_PARSE_ERR:
2675                 tx_stats(txo)->tx_hdr_parse_err++;
2676                 break;
2677         case BE_TX_COMP_NDMA_ERR:
2678                 tx_stats(txo)->tx_dma_err++;
2679                 break;
2680         case BE_TX_COMP_ACL_ERR:
2681                 tx_stats(txo)->tx_spoof_check_err++;
2682                 break;
2683         }
2684 }
2685
2686 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2687 {
2688         switch (status) {
2689         case LANCER_TX_COMP_LSO_ERR:
2690                 tx_stats(txo)->tx_tso_err++;
2691                 break;
2692         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2693         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2694                 tx_stats(txo)->tx_spoof_check_err++;
2695                 break;
2696         case LANCER_TX_COMP_QINQ_ERR:
2697                 tx_stats(txo)->tx_qinq_err++;
2698                 break;
2699         case LANCER_TX_COMP_PARITY_ERR:
2700                 tx_stats(txo)->tx_internal_parity_err++;
2701                 break;
2702         case LANCER_TX_COMP_DMA_ERR:
2703                 tx_stats(txo)->tx_dma_err++;
2704                 break;
2705         case LANCER_TX_COMP_SGE_ERR:
2706                 tx_stats(txo)->tx_sge_err++;
2707                 break;
2708         }
2709 }
2710
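/* Fetches the next valid Tx completion from the CQ (NULL if none) and
 * accounts any completion error in the Tx stats. On Lancer, LSO/SGE/parity
 * errors additionally flag the adapter with BE_ERROR_TX so that it gets
 * reset.
 */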
2711 static struct be_tx_compl_info *be_tx_compl_get(struct be_adapter *adapter,
2712                                                 struct be_tx_obj *txo)
2713 {
2714         struct be_queue_info *tx_cq = &txo->cq;
2715         struct be_tx_compl_info *txcp = &txo->txcp;
2716         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2717
2718         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2719                 return NULL;
2720
2721         /* Ensure load ordering of valid bit dword and other dwords below */
2722         rmb();
2723         be_dws_le_to_cpu(compl, sizeof(*compl));
2724
2725         txcp->status = GET_TX_COMPL_BITS(status, compl);
2726         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2727
2728         if (txcp->status) {
2729                 if (lancer_chip(adapter)) {
2730                         lancer_update_tx_err(txo, txcp->status);
2731                         /* Reset the adapter in case of TSO,
2732                          * SGE or parity error
2733                          */
2734                         if (txcp->status == LANCER_TX_COMP_LSO_ERR ||
2735                             txcp->status == LANCER_TX_COMP_PARITY_ERR ||
2736                             txcp->status == LANCER_TX_COMP_SGE_ERR)
2737                                 be_set_error(adapter, BE_ERROR_TX);
2738                 } else {
2739                         be_update_tx_err(txo, txcp->status);
2740                 }
2741         }
2742
2743         if (be_check_error(adapter, BE_ERROR_TX))
2744                 return NULL;
2745
2746         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2747         queue_tail_inc(tx_cq);
2748         return txcp;
2749 }
2750
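/* Unmaps and frees the WRBs/skbs of the Tx request(s) completed up to
 * last_index and returns the number of WRBs processed.
 */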
2751 static u16 be_tx_compl_process(struct be_adapter *adapter,
2752                                struct be_tx_obj *txo, u16 last_index)
2753 {
2754         struct sk_buff **sent_skbs = txo->sent_skb_list;
2755         struct be_queue_info *txq = &txo->q;
2756         struct sk_buff *skb = NULL;
2757         bool unmap_skb_hdr = false;
2758         struct be_eth_wrb *wrb;
2759         u16 num_wrbs = 0;
2760         u32 frag_index;
2761
2762         do {
2763                 if (sent_skbs[txq->tail]) {
2764                         /* Free skb from prev req */
2765                         if (skb)
2766                                 dev_consume_skb_any(skb);
2767                         skb = sent_skbs[txq->tail];
2768                         sent_skbs[txq->tail] = NULL;
2769                         queue_tail_inc(txq);  /* skip hdr wrb */
2770                         num_wrbs++;
2771                         unmap_skb_hdr = true;
2772                 }
2773                 wrb = queue_tail_node(txq);
2774                 frag_index = txq->tail;
2775                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2776                               (unmap_skb_hdr && skb_headlen(skb)));
2777                 unmap_skb_hdr = false;
2778                 queue_tail_inc(txq);
2779                 num_wrbs++;
2780         } while (frag_index != last_index);
2781         dev_consume_skb_any(skb);
2782
2783         return num_wrbs;
2784 }
2785
2786 /* Return the number of events in the event queue */
2787 static inline int events_get(struct be_eq_obj *eqo)
2788 {
2789         struct be_eq_entry *eqe;
2790         int num = 0;
2791
2792         do {
2793                 eqe = queue_tail_node(&eqo->q);
2794                 if (eqe->evt == 0)
2795                         break;
2796
2797                 rmb();
2798                 eqe->evt = 0;
2799                 num++;
2800                 queue_tail_inc(&eqo->q);
2801         } while (true);
2802
2803         return num;
2804 }
2805
2806 /* Leaves the EQ in a disarmed state */
2807 static void be_eq_clean(struct be_eq_obj *eqo)
2808 {
2809         int num = events_get(eqo);
2810
2811         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2812 }
2813
2814 /* Free posted rx buffers that were not used */
2815 static void be_rxq_clean(struct be_rx_obj *rxo)
2816 {
2817         struct be_queue_info *rxq = &rxo->q;
2818         struct be_rx_page_info *page_info;
2819
2820         while (atomic_read(&rxq->used) > 0) {
2821                 page_info = get_rx_page_info(rxo);
2822                 put_page(page_info->page);
2823                 memset(page_info, 0, sizeof(*page_info));
2824         }
2825         BUG_ON(atomic_read(&rxq->used));
2826         rxq->tail = 0;
2827         rxq->head = 0;
2828 }
2829
2830 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2831 {
2832         struct be_queue_info *rx_cq = &rxo->cq;
2833         struct be_rx_compl_info *rxcp;
2834         struct be_adapter *adapter = rxo->adapter;
2835         int flush_wait = 0;
2836
2837         /* Consume pending rx completions.
2838          * Wait for the flush completion (identified by zero num_rcvd)
2839          * to arrive. Notify the CQ even when there are no more CQ
2840          * entries, so that HW flushes partially coalesced CQ entries.
2841          * In Lancer, there is no need to wait for flush compl.
2842          */
2843         for (;;) {
2844                 rxcp = be_rx_compl_get(rxo);
2845                 if (!rxcp) {
2846                         if (lancer_chip(adapter))
2847                                 break;
2848
2849                         if (flush_wait++ > 50 ||
2850                             be_check_error(adapter,
2851                                            BE_ERROR_HW)) {
2852                                 dev_warn(&adapter->pdev->dev,
2853                                          "did not receive flush compl\n");
2854                                 break;
2855                         }
2856                         be_cq_notify(adapter, rx_cq->id, true, 0);
2857                         mdelay(1);
2858                 } else {
2859                         be_rx_compl_discard(rxo, rxcp);
2860                         be_cq_notify(adapter, rx_cq->id, false, 1);
2861                         if (rxcp->num_rcvd == 0)
2862                                 break;
2863                 }
2864         }
2865
2866         /* After cleanup, leave the CQ in unarmed state */
2867         be_cq_notify(adapter, rx_cq->id, false, 0);
2868 }
2869
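     /* Drains TX completions in two phases: first poll all TX CQs until the
      * HW has produced no completions for ~10ms (or a HW error is detected),
      * then free any WRBs that were posted to the TXQ but never notified to
      * the HW, rewinding the queue indices to the last notified position.
      */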
2870 static void be_tx_compl_clean(struct be_adapter *adapter)
2871 {
2872         struct device *dev = &adapter->pdev->dev;
2873         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2874         struct be_tx_compl_info *txcp;
2875         struct be_queue_info *txq;
2876         u32 end_idx, notified_idx;
2877         struct be_tx_obj *txo;
2878         int i, pending_txqs;
2879
2880         /* Stop polling for compls when HW has been silent for 10ms */
2881         do {
2882                 pending_txqs = adapter->num_tx_qs;
2883
2884                 for_all_tx_queues(adapter, txo, i) {
2885                         cmpl = 0;
2886                         num_wrbs = 0;
2887                         txq = &txo->q;
2888                         while ((txcp = be_tx_compl_get(adapter, txo))) {
2889                                 num_wrbs +=
2890                                         be_tx_compl_process(adapter, txo,
2891                                                             txcp->end_index);
2892                                 cmpl++;
2893                         }
2894                         if (cmpl) {
2895                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2896                                 atomic_sub(num_wrbs, &txq->used);
2897                                 timeo = 0;
2898                         }
2899                         if (!be_is_tx_compl_pending(txo))
2900                                 pending_txqs--;
2901                 }
2902
2903                 if (pending_txqs == 0 || ++timeo > 10 ||
2904                     be_check_error(adapter, BE_ERROR_HW))
2905                         break;
2906
2907                 mdelay(1);
2908         } while (true);
2909
2910         /* Free enqueued TX that was never notified to HW */
2911         for_all_tx_queues(adapter, txo, i) {
2912                 txq = &txo->q;
2913
2914                 if (atomic_read(&txq->used)) {
2915                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2916                                  i, atomic_read(&txq->used));
2917                         notified_idx = txq->tail;
2918                         end_idx = txq->tail;
2919                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2920                                   txq->len);
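                             /* end_idx now points at the last pending WRB,
                              * assuming index_adv() advances the index modulo
                              * the queue length: e.g. tail == 5, used == 3
                              * gives end_idx == 7.
                              */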
2921                         /* Use the tx-compl process logic to handle requests
2922                          * that were not sent to the HW.
2923                          */
2924                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2925                         atomic_sub(num_wrbs, &txq->used);
2926                         BUG_ON(atomic_read(&txq->used));
2927                         txo->pend_wrb_cnt = 0;
2928                         /* Since hw was never notified of these requests,
2929                          * reset TXQ indices
2930                          */
2931                         txq->head = notified_idx;
2932                         txq->tail = notified_idx;
2933                 }
2934         }
2935 }
2936
2937 static void be_evt_queues_destroy(struct be_adapter *adapter)
2938 {
2939         struct be_eq_obj *eqo;
2940         int i;
2941
2942         for_all_evt_queues(adapter, eqo, i) {
2943                 if (eqo->q.created) {
2944                         be_eq_clean(eqo);
2945                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2946                         netif_napi_del(&eqo->napi);
2947                         free_cpumask_var(eqo->affinity_mask);
2948                 }
2949                 be_queue_free(adapter, &eqo->q);
2950         }
2951 }
2952
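     /* Creates one EQ (with a NAPI context) per interrupt vector actually
      * needed: the EQ count is capped by num_irqs() and by the larger of the
      * configured RX and TX IRQ counts. Each EQ's affinity mask is spread
      * across CPUs local to the adapter's NUMA node.
      */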
2953 static int be_evt_queues_create(struct be_adapter *adapter)
2954 {
2955         struct be_queue_info *eq;
2956         struct be_eq_obj *eqo;
2957         struct be_aic_obj *aic;
2958         int i, rc;
2959
2960         /* need enough EQs to service both RX and TX queues */
2961         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2962                                     max(adapter->cfg_num_rx_irqs,
2963                                         adapter->cfg_num_tx_irqs));
2964
2965         for_all_evt_queues(adapter, eqo, i) {
2966                 int numa_node = dev_to_node(&adapter->pdev->dev);
2967
2968                 aic = &adapter->aic_obj[i];
2969                 eqo->adapter = adapter;
2970                 eqo->idx = i;
2971                 aic->max_eqd = BE_MAX_EQD;
2972                 aic->enable = true;
2973
2974                 eq = &eqo->q;
2975                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2976                                     sizeof(struct be_eq_entry));
2977                 if (rc)
2978                         return rc;
2979
2980                 rc = be_cmd_eq_create(adapter, eqo);
2981                 if (rc)
2982                         return rc;
2983
2984                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2985                         return -ENOMEM;
2986                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2987                                 eqo->affinity_mask);
2988                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2989                                BE_NAPI_WEIGHT);
2990         }
2991         return 0;
2992 }
2993
2994 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2995 {
2996         struct be_queue_info *q;
2997
2998         q = &adapter->mcc_obj.q;
2999         if (q->created)
3000                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
3001         be_queue_free(adapter, q);
3002
3003         q = &adapter->mcc_obj.cq;
3004         if (q->created)
3005                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3006         be_queue_free(adapter, q);
3007 }
3008
3009 /* Must be called only after TX qs are created as MCC shares TX EQ */
3010 static int be_mcc_queues_create(struct be_adapter *adapter)
3011 {
3012         struct be_queue_info *q, *cq;
3013
3014         cq = &adapter->mcc_obj.cq;
3015         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
3016                            sizeof(struct be_mcc_compl)))
3017                 goto err;
3018
3019         /* Use the default EQ for MCC completions */
3020         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
3021                 goto mcc_cq_free;
3022
3023         q = &adapter->mcc_obj.q;
3024         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
3025                 goto mcc_cq_destroy;
3026
3027         if (be_cmd_mccq_create(adapter, q, cq))
3028                 goto mcc_q_free;
3029
3030         return 0;
3031
3032 mcc_q_free:
3033         be_queue_free(adapter, q);
3034 mcc_cq_destroy:
3035         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
3036 mcc_cq_free:
3037         be_queue_free(adapter, cq);
3038 err:
3039         return -1;
3040 }
3041
3042 static void be_tx_queues_destroy(struct be_adapter *adapter)
3043 {
3044         struct be_queue_info *q;
3045         struct be_tx_obj *txo;
3046         u8 i;
3047
3048         for_all_tx_queues(adapter, txo, i) {
3049                 q = &txo->q;
3050                 if (q->created)
3051                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
3052                 be_queue_free(adapter, q);
3053
3054                 q = &txo->cq;
3055                 if (q->created)
3056                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3057                 be_queue_free(adapter, q);
3058         }
3059 }
3060
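     /* Creates a CQ and a TXQ for each TX queue. When there are fewer EQs
      * than TX queues, TXQs share EQs (selected as i % num_evt_qs), and each
      * queue's XPS map is set to the affinity mask of the EQ it uses.
      */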
3061 static int be_tx_qs_create(struct be_adapter *adapter)
3062 {
3063         struct be_queue_info *cq;
3064         struct be_tx_obj *txo;
3065         struct be_eq_obj *eqo;
3066         int status, i;
3067
3068         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
3069
3070         for_all_tx_queues(adapter, txo, i) {
3071                 cq = &txo->cq;
3072                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
3073                                         sizeof(struct be_eth_tx_compl));
3074                 if (status)
3075                         return status;
3076
3077                 u64_stats_init(&txo->stats.sync);
3078                 u64_stats_init(&txo->stats.sync_compl);
3079
3080                 /* If num_evt_qs is less than num_tx_qs, then more than
3081                  * one txq shares an eq
3082                  */
3083                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
3084                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
3085                 if (status)
3086                         return status;
3087
3088                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
3089                                         sizeof(struct be_eth_wrb));
3090                 if (status)
3091                         return status;
3092
3093                 status = be_cmd_txq_create(adapter, txo);
3094                 if (status)
3095                         return status;
3096
3097                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
3098                                     eqo->idx);
3099         }
3100
3101         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
3102                  adapter->num_tx_qs);
3103         return 0;
3104 }
3105
3106 static void be_rx_cqs_destroy(struct be_adapter *adapter)
3107 {
3108         struct be_queue_info *q;
3109         struct be_rx_obj *rxo;
3110         int i;
3111
3112         for_all_rx_queues(adapter, rxo, i) {
3113                 q = &rxo->cq;
3114                 if (q->created)
3115                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3116                 be_queue_free(adapter, q);
3117         }
3118 }
3119
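     /* Decides how many RX queues to use and creates a CQ for each one: RSS
      * rings are used only when at least two are possible (otherwise
      * num_rss_qs is 0), a default RXQ is added when needed, and a single
      * RXQ is used as a fallback. RX CQs share EQs round-robin, just like
      * the TX CQs.
      */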
3120 static int be_rx_cqs_create(struct be_adapter *adapter)
3121 {
3122         struct be_queue_info *eq, *cq;
3123         struct be_rx_obj *rxo;
3124         int rc, i;
3125
3126         adapter->num_rss_qs =
3127                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
3128
3129         /* We'll use RSS only if at least 2 RSS rings are supported. */
3130         if (adapter->num_rss_qs < 2)
3131                 adapter->num_rss_qs = 0;
3132
3133         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
3134
3135         /* When the interface is not capable of RSS rings (and there is no
3136          * need to create a default RXQ), we'll still need one RXQ
3137          */
3138         if (adapter->num_rx_qs == 0)
3139                 adapter->num_rx_qs = 1;
3140
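             /* e.g. with the default rx_frag_size of 2048 and 4K pages,
              * get_order(2048) == 0, so big_page_size == 4096 (a single page);
              * an rx_frag_size larger than PAGE_SIZE would select a
              * higher-order compound page.
              */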
3141         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3142         for_all_rx_queues(adapter, rxo, i) {
3143                 rxo->adapter = adapter;
3144                 cq = &rxo->cq;
3145                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3146                                     sizeof(struct be_eth_rx_compl));
3147                 if (rc)
3148                         return rc;
3149
3150                 u64_stats_init(&rxo->stats.sync);
3151                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3152                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3153                 if (rc)
3154                         return rc;
3155         }
3156
3157         dev_info(&adapter->pdev->dev,
3158                  "created %d RX queue(s)\n", adapter->num_rx_qs);
3159         return 0;
3160 }
3161
3162 static irqreturn_t be_intx(int irq, void *dev)
3163 {
3164         struct be_eq_obj *eqo = dev;
3165         struct be_adapter *adapter = eqo->adapter;
3166         int num_evts = 0;
3167
3168         /* IRQ is not expected when NAPI is scheduled as the EQ
3169          * will not be armed.
3170          * But this can happen on Lancer INTx, where it takes
3171          * a while to de-assert INTx, or on BE2, where occasionally
3172          * an interrupt may be raised even when the EQ is unarmed.
3173          * If NAPI is already scheduled, then counting & notifying
3174          * events will orphan them.
3175          */
3176         if (napi_schedule_prep(&eqo->napi)) {
3177                 num_evts = events_get(eqo);
3178                 __napi_schedule(&eqo->napi);
3179                 if (num_evts)
3180                         eqo->spurious_intr = 0;
3181         }
3182         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3183
3184         /* Return IRQ_HANDLED only for the first spurious intr
3185          * after a valid intr to stop the kernel from branding
3186          * this irq as a bad one!
3187          */
3188         if (num_evts || eqo->spurious_intr++ == 0)
3189                 return IRQ_HANDLED;
3190         else
3191                 return IRQ_NONE;
3192 }
3193
3194 static irqreturn_t be_msix(int irq, void *dev)
3195 {
3196         struct be_eq_obj *eqo = dev;
3197
3198         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3199         napi_schedule(&eqo->napi);
3200         return IRQ_HANDLED;
3201 }
3202
3203 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3204 {
3205         return rxcp->tcpf && !rxcp->err && rxcp->l4_csum;
3206 }
3207
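     /* NAPI RX path for one RX object: consume up to 'budget' completions,
      * skipping flush completions (num_rcvd == 0), discarding zero-length
      * completions (partial DMA on Lancer B0) and, on BE chips, packets that
      * arrived on the wrong port. The CQ is then acked for the work done and
      * the RXQ is replenished unless it is in the post_starved state.
      */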
3208 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3209                          int budget)
3210 {
3211         struct be_adapter *adapter = rxo->adapter;
3212         struct be_queue_info *rx_cq = &rxo->cq;
3213         struct be_rx_compl_info *rxcp;
3214         u32 work_done;
3215         u32 frags_consumed = 0;
3216
3217         for (work_done = 0; work_done < budget; work_done++) {
3218                 rxcp = be_rx_compl_get(rxo);
3219                 if (!rxcp)
3220                         break;
3221
3222                 /* Is it a flush compl that has no data */
3223                 if (unlikely(rxcp->num_rcvd == 0))
3224                         goto loop_continue;
3225
3226                 /* Discard compl with partial DMA Lancer B0 */
3227                 if (unlikely(!rxcp->pkt_size)) {
3228                         be_rx_compl_discard(rxo, rxcp);
3229                         goto loop_continue;
3230                 }
3231
3232                 /* On BE, drop pkts that arrive due to imperfect filtering in
3233                  * promiscuous mode on some SKUs
3234                  */
3235                 if (unlikely(rxcp->port != adapter->port_num &&
3236                              !lancer_chip(adapter))) {
3237                         be_rx_compl_discard(rxo, rxcp);
3238                         goto loop_continue;
3239                 }
3240
3241                 if (do_gro(rxcp))
3242                         be_rx_compl_process_gro(rxo, napi, rxcp);
3243                 else
3244                         be_rx_compl_process(rxo, napi, rxcp);
3245
3246 loop_continue:
3247                 frags_consumed += rxcp->num_rcvd;
3248                 be_rx_stats_update(rxo, rxcp);
3249         }
3250
3251         if (work_done) {
3252                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3253
3254                 /* When an rx-obj gets into post_starved state, just
3255                  * let be_worker do the posting.
3256                  */
3257                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3258                     !rxo->rx_post_starved)
3259                         be_post_rx_frags(rxo, GFP_ATOMIC,
3260                                          max_t(u32, MAX_RX_POST,
3261                                                frags_consumed));
3262         }
3263
3264         return work_done;
3265 }
3266
3267
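     /* Reaps TX completions for one TX object: frees the completed WRBs,
      * acks the CQ, and wakes the corresponding netdev subqueue if it was
      * stopped and enough WRBs have been freed (be_can_txq_wake()).
      */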
3268 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3269                           int idx)
3270 {
3271         int num_wrbs = 0, work_done = 0;
3272         struct be_tx_compl_info *txcp;
3273
3274         while ((txcp = be_tx_compl_get(adapter, txo))) {
3275                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3276                 work_done++;
3277         }
3278
3279         if (work_done) {
3280                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3281                 atomic_sub(num_wrbs, &txo->q.used);
3282
3283                 /* As Tx wrbs have been freed up, wake up netdev queue
3284                  * if it was stopped due to lack of tx wrbs.  */
3285                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3286                     be_can_txq_wake(txo)) {
3287                         netif_wake_subqueue(adapter->netdev, idx);
3288                 }
3289
3290                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3291                 tx_stats(txo)->tx_compl += work_done;
3292                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3293         }
3294 }
3295
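     /* NAPI poll handler shared by all EQs: count pending events, service
      * every TXQ and RXQ mapped to this EQ, and process MCC completions if
      * this is the MCC EQ. When less than the budget was consumed, complete
      * NAPI and re-arm the EQ (using a delay multiplier encoding on Skyhawk);
      * otherwise leave the EQ unarmed and keep polling.
      */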
3296 int be_poll(struct napi_struct *napi, int budget)
3297 {
3298         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3299         struct be_adapter *adapter = eqo->adapter;
3300         int max_work = 0, work, i, num_evts;
3301         struct be_rx_obj *rxo;
3302         struct be_tx_obj *txo;
3303         u32 mult_enc = 0;
3304
3305         num_evts = events_get(eqo);
3306
3307         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3308                 be_process_tx(adapter, txo, i);
3309
3310         /* This loop will iterate twice for EQ0, in which
3311          * completions of the last RXQ (the default one) are also processed.
3312          * For other EQs the loop iterates only once.
3313          */
3314         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3315                 work = be_process_rx(rxo, napi, budget);
3316                 max_work = max(work, max_work);
3317         }
3318
3319         if (is_mcc_eqo(eqo))
3320                 be_process_mcc(adapter);
3321
3322         if (max_work < budget) {
3323                 napi_complete_done(napi, max_work);
3324
3325                 /* Skyhawk EQ_DB has a provision to set the re-arm-to-interrupt
3326                  * delay via a delay multiplier encoding value
3327                  */
3328                 if (skyhawk_chip(adapter))
3329                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3330
3331                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3332                              mult_enc);
3333         } else {
3334                 /* As we'll continue in polling mode, count and clear events */
3335                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3336         }
3337         return max_work;
3338 }
3339
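     /* Checks for unrecoverable HW/FW errors. On Lancer the SLIPORT
      * status/error registers are consulted; on other chips the UE status
      * CSRs are read and filtered through their mask registers. On BE3 a UE
      * is treated as real only when the POST stage indicates FAT_LOG_START,
      * ARMFW_UE or a recoverable error, since some platforms report spurious
      * UEs.
      */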
3340 void be_detect_error(struct be_adapter *adapter)
3341 {
3342         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3343         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3344         struct device *dev = &adapter->pdev->dev;
3345         u16 val;
3346         u32 i;
3347
3348         if (be_check_error(adapter, BE_ERROR_HW))
3349                 return;
3350
3351         if (lancer_chip(adapter)) {
3352                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3353                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3354                         be_set_error(adapter, BE_ERROR_UE);
3355                         sliport_err1 = ioread32(adapter->db +
3356                                                 SLIPORT_ERROR1_OFFSET);
3357                         sliport_err2 = ioread32(adapter->db +
3358                                                 SLIPORT_ERROR2_OFFSET);
3359                         /* Do not log error messages if it's a FW reset */
3360                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3361                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3362                                 dev_info(dev, "Reset is in progress\n");
3363                         } else {
3364                                 dev_err(dev, "Error detected in the card\n");
3365                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3366                                         sliport_status);
3367                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3368                                         sliport_err1);
3369                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3370                                         sliport_err2);
3371                         }
3372                 }
3373         } else {
3374                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3375                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3376                 ue_lo_mask = ioread32(adapter->pcicfg +
3377                                       PCICFG_UE_STATUS_LOW_MASK);
3378                 ue_hi_mask = ioread32(adapter->pcicfg +
3379                                       PCICFG_UE_STATUS_HI_MASK);
3380
3381                 ue_lo = (ue_lo & ~ue_lo_mask);
3382                 ue_hi = (ue_hi & ~ue_hi_mask);
3383
3384                 if (ue_lo || ue_hi) {
3385                         /* On certain platforms BE3 hardware can indicate
3386                          * spurious UEs. In case of a UE in the chip,
3387                          * the POST register correctly reports either a
3388                          * FAT_LOG_START state (FW is currently dumping
3389                          * FAT log data) or an ARMFW_UE state. Check for the
3390                          * above states to ascertain if the UE is valid or not.
3391                          */
3392                         if (BE3_chip(adapter)) {
3393                                 val = be_POST_stage_get(adapter);
3394                                 if ((val & POST_STAGE_FAT_LOG_START)
3395                                      != POST_STAGE_FAT_LOG_START &&
3396                                     (val & POST_STAGE_ARMFW_UE)
3397                                      != POST_STAGE_ARMFW_UE &&
3398                                     (val & POST_STAGE_RECOVERABLE_ERR)
3399                                      != POST_STAGE_RECOVERABLE_ERR)
3400                                         return;
3401                         }
3402
3403                         dev_err(dev, "Error detected in the adapter\n");
3404                         be_set_error(adapter, BE_ERROR_UE);
3405
3406                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3407                                 if (ue_lo & 1)
3408                                         dev_err(dev, "UE: %s bit set\n",
3409                                                 ue_status_low_desc[i]);
3410                         }
3411                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3412                                 if (ue_hi & 1)
3413                                         dev_err(dev, "UE: %s bit set\n",
3414                                                 ue_status_hi_desc[i]);
3415                         }
3416                 }
3417         }
3418 }
3419
3420 static void be_msix_disable(struct be_adapter *adapter)
3421 {
3422         if (msix_enabled(adapter)) {
3423                 pci_disable_msix(adapter->pdev);
3424                 adapter->num_msix_vec = 0;
3425                 adapter->num_msix_roce_vec = 0;
3426         }
3427 }
3428
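     /* Enables MSI-X. When RoCE is supported, enough vectors are requested
      * to cover both the NIC EQs and the RoCE EQs (capped by the number of
      * online CPUs); otherwise only max(cfg_num_rx_irqs, cfg_num_tx_irqs)
      * vectors are requested. When RoCE is supported and more than
      * MIN_MSIX_VECTORS are granted, half of the vectors go to RoCE and the
      * rest are kept for the NIC.
      */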
3429 static int be_msix_enable(struct be_adapter *adapter)
3430 {
3431         unsigned int i, max_roce_eqs;
3432         struct device *dev = &adapter->pdev->dev;
3433         int num_vec;
3434
3435         /* If RoCE is supported, program the max number of vectors that
3436          * could be used for NIC and RoCE; otherwise just program the number
3437          * we'll use initially.
3438          */
3439         if (be_roce_supported(adapter)) {
3440                 max_roce_eqs =
3441                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3442                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3443                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3444         } else {
3445                 num_vec = max(adapter->cfg_num_rx_irqs,
3446                               adapter->cfg_num_tx_irqs);
3447         }
3448
3449         for (i = 0; i < num_vec; i++)
3450                 adapter->msix_entries[i].entry = i;
3451
3452         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3453                                         MIN_MSIX_VECTORS, num_vec);
3454         if (num_vec < 0)
3455                 goto fail;
3456
3457         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3458                 adapter->num_msix_roce_vec = num_vec / 2;
3459                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3460                          adapter->num_msix_roce_vec);
3461         }
3462
3463         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3464
3465         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3466                  adapter->num_msix_vec);
3467         return 0;
3468
3469 fail:
3470         dev_warn(dev, "MSIx enable failed\n");
3471
3472         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3473         if (be_virtfn(adapter))
3474                 return num_vec;
3475         return 0;
3476 }
3477
3478 static inline int be_msix_vec_get(struct be_adapter *adapter,
3479                                   struct be_eq_obj *eqo)
3480 {
3481         return adapter->msix_entries[eqo->msix_idx].vector;
3482 }
3483
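     /* Requests one MSI-X IRQ per event queue, named "<netdev>-q<N>", and
      * sets its affinity hint to the EQ's affinity mask. On failure all
      * previously requested vectors are freed and MSI-X is disabled.
      */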
3484 static int be_msix_register(struct be_adapter *adapter)
3485 {
3486         struct net_device *netdev = adapter->netdev;
3487         struct be_eq_obj *eqo;
3488         int status, i, vec;
3489
3490         for_all_evt_queues(adapter, eqo, i) {
3491                 char irq_name[IFNAMSIZ + 4];
3492
3493                 snprintf(irq_name, sizeof(irq_name), "%s-q%d", netdev->name, i);
3494                 vec = be_msix_vec_get(adapter, eqo);
3495                 status = request_irq(vec, be_msix, 0, irq_name, eqo);
3496                 if (status)
3497                         goto err_msix;
3498
3499                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3500         }
3501
3502         return 0;
3503 err_msix:
3504         for (i--; i >= 0; i--) {
3505                 eqo = &adapter->eq_obj[i];
3506                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3507         }
3508         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3509                  status);
3510         be_msix_disable(adapter);
3511         return status;
3512 }
3513
3514 static int be_irq_register(struct be_adapter *adapter)
3515 {
3516         struct net_device *netdev = adapter->netdev;
3517         int status;
3518
3519         if (msix_enabled(adapter)) {
3520                 status = be_msix_register(adapter);
3521                 if (status == 0)
3522                         goto done;
3523                 /* INTx is not supported for VFs */
3524                 if (be_virtfn(adapter))
3525                         return status;
3526         }
3527
3528         /* INTx: only the first EQ is used */
3529         netdev->irq = adapter->pdev->irq;
3530         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3531                              &adapter->eq_obj[0]);
3532         if (status) {
3533                 dev_err(&adapter->pdev->dev,
3534                         "INTx request IRQ failed - err %d\n", status);
3535                 return status;
3536         }
3537 done:
3538         adapter->isr_registered = true;
3539         return 0;
3540 }
3541
3542 static void be_irq_unregister(struct be_adapter *adapter)
3543 {
3544         struct net_device *netdev = adapter->netdev;
3545         struct be_eq_obj *eqo;
3546         int i, vec;
3547
3548         if (!adapter->isr_registered)
3549                 return;
3550
3551         /* INTx */
3552         if (!msix_enabled(adapter)) {
3553                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3554                 goto done;
3555         }
3556
3557         /* MSIx */
3558         for_all_evt_queues(adapter, eqo, i) {
3559                 vec = be_msix_vec_get(adapter, eqo);
3560                 irq_set_affinity_hint(vec, NULL);
3561                 free_irq(vec, eqo);
3562         }
3563
3564 done:
3565         adapter->isr_registered = false;
3566 }
3567
3568 static void be_rx_qs_destroy(struct be_adapter *adapter)
3569 {
3570         struct rss_info *rss = &adapter->rss_info;
3571         struct be_queue_info *q;
3572         struct be_rx_obj *rxo;
3573         int i;
3574
3575         for_all_rx_queues(adapter, rxo, i) {
3576                 q = &rxo->q;
3577                 if (q->created) {
3578                         /* If RXQs are destroyed while in an "out of buffer"
3579                          * state, there is a possibility of an HW stall on
3580                          * Lancer. So, post 64 buffers to each queue to relieve
3581                          * the "out of buffer" condition.
3582                          * Make sure there's space in the RXQ before posting.
3583                          */
3584                         if (lancer_chip(adapter)) {
3585                                 be_rx_cq_clean(rxo);
3586                                 if (atomic_read(&q->used) == 0)
3587                                         be_post_rx_frags(rxo, GFP_KERNEL,
3588                                                          MAX_RX_POST);
3589                         }
3590
3591                         be_cmd_rxq_destroy(adapter, q);
3592                         be_rx_cq_clean(rxo);
3593                         be_rxq_clean(rxo);
3594                 }
3595                 be_queue_free(adapter, q);
3596         }
3597
3598         if (rss->rss_flags) {
3599                 rss->rss_flags = RSS_ENABLE_NONE;
3600                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3601                                   128, rss->rss_hkey);
3602         }
3603 }
3604
3605 static void be_disable_if_filters(struct be_adapter *adapter)
3606 {
3607         /* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3608         if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3609             check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3610                 be_dev_mac_del(adapter, adapter->pmac_id[0]);
3611                 eth_zero_addr(adapter->dev_mac);
3612         }
3613
3614         be_clear_uc_list(adapter);
3615         be_clear_mc_list(adapter);
3616
3617         /* The IFACE flags are enabled in the open path and cleared
3618          * in the close path. When a VF gets detached from the host and
3619          * assigned to a VM the following happens:
3620          *      - VF's IFACE flags get cleared in the detach path
3621          *      - IFACE create is issued by the VF in the attach path
3622          * Due to a bug in the BE3/Skyhawk-R FW
3623          * (Lancer FW doesn't have the bug), the IFACE capability flags
3624          * specified along with the IFACE create cmd issued by a VF are not
3625          * honoured by FW.  As a consequence, if a *new* driver
3626          * (that enables/disables IFACE flags in open/close)
3627          * is loaded in the host and an *old* driver is used by a VM/VF,
3628          * the IFACE gets created *without* the needed flags.
3629          * To avoid this, disable RX-filter flags only for Lancer.
3630          */
3631         if (lancer_chip(adapter)) {
3632                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3633                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3634         }
3635 }
3636
3637 static int be_close(st