/*
 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2005, 2006, 2007, 2008 Mellanox Technologies. All rights reserved.
 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>
#include <linux/delay.h>
#include <linux/kmod.h>
#include <linux/etherdevice.h>
#include <net/devlink.h>

#include <uapi/rdma/mlx4-abi.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>

#include "mlx4.h"
#include "fw.h"
#include "icm.h"

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA low-level driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);

struct workqueue_struct *mlx4_wq;

#ifdef CONFIG_MLX4_DEBUG

int mlx4_debug_level = 0;
module_param_named(debug_level, mlx4_debug_level, int, 0644);
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");

#endif /* CONFIG_MLX4_DEBUG */

#ifdef CONFIG_PCI_MSI

static int msi_x = 1;
module_param(msi_x, int, 0444);
MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");

#else /* CONFIG_PCI_MSI */

#define msi_x (0)

#endif /* CONFIG_PCI_MSI */

static uint8_t num_vfs[3] = {0, 0, 0};
static int num_vfs_argc;
module_param_array(num_vfs, byte, &num_vfs_argc, 0444);
MODULE_PARM_DESC(num_vfs, "enable #num_vfs functions if num_vfs > 0\n"
                          "num_vfs=port1,port2,port1+2");

static uint8_t probe_vf[3] = {0, 0, 0};
static int probe_vfs_argc;
module_param_array(probe_vf, byte, &probe_vfs_argc, 0444);
MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)\n"
                           "probe_vf=port1,port2,port1+2");

static int mlx4_log_num_mgm_entry_size = MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
module_param_named(log_num_mgm_entry_size,
                        mlx4_log_num_mgm_entry_size, int, 0444);
MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, which defines the num"
                                         " of qp per mcg, for example:"
                                         " 10 gives 248. Range: 7 <="
                                         " log_num_mgm_entry_size <= 12."
                                         " To activate device managed"
                                         " flow steering when available, set to -1");

static bool enable_64b_cqe_eqe = true;
module_param(enable_64b_cqe_eqe, bool, 0444);
MODULE_PARM_DESC(enable_64b_cqe_eqe,
                 "Enable 64 byte CQEs/EQEs when the FW supports this (default: True)");

static bool enable_4k_uar;
module_param(enable_4k_uar, bool, 0444);
MODULE_PARM_DESC(enable_4k_uar,
                 "Enable using 4K UAR. Should not be enabled if there are VFs which do not support 4K UARs (default: false)");

#define PF_CONTEXT_BEHAVIOUR_MASK       (MLX4_FUNC_CAP_64B_EQE_CQE | \
                                         MLX4_FUNC_CAP_EQE_CQE_STRIDE | \
                                         MLX4_FUNC_CAP_DMFS_A0_STATIC)

#define RESET_PERSIST_MASK_FLAGS        (MLX4_FLAG_SRIOV)

static char mlx4_version[] =
        DRV_NAME ": Mellanox ConnectX core driver v"
        DRV_VERSION "\n";

static const struct mlx4_profile default_profile = {
        .num_qp         = 1 << 18,
        .num_srq        = 1 << 16,
        .rdmarc_per_qp  = 1 << 4,
        .num_cq         = 1 << 16,
        .num_mcg        = 1 << 13,
        .num_mpt        = 1 << 19,
        .num_mtt        = 1 << 20, /* It is really num mtt segments */
};

static const struct mlx4_profile low_mem_profile = {
        .num_qp         = 1 << 17,
        .num_srq        = 1 << 6,
        .rdmarc_per_qp  = 1 << 4,
        .num_cq         = 1 << 8,
        .num_mcg        = 1 << 8,
        .num_mpt        = 1 << 9,
        .num_mtt        = 1 << 7,
};

static int log_num_mac = 7;
module_param_named(log_num_mac, log_num_mac, int, 0444);
MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)");

static int log_num_vlan;
module_param_named(log_num_vlan, log_num_vlan, int, 0444);
MODULE_PARM_DESC(log_num_vlan, "Log2 max number of VLANs per ETH port (0-7)");
/* Log2 max number of VLANs per ETH port (0-7) */
#define MLX4_LOG_NUM_VLANS 7
#define MLX4_MIN_LOG_NUM_VLANS 0
#define MLX4_MIN_LOG_NUM_MAC 1

static bool use_prio;
module_param_named(use_prio, use_prio, bool, 0444);
MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports (deprecated)");

int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-7)");

static int port_type_array[2] = {MLX4_PORT_TYPE_NONE, MLX4_PORT_TYPE_NONE};
static int arr_argc = 2;
module_param_array(port_type_array, int, &arr_argc, 0444);
MODULE_PARM_DESC(port_type_array, "Array of port types: HW_DEFAULT (0) is default, "
                                "1 for IB, 2 for Ethernet");

struct mlx4_port_config {
        struct list_head list;
        enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
        struct pci_dev *pdev;
};

static atomic_t pf_loading = ATOMIC_INIT(0);

static inline void mlx4_set_num_reserved_uars(struct mlx4_dev *dev,
                                              struct mlx4_dev_cap *dev_cap)
{
        /* reserved_uars is calculated in units of the system page size.
         * Therefore, an adjustment is needed when the UAR page size is
         * smaller than the system page size.
         */
        dev->caps.reserved_uars =
                max_t(int,
                      mlx4_get_num_reserved_uar(dev),
                      dev_cap->reserved_uars /
                        (1 << (PAGE_SHIFT - dev->uar_page_shift)));
}

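/*
 * Validate a requested port type configuration: unless the HCA
 * supports different port types per port (DPDP), all ports must use
 * the same type, and each requested type must be supported by the
 * corresponding port.
 */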
int mlx4_check_port_params(struct mlx4_dev *dev,
                           enum mlx4_port_type *port_type)
{
        int i;

        if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
                for (i = 0; i < dev->caps.num_ports - 1; i++) {
                        if (port_type[i] != port_type[i + 1]) {
                                mlx4_err(dev, "Only same port types supported on this HCA, aborting\n");
                                return -EINVAL;
                        }
                }
        }

        for (i = 0; i < dev->caps.num_ports; i++) {
                if (!(port_type[i] & dev->caps.supported_type[i+1])) {
                        mlx4_err(dev, "Requested port type for port %d is not supported on this HCA\n",
                                 i + 1);
                        return -EINVAL;
                }
        }
        return 0;
}

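/* Refresh the per-port type mask from the currently configured port types. */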
static void mlx4_set_port_mask(struct mlx4_dev *dev)
{
        int i;

        for (i = 1; i <= dev->caps.num_ports; ++i)
                dev->caps.port_mask[i] = dev->caps.port_type[i];
}

enum {
        MLX4_QUERY_FUNC_NUM_SYS_EQS = 1 << 0,
};

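/*
 * On HCAs that expose system EQs, refine the device capabilities with
 * the per-function EQ and UAR resources reported by QUERY_FUNC.
 */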
static int mlx4_query_func(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
        int err = 0;
        struct mlx4_func func;

        if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
                err = mlx4_QUERY_FUNC(dev, &func, 0);
                if (err) {
                        mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n");
                        return err;
                }
                dev_cap->max_eqs = func.max_eq;
                dev_cap->reserved_eqs = func.rsvd_eqs;
                dev_cap->reserved_uars = func.rsvd_uars;
                err |= MLX4_QUERY_FUNC_NUM_SYS_EQS;
        }
        return err;
}

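/*
 * Enable 64B CQE/EQE stride when both the firmware and the CPU cache
 * line size (128B or 256B) allow it; otherwise clear the stride
 * capability bits.
 */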
static void mlx4_enable_cqe_eqe_stride(struct mlx4_dev *dev)
{
        struct mlx4_caps *dev_cap = &dev->caps;

        /* FW not supporting or cancelled by user */
        if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_EQE_STRIDE) ||
            !(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_CQE_STRIDE))
                return;

        /* Must have 64B CQE_EQE enabled by FW to use bigger stride
         * When FW has NCSI it may decide not to report 64B CQE/EQEs
         */
        if (!(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_EQE) ||
            !(dev_cap->flags & MLX4_DEV_CAP_FLAG_64B_CQE)) {
                dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
                dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
                return;
        }

        if (cache_line_size() == 128 || cache_line_size() == 256) {
                mlx4_dbg(dev, "Enabling CQE stride cacheLine supported\n");
                /* Changing the real data inside CQE size to 32B */
                dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
                dev_cap->flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;

                if (mlx4_is_master(dev))
                        dev_cap->function_caps |= MLX4_FUNC_CAP_EQE_CQE_STRIDE;
        } else {
                if (cache_line_size() != 32  && cache_line_size() != 64)
                        mlx4_dbg(dev, "Disabling CQE stride, cacheLine size unsupported\n");
                dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
                dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
        }
}

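/* Copy the queried per-port capabilities into the device capability struct. */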
static int _mlx4_dev_port(struct mlx4_dev *dev, int port,
                          struct mlx4_port_cap *port_cap)
{
        dev->caps.vl_cap[port]      = port_cap->max_vl;
        dev->caps.ib_mtu_cap[port]          = port_cap->ib_mtu;
        dev->phys_caps.gid_phys_table_len[port]  = port_cap->max_gids;
        dev->phys_caps.pkey_phys_table_len[port] = port_cap->max_pkeys;
        /* set gid and pkey table operating lengths by default
         * to non-sriov values
         */
        dev->caps.gid_table_len[port]  = port_cap->max_gids;
        dev->caps.pkey_table_len[port] = port_cap->max_pkeys;
        dev->caps.port_width_cap[port] = port_cap->max_port_width;
        dev->caps.eth_mtu_cap[port]    = port_cap->eth_mtu;
        dev->caps.max_tc_eth           = port_cap->max_tc_eth;
        dev->caps.def_mac[port]        = port_cap->def_mac;
        dev->caps.supported_type[port] = port_cap->supported_port_types;
        dev->caps.suggested_type[port] = port_cap->suggested_type;
        dev->caps.default_sense[port] = port_cap->default_sense;
        dev->caps.trans_type[port]          = port_cap->trans_type;
        dev->caps.vendor_oui[port]     = port_cap->vendor_oui;
        dev->caps.wavelength[port]     = port_cap->wavelength;
        dev->caps.trans_code[port]     = port_cap->trans_code;

        return 0;
}

static int mlx4_dev_port(struct mlx4_dev *dev, int port,
                         struct mlx4_port_cap *port_cap)
{
        int err = 0;

        err = mlx4_QUERY_PORT(dev, port, port_cap);

        if (err)
                mlx4_err(dev, "QUERY_PORT command failed.\n");

        return err;
}

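/*
 * Ignore-FCS requires native (non-multifunction) mode and the Keep-FCS
 * device capability; clear the flag when either condition is not met.
 */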
static inline void mlx4_enable_ignore_fcs(struct mlx4_dev *dev)
{
        if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_IGNORE_FCS))
                return;

        if (mlx4_is_mfunc(dev)) {
                mlx4_dbg(dev, "SRIOV mode - Disabling Ignore FCS");
                dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_IGNORE_FCS;
                return;
        }

        if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP)) {
                mlx4_dbg(dev,
                         "Keep FCS is not supported - Disabling Ignore FCS");
                dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_IGNORE_FCS;
                return;
        }
}

#define MLX4_A0_STEERING_TABLE_SIZE     256
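/*
 * Query the device capabilities from firmware and derive the
 * driver-visible limits (ports, queue sizes, UAR layout, steering).
 */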
static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
{
        int err;
        int i;

        err = mlx4_QUERY_DEV_CAP(dev, dev_cap);
        if (err) {
                mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
                return err;
        }
        mlx4_dev_cap_dump(dev, dev_cap);

        if (dev_cap->min_page_sz > PAGE_SIZE) {
                mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
                         dev_cap->min_page_sz, PAGE_SIZE);
                return -ENODEV;
        }
        if (dev_cap->num_ports > MLX4_MAX_PORTS) {
                mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n",
                         dev_cap->num_ports, MLX4_MAX_PORTS);
                return -ENODEV;
        }

        if (dev_cap->uar_size > pci_resource_len(dev->persist->pdev, 2)) {
                mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
                         dev_cap->uar_size,
                         (unsigned long long)
                         pci_resource_len(dev->persist->pdev, 2));
                return -ENODEV;
        }

        dev->caps.num_ports          = dev_cap->num_ports;
        dev->caps.num_sys_eqs = dev_cap->num_sys_eqs;
        dev->phys_caps.num_phys_eqs = dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS ?
                                      dev->caps.num_sys_eqs :
                                      MLX4_MAX_EQ_NUM;
        for (i = 1; i <= dev->caps.num_ports; ++i) {
                err = _mlx4_dev_port(dev, i, dev_cap->port_cap + i);
                if (err) {
                        mlx4_err(dev, "QUERY_PORT command failed, aborting\n");
                        return err;
                }
        }

        dev->caps.uar_page_size      = PAGE_SIZE;
        dev->caps.num_uars           = dev_cap->uar_size / PAGE_SIZE;
        dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
        dev->caps.bf_reg_size        = dev_cap->bf_reg_size;
        dev->caps.bf_regs_per_page   = dev_cap->bf_regs_per_page;
        dev->caps.max_sq_sg          = dev_cap->max_sq_sg;
        dev->caps.max_rq_sg          = dev_cap->max_rq_sg;
        dev->caps.max_wqes           = dev_cap->max_qp_sz;
        dev->caps.max_qp_init_rdma   = dev_cap->max_requester_per_qp;
        dev->caps.max_srq_wqes       = dev_cap->max_srq_sz;
        dev->caps.max_srq_sge        = dev_cap->max_rq_sg - 1;
        dev->caps.reserved_srqs      = dev_cap->reserved_srqs;
        dev->caps.max_sq_desc_sz     = dev_cap->max_sq_desc_sz;
        dev->caps.max_rq_desc_sz     = dev_cap->max_rq_desc_sz;
        /*
         * Subtract 1 from the limit because we need to allocate a
         * spare CQE so the HCA HW can tell the difference between an
         * empty CQ and a full CQ.
         */
        dev->caps.max_cqes           = dev_cap->max_cq_sz - 1;
        dev->caps.reserved_cqs       = dev_cap->reserved_cqs;
        dev->caps.reserved_eqs       = dev_cap->reserved_eqs;
        dev->caps.reserved_mtts      = dev_cap->reserved_mtts;
        dev->caps.reserved_mrws      = dev_cap->reserved_mrws;

        dev->caps.reserved_pds       = dev_cap->reserved_pds;
        dev->caps.reserved_xrcds     = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
                                        dev_cap->reserved_xrcds : 0;
        dev->caps.max_xrcds          = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
                                        dev_cap->max_xrcds : 0;
        dev->caps.mtt_entry_sz       = dev_cap->mtt_entry_sz;

        dev->caps.max_msg_sz         = dev_cap->max_msg_sz;
        dev->caps.page_size_cap      = ~(u32) (dev_cap->min_page_sz - 1);
        dev->caps.flags              = dev_cap->flags;
        dev->caps.flags2             = dev_cap->flags2;
        dev->caps.bmme_flags         = dev_cap->bmme_flags;
        dev->caps.reserved_lkey      = dev_cap->reserved_lkey;
        dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
        dev->caps.max_gso_sz         = dev_cap->max_gso_sz;
        dev->caps.max_rss_tbl_sz     = dev_cap->max_rss_tbl_sz;
        dev->caps.wol_port[1]          = dev_cap->wol_port[1];
        dev->caps.wol_port[2]          = dev_cap->wol_port[2];

        /* Save uar page shift */
        if (!mlx4_is_slave(dev)) {
                /* Virtual PCI function needs to determine UAR page size from
                 * firmware. Only master PCI function can set the uar page size
                 */
                if (enable_4k_uar || !dev->persist->num_vfs)
                        dev->uar_page_shift = DEFAULT_UAR_PAGE_SHIFT;
                else
                        dev->uar_page_shift = PAGE_SHIFT;

                mlx4_set_num_reserved_uars(dev, dev_cap);
        }

        if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN) {
                struct mlx4_init_hca_param hca_param;

                memset(&hca_param, 0, sizeof(hca_param));
                err = mlx4_QUERY_HCA(dev, &hca_param);
                /* Turn off the PHV_EN flag in case phv_check_en is set.
                 * phv_check_en is a HW check that parses the packet and
                 * verifies that the phv bit was reported correctly in the
                 * wqe. To allow QinQ, the PHV_EN flag should be set and
                 * phv_check_en must be cleared, otherwise QinQ packets
                 * will be dropped by the HW.
                 */
                if (err || hca_param.phv_check_en)
                        dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_PHV_EN;
        }

        /* Sense port always allowed on supported devices for ConnectX-1 and -2 */
        if (mlx4_priv(dev)->pci_dev_data & MLX4_PCI_DEV_FORCE_SENSE_PORT)
                dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
        /* Don't do sense port on multifunction devices (for now at least) */
        if (mlx4_is_mfunc(dev))
                dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;

        if (mlx4_low_memory_profile()) {
                dev->caps.log_num_macs  = MLX4_MIN_LOG_NUM_MAC;
                dev->caps.log_num_vlans = MLX4_MIN_LOG_NUM_VLANS;
        } else {
                dev->caps.log_num_macs  = log_num_mac;
                dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
        }

        for (i = 1; i <= dev->caps.num_ports; ++i) {
                dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE;
                if (dev->caps.supported_type[i]) {
                        /* if only ETH is supported - assign ETH */
                        if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH)
                                dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
                        /* if only IB is supported, assign IB */
                        else if (dev->caps.supported_type[i] ==
                                 MLX4_PORT_TYPE_IB)
                                dev->caps.port_type[i] = MLX4_PORT_TYPE_IB;
                        else {
                                /* if IB and ETH are supported, we set the port
                                 * type according to user selection of port type;
                                 * if user selected none, take the FW hint */
                                if (port_type_array[i - 1] == MLX4_PORT_TYPE_NONE)
                                        dev->caps.port_type[i] = dev->caps.suggested_type[i] ?
                                                MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB;
                                else
                                        dev->caps.port_type[i] = port_type_array[i - 1];
                        }
                }
                /*
                 * Link sensing is allowed on the port if 3 conditions are true:
                 * 1. Both protocols are supported on the port.
                 * 2. Different types are supported on the port
                 * 3. FW declared that it supports link sensing
                 */
                mlx4_priv(dev)->sense.sense_allowed[i] =
                        ((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) &&
                         (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
                         (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT));

                /*
                 * If "default_sense" bit is set, we move the port to "AUTO" mode
                 * and perform sense_port FW command to try and set the correct
                 * port type from beginning
                 */
                if (mlx4_priv(dev)->sense.sense_allowed[i] && dev->caps.default_sense[i]) {
                        enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE;
                        dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO;
                        mlx4_SENSE_PORT(dev, i, &sensed_port);
                        if (sensed_port != MLX4_PORT_TYPE_NONE)
                                dev->caps.port_type[i] = sensed_port;
                } else {
                        dev->caps.possible_type[i] = dev->caps.port_type[i];
                }

                if (dev->caps.log_num_macs > dev_cap->port_cap[i].log_max_macs) {
                        dev->caps.log_num_macs = dev_cap->port_cap[i].log_max_macs;
                        mlx4_warn(dev, "Requested number of MACs is too much for port %d, reducing to %d\n",
                                  i, 1 << dev->caps.log_num_macs);
                }
                if (dev->caps.log_num_vlans > dev_cap->port_cap[i].log_max_vlans) {
                        dev->caps.log_num_vlans = dev_cap->port_cap[i].log_max_vlans;
                        mlx4_warn(dev, "Requested number of VLANs is too much for port %d, reducing to %d\n",
                                  i, 1 << dev->caps.log_num_vlans);
                }
        }

        if (mlx4_is_master(dev) && (dev->caps.num_ports == 2) &&
            (port_type_array[0] == MLX4_PORT_TYPE_IB) &&
            (port_type_array[1] == MLX4_PORT_TYPE_ETH)) {
                mlx4_warn(dev,
                          "Granular QoS per VF not supported with IB/Eth configuration\n");
                dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_QOS_VPP;
        }

        dev->caps.max_counters = dev_cap->max_counters;

        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] =
                dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] =
                (1 << dev->caps.log_num_macs) *
                (1 << dev->caps.log_num_vlans) *
                dev->caps.num_ports;
        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH] = MLX4_NUM_FEXCH;

        if (dev_cap->dmfs_high_rate_qpn_base > 0 &&
            dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN)
                dev->caps.dmfs_high_rate_qpn_base = dev_cap->dmfs_high_rate_qpn_base;
        else
                dev->caps.dmfs_high_rate_qpn_base =
                        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];

        if (dev_cap->dmfs_high_rate_qpn_range > 0 &&
            dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN) {
                dev->caps.dmfs_high_rate_qpn_range = dev_cap->dmfs_high_rate_qpn_range;
                dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DEFAULT;
                dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_FS_A0;
        } else {
                dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_NOT_SUPPORTED;
                dev->caps.dmfs_high_rate_qpn_base =
                        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
                dev->caps.dmfs_high_rate_qpn_range = MLX4_A0_STEERING_TABLE_SIZE;
        }

        dev->caps.rl_caps = dev_cap->rl_caps;

        dev->caps.reserved_qps_cnt[MLX4_QP_REGION_RSS_RAW_ETH] =
                dev->caps.dmfs_high_rate_qpn_range;

        dev->caps.reserved_qps = dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] +
                dev->caps.reserved_qps_cnt[MLX4_QP_REGION_ETH_ADDR] +
                dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] +
                dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH];

        dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;

        if (!enable_64b_cqe_eqe && !mlx4_is_slave(dev)) {
                if (dev_cap->flags &
                    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) {
                        mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n");
                        dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_CQE;
                        dev->caps.flags &= ~MLX4_DEV_CAP_FLAG_64B_EQE;
                }

                if (dev_cap->flags2 &
                    (MLX4_DEV_CAP_FLAG2_CQE_STRIDE |
                     MLX4_DEV_CAP_FLAG2_EQE_STRIDE)) {
                        mlx4_warn(dev, "Disabling EQE/CQE stride per user request\n");
                        dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_CQE_STRIDE;
                        dev_cap->flags2 &= ~MLX4_DEV_CAP_FLAG2_EQE_STRIDE;
                }
        }

        if ((dev->caps.flags &
            (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) &&
            mlx4_is_master(dev))
                dev->caps.function_caps |= MLX4_FUNC_CAP_64B_EQE_CQE;

        if (!mlx4_is_slave(dev)) {
                mlx4_enable_cqe_eqe_stride(dev);
                dev->caps.alloc_res_qp_mask =
                        (dev->caps.bf_reg_size ? MLX4_RESERVE_ETH_BF_QP : 0) |
                        MLX4_RESERVE_A0_QP;

                if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) &&
                    dev->caps.flags & MLX4_DEV_CAP_FLAG_SET_ETH_SCHED) {
                        mlx4_warn(dev, "Old device ETS support detected\n");
                        mlx4_warn(dev, "Consider upgrading device FW.\n");
                        dev->caps.flags2 |= MLX4_DEV_CAP_FLAG2_ETS_CFG;
                }

        } else {
                dev->caps.alloc_res_qp_mask = 0;
        }

        mlx4_enable_ignore_fcs(dev);

        return 0;
}

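/*
 * Read the PCIe link capabilities (maximum speed and width) of the
 * device, handling both pre-r3.0 and r3.0-compliant link capability
 * encodings.
 */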
static int mlx4_get_pcie_dev_link_caps(struct mlx4_dev *dev,
                                       enum pci_bus_speed *speed,
                                       enum pcie_link_width *width)
{
        u32 lnkcap1, lnkcap2;
        int err1, err2;

#define  PCIE_MLW_CAP_SHIFT 4   /* start of MLW mask in link capabilities */

        *speed = PCI_SPEED_UNKNOWN;
        *width = PCIE_LNK_WIDTH_UNKNOWN;

        err1 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP,
                                          &lnkcap1);
        err2 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP2,
                                          &lnkcap2);
        if (!err2 && lnkcap2) { /* PCIe r3.0-compliant */
                if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_8_0GB)
                        *speed = PCIE_SPEED_8_0GT;
                else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_5_0GB)
                        *speed = PCIE_SPEED_5_0GT;
                else if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_2_5GB)
                        *speed = PCIE_SPEED_2_5GT;
        }
        if (!err1) {
                *width = (lnkcap1 & PCI_EXP_LNKCAP_MLW) >> PCIE_MLW_CAP_SHIFT;
                if (!lnkcap2) { /* pre-r3.0 */
                        if (lnkcap1 & PCI_EXP_LNKCAP_SLS_5_0GB)
                                *speed = PCIE_SPEED_5_0GT;
                        else if (lnkcap1 & PCI_EXP_LNKCAP_SLS_2_5GB)
                                *speed = PCIE_SPEED_2_5GT;
                }
        }

        if (*speed == PCI_SPEED_UNKNOWN || *width == PCIE_LNK_WIDTH_UNKNOWN) {
                return err1 ? err1 :
                        err2 ? err2 : -EINVAL;
        }
        return 0;
}

static void mlx4_check_pcie_caps(struct mlx4_dev *dev)
{
        enum pcie_link_width width, width_cap;
        enum pci_bus_speed speed, speed_cap;
        int err;

#define PCIE_SPEED_STR(speed) \
        (speed == PCIE_SPEED_8_0GT ? "8.0GT/s" : \
         speed == PCIE_SPEED_5_0GT ? "5.0GT/s" : \
         speed == PCIE_SPEED_2_5GT ? "2.5GT/s" : \
         "Unknown")

        err = mlx4_get_pcie_dev_link_caps(dev, &speed_cap, &width_cap);
        if (err) {
                mlx4_warn(dev,
                          "Unable to determine PCIe device BW capabilities\n");
                return;
        }

        err = pcie_get_minimum_link(dev->persist->pdev, &speed, &width);
        if (err || speed == PCI_SPEED_UNKNOWN ||
            width == PCIE_LNK_WIDTH_UNKNOWN) {
                mlx4_warn(dev,
                          "Unable to determine PCI device chain minimum BW\n");
                return;
        }

        if (width != width_cap || speed != speed_cap)
                mlx4_warn(dev,
                          "PCIe BW is different than device's capability\n");

        mlx4_info(dev, "PCIe link speed is %s, device supports %s\n",
                  PCIE_SPEED_STR(speed), PCIE_SPEED_STR(speed_cap));
        mlx4_info(dev, "PCIe link width is x%d, device supports x%d\n",
                  width, width_cap);
        return;
}

/* Check for live (non-reset) VFs and return how many there are. */
static int mlx4_how_many_lives_vf(struct mlx4_dev *dev)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        struct mlx4_slave_state *s_state;
        int i;
        int ret = 0;

        for (i = 1/*the ppf is 0*/; i < dev->num_slaves; ++i) {
                s_state = &priv->mfunc.master.slave_state[i];
                if (s_state->active && s_state->last_cmd !=
                    MLX4_COMM_CMD_RESET) {
                        mlx4_warn(dev, "%s: slave: %d is still active\n",
                                  __func__, i);
                        ret++;
                }
        }
        return ret;
}

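/*
 * Map a proxy/tunnel special QP number to its reserved qkey.  Returns
 * -EINVAL when the QP number is outside the special QP range.
 */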
int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey)
{
        u32 qk = MLX4_RESERVED_QKEY_BASE;

        if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX ||
            qpn < dev->phys_caps.base_proxy_sqpn)
                return -EINVAL;

        if (qpn >= dev->phys_caps.base_tunnel_sqpn)
                /* tunnel qp */
                qk += qpn - dev->phys_caps.base_tunnel_sqpn;
        else
                qk += qpn - dev->phys_caps.base_proxy_sqpn;
        *qkey = qk;
        return 0;
}
EXPORT_SYMBOL(mlx4_get_parav_qkey);

void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val)
{
        struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

        if (!mlx4_is_master(dev))
                return;

        priv->virt2phys_pkey[slave][port - 1][i] = val;
}
EXPORT_SYMBOL(mlx4_sync_pkey_table);

void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid)
{
        struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

        if (!mlx4_is_master(dev))
                return;

        priv->slave_node_guids[slave] = guid;
}
EXPORT_SYMBOL(mlx4_put_slave_node_guid);

__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave)
{
        struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev);

        if (!mlx4_is_master(dev))
                return 0;

        return priv->slave_node_guids[slave];
}
EXPORT_SYMBOL(mlx4_get_slave_node_guid);

int mlx4_is_slave_active(struct mlx4_dev *dev, int slave)
{
        struct mlx4_priv *priv = mlx4_priv(dev);
        struct mlx4_slave_state *s_slave;

        if (!mlx4_is_master(dev))
                return 0;

        s_slave = &priv->mfunc.master.slave_state[slave];
        return !!s_slave->active;
}
EXPORT_SYMBOL(mlx4_is_slave_active);

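/*
 * If the Ethernet header of a steering rule matches a multicast or
 * broadcast destination MAC and is the last rule in the list, set the
 * rule priority to the NIC domain.
 */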
void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl,
                                       struct _rule_hw *eth_header)
{
        if (is_multicast_ether_addr(eth_header->eth.dst_mac) ||
            is_broadcast_ether_addr(eth_header->eth.dst_mac)) {
                struct mlx4_net_trans_rule_hw_eth *eth =
                        (struct mlx4_net_trans_rule_hw_eth *)eth_header;
                struct _rule_hw *next_rule = (struct _rule_hw *)(eth + 1);
                bool last_rule = next_rule->size == 0 && next_rule->id == 0 &&
                        next_rule->rsvd == 0;

                if (last_rule)
                        ctrl->prio = cpu_to_be16(MLX4_DOMAIN_NIC);
        }
}
EXPORT_SYMBOL(mlx4_handle_eth_header_mcast_prio);

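/*
 * Adopt the steering mode reported by the master function and size the
 * per-MCG QP limits accordingly.
 */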
static void slave_adjust_steering_mode(struct mlx4_dev *dev,
                                       struct mlx4_dev_cap *dev_cap,
                                       struct mlx4_init_hca_param *hca_param)
{
        dev->caps.steering_mode = hca_param->steering_mode;
        if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
                dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
                dev->caps.fs_log_max_ucast_qp_range_size =
                        dev_cap->fs_log_max_ucast_qp_range_size;
        } else
                dev->caps.num_qp_per_mgm =
                        4 * ((1 << hca_param->log_mc_entry_sz)/16 - 2);

        mlx4_dbg(dev, "Steering mode is: %s\n",
                 mlx4_steering_mode_str(dev->caps.steering_mode));
}

static void mlx4_slave_destroy_special_qp_cap(struct mlx4_dev *dev)
{
        kfree(dev->caps.spec_qps);
        dev->caps.spec_qps = NULL;
}

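/*
 * Query per-port special QP numbers and GID/pkey table lengths for a
 * slave (VF) function and cache them in the device capabilities.
 */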
static int mlx4_slave_special_qp_cap(struct mlx4_dev *dev)
{
        struct mlx4_func_cap *func_cap = NULL;
        struct mlx4_caps *caps = &dev->caps;
        int i, err = 0;

        func_cap = kzalloc(sizeof(*func_cap), GFP_KERNEL);
        caps->spec_qps = kcalloc(caps->num_ports, sizeof(*caps->spec_qps), GFP_KERNEL);

        if (!func_cap || !caps->spec_qps) {
                mlx4_err(dev, "Failed to allocate memory for special qps cap\n");
                err = -ENOMEM;
                goto err_mem;
        }

        for (i = 1; i <= caps->num_ports; ++i) {
                err = mlx4_QUERY_FUNC_CAP(dev, i, func_cap);
                if (err) {
                        mlx4_err(dev, "QUERY_FUNC_CAP port command failed for port %d, aborting (%d)\n",
                                 i, err);
                        goto err_mem;
                }
                caps->spec_qps[i - 1] = func_cap->spec_qps;
                caps->port_mask[i] = caps->port_type[i];
                caps->phys_port_id[i] = func_cap->phys_port_id;
                err = mlx4_get_slave_pkey_gid_tbl_len(dev, i,
                                                      &caps->gid_table_len[i],
                                                      &caps->pkey_table_len[i]);
                if (err) {
                        mlx4_err(dev, "QUERY_PORT command failed for port %d, aborting (%d)\n",
                                 i, err);
                        goto err_mem;
                }
        }

err_mem:
        if (err)
                mlx4_slave_destroy_special_qp_cap(dev);
        kfree(func_cap);
        return err;
}

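/*
 * Build the capability set of a slave (VF) function from QUERY_HCA,
 * QUERY_DEV_CAP and QUERY_FUNC_CAP, applying the restrictions that
 * hold in slave mode.
 */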
static int mlx4_slave_cap(struct mlx4_dev *dev)
{
        int                        err;
        u32                        page_size;
        struct mlx4_dev_cap        *dev_cap = NULL;
        struct mlx4_func_cap       *func_cap = NULL;
        struct mlx4_init_hca_param *hca_param = NULL;

        hca_param = kzalloc(sizeof(*hca_param), GFP_KERNEL);
        func_cap = kzalloc(sizeof(*func_cap), GFP_KERNEL);
        dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL);
        if (!hca_param || !func_cap || !dev_cap) {
                mlx4_err(dev, "Failed to allocate memory for slave_cap\n");
                err = -ENOMEM;
                goto free_mem;
        }

        err = mlx4_QUERY_HCA(dev, hca_param);
        if (err) {
                mlx4_err(dev, "QUERY_HCA command failed, aborting\n");
                goto free_mem;
        }

        /* Fail if the HCA has an unknown global capability; at this
         * time global_caps should always be zero.
         */
        if (hca_param->global_caps) {
                mlx4_err(dev, "Unknown hca global capabilities\n");
                err = -EINVAL;
                goto free_mem;
        }

        dev->caps.hca_core_clock = hca_param->hca_core_clock;

        dev->caps.max_qp_dest_rdma = 1 << hca_param->log_rd_per_qp;
        err = mlx4_dev_cap(dev, dev_cap);
        if (err) {
                mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
                goto free_mem;
        }

        err = mlx4_QUERY_FW(dev);
        if (err)
                mlx4_err(dev, "QUERY_FW command failed: could not get FW version\n");

        page_size = ~dev->caps.page_size_cap + 1;
        mlx4_warn(dev, "HCA minimum page size:%d\n", page_size);
        if (page_size > PAGE_SIZE) {
                mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
                         page_size, PAGE_SIZE);
                err = -ENODEV;
                goto free_mem;
        }

        /* Set uar_page_shift for VF */
        dev->uar_page_shift = hca_param->uar_page_sz + 12;

        /* Make sure the master uar page size is valid */
        if (dev->uar_page_shift > PAGE_SHIFT) {
                mlx4_err(dev,
                         "Invalid configuration: uar page size is larger than system page size\n");
                err = -ENODEV;
                goto free_mem;
        }

        /* Set reserved_uars based on the uar_page_shift */
        mlx4_set_num_reserved_uars(dev, dev_cap);

        /* Although the uar page size in FW differs from the system page
         * size, upper software layers (mlx4_ib, mlx4_en and part of
         * mlx4_core) still work under the assumption that
         * uar page size == system page size.
         */
        dev->caps.uar_page_size = PAGE_SIZE;

        err = mlx4_QUERY_FUNC_CAP(dev, 0, func_cap);
        if (err) {
                mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d)\n",
                         err);
                goto free_mem;
        }

        if ((func_cap->pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) !=
            PF_CONTEXT_BEHAVIOUR_MASK) {
                mlx4_err(dev, "Unknown pf context behaviour %x known flags %x\n",
                         func_cap->pf_context_behaviour,
                         PF_CONTEXT_BEHAVIOUR_MASK);
                err = -EINVAL;
                goto free_mem;
        }

        dev->caps.num_ports             = func_cap->num_ports;
        dev->quotas.qp                  = func_cap->qp_quota;
        dev->quotas.srq                 = func_cap->srq_quota;
        dev->quotas.cq                  = func_cap->cq_quota;
        dev->quotas.mpt                 = func_cap->mpt_quota;
        dev->quotas.mtt                 = func_cap->mtt_quota;
        dev->caps.num_qps               = 1 << hca_param->log_num_qps;
        dev->caps.num_srqs              = 1 << hca_param->log_num_srqs;
        dev->caps.num_cqs               = 1 << hca_param->log_num_cqs;
        dev->caps.num_mpts              = 1 << hca_param->log_mpt_sz;
        dev->caps.num_eqs               = func_cap->max_eq;
        dev->caps.reserved_eqs          = func_cap->reserved_eq;
        dev->caps.reserved_lkey         = func_cap->reserved_lkey;
        dev->caps.num_pds               = MLX4_NUM_PDS;
        dev->caps.num_mgms              = 0;
        dev->caps.num_amgms             = 0;

        if (dev->caps.num_ports > MLX4_MAX_PORTS) {
                mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n",
                         dev->caps.num_ports, MLX4_MAX_PORTS);
                err = -ENODEV;
                goto free_mem;
        }

        mlx4_replace_zero_macs(dev);

        err = mlx4_slave_special_qp_cap(dev);
        if (err) {
                mlx4_err(dev, "Set special QP caps failed. aborting\n");
                goto free_mem;
        }

        if (dev->caps.uar_page_size * (dev->caps.num_uars -
                                       dev->caps.reserved_uars) >
                                       pci_resource_len(dev->persist->pdev,
                                                        2)) {
                mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n",
                         dev->caps.uar_page_size * dev->caps.num_uars,
                         (unsigned long long)
                         pci_resource_len(dev->persist->pdev, 2));
                err = -ENOMEM;
                goto err_mem;
        }

        if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) {
                dev->caps.eqe_size   = 64;
                dev->caps.eqe_factor = 1;
        } else {
                dev->caps.eqe_size   = 32;
                dev->caps.eqe_factor = 0;
        }

        if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) {
                dev->caps.cqe_size   = 64;
                dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
        } else {
                dev->caps.cqe_size   = 32;
        }

        if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_EQE_STRIDE_ENABLED) {
                dev->caps.eqe_size = hca_param->eqe_size;
                dev->caps.eqe_factor = 0;
        }

        if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_CQE_STRIDE_ENABLED) {
                dev->caps.cqe_size = hca_param->cqe_size;
                /* User still needs to know when CQE > 32B */
                dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE;
        }

        dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
        mlx4_warn(dev, "Timestamping is not supported in slave mode\n");

        dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_USER_MAC_EN;
        mlx4_dbg(dev, "User MAC FW update is not supported in slave mode\n");

        slave_adjust_steering_mode(dev, dev_cap, hca_param);
        mlx4_dbg(dev, "RSS support for IP fragments is %s\n",
                 hca_param->rss_ip_frags ? "on" : "off");

        if (func_cap->extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP &&
            dev->caps.bf_reg_size)
                dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_ETH_BF_QP;

        if (func_cap->extra_flags & MLX4_QUERY_FUNC_FLAGS_A0_RES_QP)
                dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_A0_QP;

err_mem:
        if (err)
                mlx4_slave_destroy_special_qp_cap(dev);
free_mem:
        kfree(hca_param);
        kfree(func_cap);
        kfree(dev_cap);
        return err;
}

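/*
 * Asynchronously request the mlx4_en and/or mlx4_ib modules, based on
 * the configured port types.
 */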
static void mlx4_request_modules(struct mlx4_dev *dev)
{
        int port;
        int has_ib_port = false;
        int has_eth_port = false;
#define EN_DRV_NAME     "mlx4_en"
#define IB_DRV_NAME     "mlx4_ib"

        for (port = 1; port <= dev->caps.num_ports; port++) {
                if (dev->caps.port_type[port] == MLX4_PORT_TYPE_IB)
                        has_ib_port = true;
                else if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH)
                        has_eth_port = true;
        }

        if (has_eth_port)
                request_module_nowait(EN_DRV_NAME);
        if (has_ib_port || (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
                request_module_nowait(IB_DRV_NAME);
}

/*
 * Change the port configuration of the device.
 * Every user of this function must hold the port mutex.
 */
int mlx4_change_port_types(struct mlx4_dev *dev,
                           enum mlx4_port_type *port_types)
{
        int err = 0;
        int change = 0;
        int port;

        for (port = 0; port <  dev->caps.num_ports; port++) {
                /* Change the port type only if the new type is different
                 * from the current, and not set to Auto */
                if (port_types[port] != dev->caps.port_type[port + 1])
                        change = 1;
        }
        if (change) {
                mlx4_unregister_device(dev);
                for (port = 1; port <= dev->caps.num_ports; port++) {
                        mlx4_CLOSE_PORT(dev, port);
                        dev->caps.port_type[port] = port_types[port - 1];
                        err = mlx4_SET_PORT(dev, port, -1);
                        if (err) {
                                mlx4_err(dev, "Failed to set port %d, aborting\n",
                                         port);
                                goto out;
                        }
                }
                mlx4_set_port_mask(dev);
                err = mlx4_register_device(dev);
                if (err) {
                        mlx4_err(dev, "Failed to register device\n");
                        goto out;
                }
                mlx4_request_modules(dev);
        }

out:
        return err;
}

static ssize_t show_port_type(struct device *dev,
                              struct device_attribute *attr,
                              char *buf)
{
        struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
                                                   port_attr);
        struct mlx4_dev *mdev = info->dev;
        char type[8];

        sprintf(type, "%s",
                (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_IB) ?
                "ib" : "eth");
        if (mdev->caps.possible_type[info->port] == MLX4_PORT_TYPE_AUTO)
                sprintf(buf, "auto (%s)\n", type);
        else
                sprintf(buf, "%s\n", type);

        return strlen(buf);
}

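/*
 * Apply a port type requested through sysfs: validate it against the
 * device capabilities, re-run port sensing and reconfigure all ports
 * under the port mutex.
 */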
static int __set_port_type(struct mlx4_port_info *info,
                           enum mlx4_port_type port_type)
{
        struct mlx4_dev *mdev = info->dev;
        struct mlx4_priv *priv = mlx4_priv(mdev);
        enum mlx4_port_type types[MLX4_MAX_PORTS];
        enum mlx4_port_type new_types[MLX4_MAX_PORTS];
        int i;
        int err = 0;

        if ((port_type & mdev->caps.supported_type[info->port]) != port_type) {
                mlx4_err(mdev,
                         "Requested port type for port %d is not supported on this HCA\n",
                         info->port);
                err = -EINVAL;
                goto err_sup;
        }

        mlx4_stop_sense(mdev);
        mutex_lock(&priv->port_mutex);
        info->tmp_type = port_type;

        /* Possible type is always the one that was delivered */
        mdev->caps.possible_type[info->port] = info->tmp_type;

        for (i = 0; i < mdev->caps.num_ports; i++) {
                types[i] = priv->port[i+1].tmp_type ? priv->port[i+1].tmp_type :
                                        mdev->caps.possible_type[i+1];
                if (types[i] == MLX4_PORT_TYPE_AUTO)
                        types[i] = mdev->caps.port_type[i+1];
        }

        if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
            !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) {
                for (i = 1; i <= mdev->caps.num_ports; i++) {
                        if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) {
                                mdev->caps.possible_type[i] = mdev->caps.port_type[i];
                                err = -EINVAL;
                        }
                }
        }
        if (err) {
                mlx4_err(mdev, "Auto sensing is not supported on this HCA. Set only 'eth' or 'ib' for both ports (should be the same)\n");
                goto out;
        }

        mlx4_do_sense_ports(mdev, new_types, types);

        err = mlx4_check_port_params(mdev, new_types);
        if (err)
                goto out;

        /* We are about to apply the changes after the configuration
         * was verified, no need to remember the temporary types
         * any more */
        for (i = 0; i < mdev->caps.num_ports; i++)
                priv->port[i + 1].tmp_type = 0;

        err = mlx4_change_port_types(mdev, new_types);

out:
        mlx4_start_sense(mdev);
        mutex_unlock(&priv->port_mutex);
err_sup:
        return err;
}

static ssize_t set_port_type(struct device *dev,
                             struct device_attribute *attr,
                             const char *buf, size_t count)
{
        struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
                                                   port_attr);
        struct mlx4_dev *mdev = info->dev;
        enum mlx4_port_type port_type;
        static DEFINE_MUTEX(set_port_type_mutex);
        int err;

        mutex_lock(&set_port_type_mutex);

        if (!strcmp(buf, "ib\n")) {
                port_type = MLX4_PORT_TYPE_IB;
        } else if (!strcmp(buf, "eth\n")) {
                port_type = MLX4_PORT_TYPE_ETH;
        } else if (!strcmp(buf, "auto\n")) {
                port_type = MLX4_PORT_TYPE_AUTO;
        } else {
                mlx4_err(mdev, "%s is not a supported port type\n", buf);
                err = -EINVAL;
                goto err_out;
        }

        err = __set_port_type(info, port_type);

err_out:
        mutex_unlock(&set_port_type_mutex);

        return err ? err : count;
}

enum ibta_mtu {
        IB_MTU_256  = 1,
        IB_MTU_512  = 2,
        IB_MTU_1024 = 3,
        IB_MTU_2048 = 4,
        IB_MTU_4096 = 5
};

static inline int int_to_ibta_mtu(int mtu)
{
        switch (mtu) {
        case 256:  return IB_MTU_256;
        case 512:  return IB_MTU_512;
        case 1024: return IB_MTU_1024;
        case 2048: return IB_MTU_2048;
        case 4096: return IB_MTU_4096;
        default: return -1;
        }
}

static inline int ibta_mtu_to_int(enum ibta_mtu mtu)
{
        switch (mtu) {
        case IB_MTU_256:  return  256;
        case IB_MTU_512:  return  512;
        case IB_MTU_1024: return 1024;
        case IB_MTU_2048: return 2048;
        case IB_MTU_4096: return 4096;
        default: return -1;
        }
}

static ssize_t show_port_ib_mtu(struct device *dev,
                             struct device_attribute *attr,
                             char *buf)
{
        struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
                                                   port_mtu_attr);
        struct mlx4_dev *mdev = info->dev;

        if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH)
                mlx4_warn(mdev, "port level mtu is only used for IB ports\n");

        sprintf(buf, "%d\n",
                        ibta_mtu_to_int(mdev->caps.port_ib_mtu[info->port]));
        return strlen(buf);
}

static ssize_t set_port_ib_mtu(struct device *dev,
                             struct device_attribute *attr,
                             const char *buf, size_t count)
{
        struct mlx4_port_info *info = container_of(attr, struct mlx4_port_info,
                                                   port_mtu_attr);
        struct mlx4_dev *mdev = info->dev;
        struct mlx4_priv *priv = mlx4_priv(mdev);
        int err, port, mtu, ibta_mtu = -1;

        if (mdev->caps.port_type[info->port] == MLX4_PORT_TYPE_ETH) {
                mlx4_warn(mdev, "port level mtu is only used for IB ports\n");
                return -EINVAL;
        }

        err = kstrtoint(buf, 0, &mtu);
        if (!err)
                ibta_mtu = int_to_ibta_mtu(mtu);

        if (err || ibta_mtu < 0) {
1310                 mlx4_err(mdev, "%s is not a valid IBTA mtu\n", buf);
1311                 return -EINVAL;
1312         }
1313
1314         mdev->caps.port_ib_mtu[info->port] = ibta_mtu;
1315
1316         mlx4_stop_sense(mdev);
1317         mutex_lock(&priv->port_mutex);
1318         mlx4_unregister_device(mdev);
1319         for (port = 1; port <= mdev->caps.num_ports; port++) {
1320                 mlx4_CLOSE_PORT(mdev, port);
1321                 err = mlx4_SET_PORT(mdev, port, -1);
1322                 if (err) {
1323                         mlx4_err(mdev, "Failed to set port %d, aborting\n",
1324                                  port);
1325                         goto err_set_port;
1326                 }
1327         }
1328         err = mlx4_register_device(mdev);
1329 err_set_port:
1330         mutex_unlock(&priv->port_mutex);
1331         mlx4_start_sense(mdev);
1332         return err ? err : count;
1333 }
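
/*
 * Illustrative usage sketch (assumption, not from this file): with the
 * attribute exposed as, e.g., "mlx4_port<N>_mtu" under the PCI device,
 * the IB port MTU is set by writing one of the five IBTA sizes:
 *
 *   echo 2048 > /sys/bus/pci/devices/0000:07:00.0/mlx4_port1_mtu
 *
 * Anything other than 256/512/1024/2048/4096 is rejected with -EINVAL.
 * Note the heavyweight side effect above: the device is unregistered,
 * every port is closed and re-programmed via SET_PORT, and the device
 * is then re-registered.
 */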
1334
1335 /* bond for multi-function device */
1336 #define MAX_MF_BOND_ALLOWED_SLAVES 63
1337 static int mlx4_mf_bond(struct mlx4_dev *dev)
1338 {
1339         int err = 0;
1340         int nvfs;
1341         struct mlx4_slaves_pport slaves_port1;
1342         struct mlx4_slaves_pport slaves_port2;
1343         DECLARE_BITMAP(slaves_port_1_2, MLX4_MFUNC_MAX);
1344
1345         slaves_port1 = mlx4_phys_to_slaves_pport(dev, 1);
1346         slaves_port2 = mlx4_phys_to_slaves_pport(dev, 2);
1347         bitmap_and(slaves_port_1_2,
1348                    slaves_port1.slaves, slaves_port2.slaves,
1349                    dev->persist->num_vfs + 1);
1350
1351         /* only single-port VFs are allowed */
1352         if (bitmap_weight(slaves_port_1_2, dev->persist->num_vfs + 1) > 1) {
1353                 mlx4_warn(dev, "HA mode unsupported for dual-ported VFs\n");
1354                 return -EINVAL;
1355         }
1356
1357         /* the number of virtual functions is the total number of
1358          * functions minus one physical function per port.
1359          */
1360         nvfs = bitmap_weight(slaves_port1.slaves, dev->persist->num_vfs + 1) +
1361                 bitmap_weight(slaves_port2.slaves, dev->persist->num_vfs + 1) - 2;
1362
1363         /* limit on maximum allowed VFs */
1364         if (nvfs > MAX_MF_BOND_ALLOWED_SLAVES) {
1365                 mlx4_warn(dev, "HA mode is not supported for %d VFs (max %d are allowed)\n",
1366                           nvfs, MAX_MF_BOND_ALLOWED_SLAVES);
1367                 return -EINVAL;
1368         }
1369
1370         if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) {
1371                 mlx4_warn(dev, "HA mode unsupported for non-DMFS steering\n");
1372                 return -EINVAL;
1373         }
1374
1375         err = mlx4_bond_mac_table(dev);
1376         if (err)
1377                 return err;
1378         err = mlx4_bond_vlan_table(dev);
1379         if (err)
1380                 goto err1;
1381         err = mlx4_bond_fs_rules(dev);
1382         if (err)
1383                 goto err2;
1384
1385         return 0;
1386 err2:
1387         (void)mlx4_unbond_vlan_table(dev);
1388 err1:
1389         (void)mlx4_unbond_mac_table(dev);
1390         return err;
1391 }
1392
1393 static int mlx4_mf_unbond(struct mlx4_dev *dev)
1394 {
1395         int ret, ret1;
1396
1397         ret = mlx4_unbond_fs_rules(dev);
1398         if (ret)
1399                 mlx4_warn(dev, "multifunction unbond for flow rules failed (%d)\n", ret);
1400         ret1 = mlx4_unbond_mac_table(dev);
1401         if (ret1) {
1402                 mlx4_warn(dev, "multifunction unbond for MAC table failed (%d)\n", ret1);
1403                 ret = ret1;
1404         }
1405         ret1 = mlx4_unbond_vlan_table(dev);
1406         if (ret1) {
1407                 mlx4_warn(dev, "multifunction unbond for VLAN table failed (%d)\n", ret1);
1408                 ret = ret1;
1409         }
1410         return ret;
1411 }
1412
1413 int mlx4_bond(struct mlx4_dev *dev)
1414 {
1415         int ret = 0;
1416         struct mlx4_priv *priv = mlx4_priv(dev);
1417
1418         mutex_lock(&priv->bond_mutex);
1419
1420         if (!mlx4_is_bonded(dev)) {
1421                 ret = mlx4_do_bond(dev, true);
1422                 if (ret)
1423                         mlx4_err(dev, "Failed to bond device: %d\n", ret);
1424                 if (!ret && mlx4_is_master(dev)) {
1425                         ret = mlx4_mf_bond(dev);
1426                         if (ret) {
1427                                 mlx4_err(dev, "bond for multifunction failed\n");
1428                                 mlx4_do_bond(dev, false);
1429                         }
1430                 }
1431         }
1432
1433         mutex_unlock(&priv->bond_mutex);
1434         if (!ret)
1435                 mlx4_dbg(dev, "Device is bonded\n");
1436
1437         return ret;
1438 }
1439 EXPORT_SYMBOL_GPL(mlx4_bond);
1440
1441 int mlx4_unbond(struct mlx4_dev *dev)
1442 {
1443         int ret = 0;
1444         struct mlx4_priv *priv = mlx4_priv(dev);
1445
1446         mutex_lock(&priv->bond_mutex);
1447
1448         if (mlx4_is_bonded(dev)) {
1449                 int ret2 = 0;
1450
1451                 ret = mlx4_do_bond(dev, false);
1452                 if (ret)
1453                         mlx4_err(dev, "Failed to unbond device: %d\n", ret);
1454                 if (mlx4_is_master(dev))
1455                         ret2 = mlx4_mf_unbond(dev);
1456                 if (ret2) {
1457                         mlx4_warn(dev, "Failed to unbond device for multifunction (%d)\n", ret2);
1458                         ret = ret2;
1459                 }
1460         }
1461
1462         mutex_unlock(&priv->bond_mutex);
1463         if (!ret)
1464                 mlx4_dbg(dev, "Device is unbonded\n");
1465
1466         return ret;
1467 }
1468 EXPORT_SYMBOL_GPL(mlx4_unbond);
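
/*
 * Illustrative caller sketch (assumption, not from this file): an
 * upper-layer driver reacting to a bonding event would do roughly
 *
 *   if (mlx4_bond(mdev))
 *           return;                 (stay in single-port mode)
 *   ...
 *   mlx4_unbond(mdev);              (back to two independent ports)
 *
 * Both entry points serialize on priv->bond_mutex and are idempotent:
 * bonding an already-bonded device, or unbonding an unbonded one, is
 * a no-op that returns 0.
 */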
1469
1470
1471 int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p)
1472 {
1473         u8 port1 = v2p->port1;
1474         u8 port2 = v2p->port2;
1475         struct mlx4_priv *priv = mlx4_priv(dev);
1476         int err;
1477
1478         if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP))
1479                 return -EOPNOTSUPP;
1480
1481         mutex_lock(&priv->bond_mutex);
1482
1483         /* zero means keep current mapping for this port */
1484         if (port1 == 0)
1485                 port1 = priv->v2p.port1;
1486         if (port2 == 0)
1487                 port2 = priv->v2p.port2;
1488
1489         if ((port1 < 1) || (port1 > MLX4_MAX_PORTS) ||
1490             (port2 < 1) || (port2 > MLX4_MAX_PORTS) ||
1491             (port1 == 2 && port2 == 1)) {
1492                 /* besides the boundary checks, cross mapping makes
1493                  * no sense and is therefore not allowed */
1494                 err = -EINVAL;
1495         } else if ((port1 == priv->v2p.port1) &&
1496                  (port2 == priv->v2p.port2)) {
1497                 err = 0;
1498         } else {
1499                 err = mlx4_virt2phy_port_map(dev, port1, port2);
1500                 if (!err) {
1501                         mlx4_dbg(dev, "port map changed: [%d][%d]\n",
1502                                  port1, port2);
1503                         priv->v2p.port1 = port1;
1504                         priv->v2p.port2 = port2;
1505                 } else {
1506                         mlx4_err(dev, "Failed to change port map: %d\n", err);
1507                 }
1508         }
1509
1510         mutex_unlock(&priv->bond_mutex);
1511         return err;
1512 }
1513 EXPORT_SYMBOL_GPL(mlx4_port_map_set);
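
/*
 * Illustrative sketch (assumption, not from this file): remap virtual
 * port 1 onto physical port 2 while leaving port 2's mapping alone:
 *
 *   struct mlx4_port_map v2p = { .port1 = 2, .port2 = 0 };
 *   int err = mlx4_port_map_set(dev, &v2p);
 *
 * A zero entry means "keep the current mapping"; aside from the range
 * checks, the only rejected combination is the full cross mapping
 * port1 = 2 with port2 = 1.
 */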
1514
1515 static int mlx4_load_fw(struct mlx4_dev *dev)
1516 {
1517         struct mlx4_priv *priv = mlx4_priv(dev);
1518         int err;
1519
1520         priv->fw.fw_icm = mlx4_alloc_icm(dev, priv->fw.fw_pages,
1521                                          GFP_HIGHUSER | __GFP_NOWARN, 0);
1522         if (!priv->fw.fw_icm) {
1523                 mlx4_err(dev, "Couldn't allocate FW area, aborting\n");
1524                 return -ENOMEM;
1525         }
1526
1527         err = mlx4_MAP_FA(dev, priv->fw.fw_icm);
1528         if (err) {
1529                 mlx4_err(dev, "MAP_FA command failed, aborting\n");
1530                 goto err_free;
1531         }
1532
1533         err = mlx4_RUN_FW(dev);
1534         if (err) {
1535                 mlx4_err(dev, "RUN_FW command failed, aborting\n");
1536                 goto err_unmap_fa;
1537         }
1538
1539         return 0;
1540
1541 err_unmap_fa:
1542         mlx4_UNMAP_FA(dev);
1543
1544 err_free:
1545         mlx4_free_icm(dev, priv->fw.fw_icm, 0);
1546         return err;
1547 }
1548
1549 static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
1550                                 int cmpt_entry_sz)
1551 {
1552         struct mlx4_priv *priv = mlx4_priv(dev);
1553         int err;
1554         int num_eqs;
1555
1556         err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table,
1557                                   cmpt_base +
1558                                   ((u64) (MLX4_CMPT_TYPE_QP *
1559                                           cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1560                                   cmpt_entry_sz, dev->caps.num_qps,
1561                                   dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1562                                   0, 0);
1563         if (err)
1564                 goto err;
1565
1566         err = mlx4_init_icm_table(dev, &priv->srq_table.cmpt_table,
1567                                   cmpt_base +
1568                                   ((u64) (MLX4_CMPT_TYPE_SRQ *
1569                                           cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1570                                   cmpt_entry_sz, dev->caps.num_srqs,
1571                                   dev->caps.reserved_srqs, 0, 0);
1572         if (err)
1573                 goto err_qp;
1574
1575         err = mlx4_init_icm_table(dev, &priv->cq_table.cmpt_table,
1576                                   cmpt_base +
1577                                   ((u64) (MLX4_CMPT_TYPE_CQ *
1578                                           cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1579                                   cmpt_entry_sz, dev->caps.num_cqs,
1580                                   dev->caps.reserved_cqs, 0, 0);
1581         if (err)
1582                 goto err_srq;
1583
1584         num_eqs = dev->phys_caps.num_phys_eqs;
1585         err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
1586                                   cmpt_base +
1587                                   ((u64) (MLX4_CMPT_TYPE_EQ *
1588                                           cmpt_entry_sz) << MLX4_CMPT_SHIFT),
1589                                   cmpt_entry_sz, num_eqs, num_eqs, 0, 0);
1590         if (err)
1591                 goto err_cq;
1592
1593         return 0;
1594
1595 err_cq:
1596         mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
1597
1598 err_srq:
1599         mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
1600
1601 err_qp:
1602         mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
1603
1604 err:
1605         return err;
1606 }
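
/*
 * Layout note (derived from the offsets above): the four cMPT
 * sub-tables share one virtual region, with each context type placed
 * at
 *
 *   cmpt_base + ((u64)MLX4_CMPT_TYPE_<X> * cmpt_entry_sz << MLX4_CMPT_SHIFT)
 *
 * i.e. every type owns a fixed slot of 2^MLX4_CMPT_SHIFT entries, so
 * the four mlx4_init_icm_table() calls differ only in the type
 * multiplier and the entry/reservation counts.
 */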
1607
1608 static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
1609                          struct mlx4_init_hca_param *init_hca, u64 icm_size)
1610 {
1611         struct mlx4_priv *priv = mlx4_priv(dev);
1612         u64 aux_pages;
1613         int num_eqs;
1614         int err;
1615
1616         err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
1617         if (err) {
1618                 mlx4_err(dev, "SET_ICM_SIZE command failed, aborting\n");
1619                 return err;
1620         }
1621
1622         mlx4_dbg(dev, "%lld KB of HCA context requires %lld KB aux memory\n",
1623                  (unsigned long long) icm_size >> 10,
1624                  (unsigned long long) aux_pages << 2);
1625
1626         priv->fw.aux_icm = mlx4_alloc_icm(dev, aux_pages,
1627                                           GFP_HIGHUSER | __GFP_NOWARN, 0);
1628         if (!priv->fw.aux_icm) {
1629                 mlx4_err(dev, "Couldn't allocate aux memory, aborting\n");
1630                 return -ENOMEM;
1631         }
1632
1633         err = mlx4_MAP_ICM_AUX(dev, priv->fw.aux_icm);
1634         if (err) {
1635                 mlx4_err(dev, "MAP_ICM_AUX command failed, aborting\n");
1636                 goto err_free_aux;
1637         }
1638
1639         err = mlx4_init_cmpt_table(dev, init_hca->cmpt_base, dev_cap->cmpt_entry_sz);
1640         if (err) {
1641                 mlx4_err(dev, "Failed to map cMPT context memory, aborting\n");
1642                 goto err_unmap_aux;
1643         }
1644
1645
1646         num_eqs = dev->phys_caps.num_phys_eqs;
1647         err = mlx4_init_icm_table(dev, &priv->eq_table.table,
1648                                   init_hca->eqc_base, dev_cap->eqc_entry_sz,
1649                                   num_eqs, num_eqs, 0, 0);
1650         if (err) {
1651                 mlx4_err(dev, "Failed to map EQ context memory, aborting\n");
1652                 goto err_unmap_cmpt;
1653         }
1654
1655         /*
1656          * Reserved MTT entries must be aligned up to a cacheline
1657          * boundary, since the FW will write to them, while the driver
1658          * writes to all other MTT entries. (The variable
1659          * dev->caps.mtt_entry_sz below is really the MTT segment
1660          * size, not the raw entry size)
1661          */
1662         dev->caps.reserved_mtts =
1663                 ALIGN(dev->caps.reserved_mtts * dev->caps.mtt_entry_sz,
1664                       dma_get_cache_alignment()) / dev->caps.mtt_entry_sz;
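
        /* Worked example (illustrative numbers, assuming a 64-byte
         * cache line and an 8-byte mtt_entry_sz): 10 reserved entries
         * become ALIGN(10 * 8, 64) / 8 = 128 / 8 = 16.
         */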
1665
1666         err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
1667                                   init_hca->mtt_base,
1668                                   dev->caps.mtt_entry_sz,
1669                                   dev->caps.num_mtts,
1670                                   dev->caps.reserved_mtts, 1, 0);
1671         if (err) {
1672                 mlx4_err(dev, "Failed to map MTT context memory, aborting\n");
1673                 goto err_unmap_eq;
1674         }
1675
1676         err = mlx4_init_icm_table(dev, &priv->mr_table.dmpt_table,
1677                                   init_hca->dmpt_base,
1678                                   dev_cap->dmpt_entry_sz,
1679                                   dev->caps.num_mpts,
1680                                   dev->caps.reserved_mrws, 1, 1);
1681         if (err) {
1682                 mlx4_err(dev, "Failed to map dMPT context memory, aborting\n");
1683                 goto err_unmap_mtt;
1684         }
1685
1686         err = mlx4_init_icm_table(dev, &priv->qp_table.qp_table,
1687                                   init_hca->qpc_base,
1688                                   dev_cap->qpc_entry_sz,
1689                                   dev->caps.num_qps,
1690                                   dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1691                                   0, 0);
1692         if (err) {
1693                 mlx4_err(dev, "Failed to map QP context memory, aborting\n");
1694                 goto err_unmap_dmpt;
1695         }
1696
1697         err = mlx4_init_icm_table(dev, &priv->qp_table.auxc_table,
1698                                   init_hca->auxc_base,
1699                                   dev_cap->aux_entry_sz,
1700                                   dev->caps.num_qps,
1701                                   dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1702                                   0, 0);
1703         if (err) {
1704                 mlx4_err(dev, "Failed to map AUXC context memory, aborting\n");
1705                 goto err_unmap_qp;
1706         }
1707
1708         err = mlx4_init_icm_table(dev, &priv->qp_table.altc_table,
1709                                   init_hca->altc_base,
1710                                   dev_cap->altc_entry_sz,
1711                                   dev->caps.num_qps,
1712                                   dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1713                                   0, 0);
1714         if (err) {
1715                 mlx4_err(dev, "Failed to map ALTC context memory, aborting\n");
1716                 goto err_unmap_auxc;
1717         }
1718
1719         err = mlx4_init_icm_table(dev, &priv->qp_table.rdmarc_table,
1720                                   init_hca->rdmarc_base,
1721                                   dev_cap->rdmarc_entry_sz << priv->qp_table.rdmarc_shift,
1722                                   dev->caps.num_qps,
1723                                   dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW],
1724                                   0, 0);
1725         if (err) {
1726                 mlx4_err(dev, "Failed to map RDMARC context memory, aborting\n");
1727                 goto err_unmap_altc;
1728         }
1729
1730         err = mlx4_init_icm_table(dev, &priv->cq_table.table,
1731                                   init_hca->cqc_base,
1732                                   dev_cap->cqc_entry_sz,
1733                                   dev->caps.num_cqs,
1734                                   dev->caps.reserved_cqs, 0, 0);
1735         if (err) {
1736                 mlx4_err(dev, "Failed to map CQ context memory, aborting\n");
1737                 goto err_unmap_rdmarc;
1738         }
1739
1740         err = mlx4_init_icm_table(dev, &priv->srq_table.table,
1741                                   init_hca->srqc_base,
1742                                   dev_cap->srq_entry_sz,
1743                                   dev->caps.num_srqs,
1744                                   dev->caps.reserved_srqs, 0, 0);
1745         if (err) {
1746                 mlx4_err(dev, "Failed to map SRQ context memory, aborting\n");
1747                 goto err_unmap_cq;
1748         }
1749
1750         /*
1751          * For flow steering device managed mode it is required to use
1752          * mlx4_init_icm_table. For B0 steering mode it's not strictly
1753          * required, but for simplicity just map the whole multicast
1754          * group table now.  The table isn't very big and it's a lot
1755          * easier than trying to track ref counts.
1756          */
1757         err = mlx4_init_icm_table(dev, &priv->mcg_table.table,
1758                                   init_hca->mc_base,
1759                                   mlx4_get_mgm_entry_size(dev),
1760                                   dev->caps.num_mgms + dev->caps.num_amgms,
1761                                   dev->caps.num_mgms + dev->caps.num_amgms,
1762                                   0, 0);
1763         if (err) {
1764                 mlx4_err(dev, "Failed to map MCG context memory, aborting\n");
1765                 goto err_unmap_srq;
1766         }
1767
1768         return 0;
1769
1770 err_unmap_srq:
1771         mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
1772
1773 err_unmap_cq:
1774         mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
1775
1776 err_unmap_rdmarc:
1777         mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
1778
1779 err_unmap_altc:
1780         mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
1781
1782 err_unmap_auxc:
1783         mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
1784
1785 err_unmap_qp:
1786         mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
1787
1788 err_unmap_dmpt:
1789         mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
1790
1791 err_unmap_mtt:
1792         mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
1793
1794 err_unmap_eq:
1795         mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
1796
1797 err_unmap_cmpt:
1798         mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
1799         mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
1800         mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
1801         mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
1802
1803 err_unmap_aux:
1804         mlx4_UNMAP_ICM_AUX(dev);
1805
1806 err_free_aux:
1807         mlx4_free_icm(dev, priv->fw.aux_icm, 0);
1808
1809         return err;
1810 }
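
/*
 * Informational note: the err_unmap_* ladder above tears down, in
 * reverse order, exactly the tables initialized before the failing
 * step, so an error at any point leaves no ICM table mapped;
 * mlx4_free_icms() below performs the same walk for the fully
 * initialized case.
 */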
1811
1812 static void mlx4_free_icms(struct mlx4_dev *dev)
1813 {
1814         struct mlx4_priv *priv = mlx4_priv(dev);
1815
1816         mlx4_cleanup_icm_table(dev, &priv->mcg_table.table);
1817         mlx4_cleanup_icm_table(dev, &priv->srq_table.table);
1818         mlx4_cleanup_icm_table(dev, &priv->cq_table.table);
1819         mlx4_cleanup_icm_table(dev, &priv->qp_table.rdmarc_table);
1820         mlx4_cleanup_icm_table(dev, &priv->qp_table.altc_table);
1821         mlx4_cleanup_icm_table(dev, &priv->qp_table.auxc_table);
1822         mlx4_cleanup_icm_table(dev, &priv->qp_table.qp_table);
1823         mlx4_cleanup_icm_table(dev, &priv->mr_table.dmpt_table);
1824         mlx4_cleanup_icm_table(dev, &priv->mr_table.mtt_table);
1825         mlx4_cleanup_icm_table(dev, &priv->eq_table.table);
1826         mlx4_cleanup_icm_table(dev, &priv->eq_table.cmpt_table);
1827         mlx4_cleanup_icm_table(dev, &priv->cq_table.cmpt_table);
1828         mlx4_cleanup_icm_table(dev, &priv->srq_table.cmpt_table);
1829         mlx4_cleanup_icm_table(dev, &priv->qp_table.cmpt_table);
1830
1831         mlx4_UNMAP_ICM_AUX(dev);
1832         mlx4_free_icm(dev, priv->fw.aux_icm, 0);
1833 }
1834
1835 static void mlx4_slave_exit(struct mlx4_dev *dev)
1836 {
1837         struct mlx4_priv *priv = mlx4_priv(dev);
1838
1839         mutex_lock(&priv->cmd.slave_cmd_mutex);
1840         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP,
1841                           MLX4_COMM_TIME))
1842                 mlx4_warn(dev, "Failed to close slave function\n");
1843         mutex_unlock(&priv->cmd.slave_cmd_mutex);
1844 }
1845
1846 static int map_bf_area(struct mlx4_dev *dev)
1847 {
1848         struct mlx4_priv *priv = mlx4_priv(dev);
1849         resource_size_t bf_start;
1850         resource_size_t bf_len;
1851         int err = 0;
1852
1853         if (!dev->caps.bf_reg_size)
1854                 return -ENXIO;
1855
1856         bf_start = pci_resource_start(dev->persist->pdev, 2) +
1857                         (dev->caps.num_uars << PAGE_SHIFT);
1858         bf_len = pci_resource_len(dev->persist->pdev, 2) -
1859                         (dev->caps.num_uars << PAGE_SHIFT);
1860         priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len);
1861         if (!priv->bf_mapping)
1862                 err = -ENOMEM;
1863
1864         return err;
1865 }
1866
1867 static void unmap_bf_area(struct mlx4_dev *dev)
1868 {
1869         if (mlx4_priv(dev)->bf_mapping)
1870                 io_mapping_free(mlx4_priv(dev)->bf_mapping);
1871 }
1872
1873 u64 mlx4_read_clock(struct mlx4_dev *dev)
1874 {
1875         u32 clockhi, clocklo, clockhi1;
1876         u64 cycles;
1877         int i;
1878         struct mlx4_priv *priv = mlx4_priv(dev);
1879
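        /* A 64-bit read of the free-running counter is not atomic over
         * the BAR, so read high, then low, then high again and retry
         * when the high word changed, i.e. the low word wrapped
         * between the two reads.  Ten attempts are plenty, as the high
         * word changes only once per 2^32 cycles.
         */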
1880         for (i = 0; i < 10; i++) {
1881                 clockhi = swab32(readl(priv->clock_mapping));
1882                 clocklo = swab32(readl(priv->clock_mapping + 4));
1883                 clockhi1 = swab32(readl(priv->clock_mapping));
1884                 if (clockhi == clockhi1)
1885                         break;
1886         }
1887
1888         cycles = (u64) clockhi << 32 | (u64) clocklo;
1889
1890         return cycles;
1891 }
1892 EXPORT_SYMBOL_GPL(mlx4_read_clock);
1893
1894
1895 static int map_internal_clock(struct mlx4_dev *dev)
1896 {
1897         struct mlx4_priv *priv = mlx4_priv(dev);
1898
1899         priv->clock_mapping =
1900                 ioremap(pci_resource_start(dev->persist->pdev,
1901                                            priv->fw.clock_bar) +
1902                         priv->fw.clock_offset, MLX4_CLOCK_SIZE);
1903
1904         if (!priv->clock_mapping)
1905                 return -ENOMEM;
1906
1907         return 0;
1908 }
1909
1910 int mlx4_get_internal_clock_params(struct mlx4_dev *dev,
1911                                    struct mlx4_clock_params *params)
1912 {
1913         struct mlx4_priv *priv = mlx4_priv(dev);
1914
1915         if (mlx4_is_slave(dev))
1916                 return -EOPNOTSUPP;
1917
1918         if (!params)
1919                 return -EINVAL;
1920
1921         params->bar = priv->fw.clock_bar;
1922         params->offset = priv->fw.clock_offset;
1923         params->size = MLX4_CLOCK_SIZE;
1924
1925         return 0;
1926 }
1927 EXPORT_SYMBOL_GPL(mlx4_get_internal_clock_params);
1928
1929 static void unmap_internal_clock(struct mlx4_dev *dev)
1930 {
1931         struct mlx4_priv *priv = mlx4_priv(dev);
1932
1933         if (priv->clock_mapping)
1934                 iounmap(priv->clock_mapping);
1935 }
1936
1937 static void mlx4_close_hca(struct mlx4_dev *dev)
1938 {
1939         unmap_internal_clock(dev);
1940         unmap_bf_area(dev);
1941         if (mlx4_is_slave(dev))
1942                 mlx4_slave_exit(dev);
1943         else {
1944                 mlx4_CLOSE_HCA(dev, 0);
1945                 mlx4_free_icms(dev);
1946         }
1947 }
1948
1949 static void mlx4_close_fw(struct mlx4_dev *dev)
1950 {
1951         if (!mlx4_is_slave(dev)) {
1952                 mlx4_UNMAP_FA(dev);
1953                 mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
1954         }
1955 }
1956
1957 static int mlx4_comm_check_offline(struct mlx4_dev *dev)
1958 {
1959 #define COMM_CHAN_OFFLINE_OFFSET 0x09
1960
1961         u32 comm_flags;
1962         u32 offline_bit;
1963         unsigned long end;
1964         struct mlx4_priv *priv = mlx4_priv(dev);
1965
1966         end = msecs_to_jiffies(MLX4_COMM_OFFLINE_TIME_OUT) + jiffies;
1967         while (time_before(jiffies, end)) {
1968                 comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm +
1969                                           MLX4_COMM_CHAN_FLAGS));
1970                 offline_bit = (comm_flags &
1971                                (u32)(1 << COMM_CHAN_OFFLINE_OFFSET));
1972                 if (!offline_bit)
1973                         return 0;
1974
1975                 /* If device removal has been requested,
1976                  * do not continue retrying.
1977                  */
1978                 if (dev->persist->interface_state &
1979                     MLX4_INTERFACE_STATE_NOWAIT)
1980                         break;
1981
1982                 /* There are cases as part of AER/Reset flow that PF needs
1983                  * around 100 msec to load. We therefore sleep for 100 msec
1984                  * to allow other tasks to make use of that CPU during this
1985                  * time interval.
1986                  */
1987                 msleep(100);
1988         }
1989         mlx4_err(dev, "Communication channel is offline.\n");
1990         return -EIO;
1991 }
1992
1993 static void mlx4_reset_vf_support(struct mlx4_dev *dev)
1994 {
1995 #define COMM_CHAN_RST_OFFSET 0x1e
1996
1997         struct mlx4_priv *priv = mlx4_priv(dev);
1998         u32 comm_rst;
1999         u32 comm_caps;
2000
2001         comm_caps = swab32(readl((__iomem char *)priv->mfunc.comm +
2002                                  MLX4_COMM_CHAN_CAPS));
2003         comm_rst = (comm_caps & (u32)(1 << COMM_CHAN_RST_OFFSET));
2004
2005         if (comm_rst)
2006                 dev->caps.vf_caps |= MLX4_VF_CAP_FLAG_RESET;
2007 }
2008
2009 static int mlx4_init_slave(struct mlx4_dev *dev)
2010 {
2011         struct mlx4_priv *priv = mlx4_priv(dev);
2012         u64 dma = (u64) priv->mfunc.vhcr_dma;
2013         int ret_from_reset = 0;
2014         u32 slave_read;
2015         u32 cmd_channel_ver;
2016
2017         if (atomic_read(&pf_loading)) {
2018                 mlx4_warn(dev, "PF is not ready - Deferring probe\n");
2019                 return -EPROBE_DEFER;
2020         }
2021
2022         mutex_lock(&priv->cmd.slave_cmd_mutex);
2023         priv->cmd.max_cmds = 1;
2024         if (mlx4_comm_check_offline(dev)) {
2025                 mlx4_err(dev, "PF is not responsive, skipping initialization\n");
2026                 goto err_offline;
2027         }
2028
2029         mlx4_reset_vf_support(dev);
2030         mlx4_warn(dev, "Sending reset\n");
2031         ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0,
2032                                        MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME);
2033         /* if we are in the middle of FLR, the slave will try
2034          * NUM_OF_RESET_RETRIES times before leaving. */
2035         if (ret_from_reset) {
2036                 if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) {
2037                         mlx4_warn(dev, "slave is currently in the middle of FLR - Deferring probe\n");
2038                         mutex_unlock(&priv->cmd.slave_cmd_mutex);
2039                         return -EPROBE_DEFER;
2040                 } else
2041                         goto err;
2042         }
2043
2044         /* check the driver version - the slave I/F revision
2045          * must match the master's */
2046         slave_read = swab32(readl(&priv->mfunc.comm->slave_read));
2047         cmd_channel_ver = mlx4_comm_get_version();
2048
2049         if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) !=
2050                 MLX4_COMM_GET_IF_REV(slave_read)) {
2051                 mlx4_err(dev, "slave driver version is not supported by the master\n");
2052                 goto err;
2053         }
2054
2055         mlx4_warn(dev, "Sending vhcr0\n");
2056         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48,
2057                              MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
2058                 goto err;
2059         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32,
2060                              MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
2061                 goto err;
2062         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16,
2063                              MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
2064                 goto err;
2065         if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma,
2066                           MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME))
2067                 goto err;
2068
2069         mutex_unlock(&priv->cmd.slave_cmd_mutex);
2070         return 0;
2071
2072 err:
2073         mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP, 0);
2074 err_offline:
2075         mutex_unlock(&priv->cmd.slave_cmd_mutex);
2076         return -EIO;
2077 }
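
/*
 * Informational note on the handshake above: a comm-channel command
 * carries only a 16-bit parameter, so the 64-bit VHCR DMA address is
 * delivered to the PF in four slices (dma >> 48, >> 32, >> 16 and the
 * low word), with the final VHCR_EN command both completing the
 * address and enabling the channel.
 */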
2078
2079 static void mlx4_parav_master_pf_caps(struct mlx4_dev *dev)
2080 {
2081         int i;
2082
2083         for (i = 1; i <= dev->caps.num_ports; i++) {
2084                 if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
2085                         dev->caps.gid_table_len[i] =
2086                                 mlx4_get_slave_num_gids(dev, 0, i);
2087                 else
2088                         dev->caps.gid_table_len[i] = 1;
2089                 dev->caps.pkey_table_len[i] =
2090                         dev->phys_caps.pkey_phys_table_len[i] - 1;
2091         }
2092 }
2093
2094 static int choose_log_fs_mgm_entry_size(int qp_per_entry)
2095 {
2096         int i;
2097
2098         for (i = MLX4_MIN_MGM_LOG_ENTRY_SIZE; i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE;
2099               i++) {
2100                 if (qp_per_entry <= 4 * ((1 << i) / 16 - 2))
2101                         break;
2102         }
2103
2104         return (i <= MLX4_MAX_MGM_LOG_ENTRY_SIZE) ? i : -1;
2105 }
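
/*
 * Worked example (illustrative; the two-block header interpretation is
 * an assumption): an MGM entry of 2^i bytes holds 16-byte blocks of
 * four QPNs after two header blocks, giving 4 * ((1 << i) / 16 - 2)
 * QPs per entry.  For qp_per_entry = 128 the loop picks i = 10, since
 * i = 9 only fits 4 * (512 / 16 - 2) = 120 while i = 10 fits 248.
 */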
2106
2107 static const char *dmfs_high_rate_steering_mode_str(int dmfs_high_steer_mode)
2108 {
2109         switch (dmfs_high_steer_mode) {
2110         case MLX4_STEERING_DMFS_A0_DEFAULT:
2111                 return "default performance";
2112
2113         case MLX4_STEERING_DMFS_A0_DYNAMIC:
2114                 return "dynamic hybrid mode";
2115
2116         case MLX4_STEERING_DMFS_A0_STATIC:
2117                 return "performance optimized for limited rule configuration (static)";
2118
2119         case MLX4_STEERING_DMFS_A0_DISABLE:
2120                 return "disabled performance optimized steering";
2121
2122         case MLX4_STEERING_DMFS_A0_NOT_SUPPORTED:
2123                 return "performance optimized steering not supported";
2124
2125         default:
2126                 return "Unrecognized mode";
2127         }
2128 }
2129
2130 #define MLX4_DMFS_A0_STEERING                   (1UL << 2)
2131
2132 static void choose_steering_mode(struct mlx4_dev *dev,
2133                                  struct mlx4_dev_cap *dev_cap)
2134 {
2135         if (mlx4_log_num_mgm_entry_size <= 0) {
2136                 if ((-mlx4_log_num_mgm_entry_size) & MLX4_DMFS_A0_STEERING) {
2137                         if (dev->caps.dmfs_high_steer_mode ==
2138                             MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
2139                                 mlx4_err(dev, "DMFS high rate mode not supported\n");
2140                         else
2141                                 dev->caps.dmfs_high_steer_mode =
2142                                         MLX4_STEERING_DMFS_A0_STATIC;
2143                 }
2144         }
2145
2146         if (mlx4_log_num_mgm_entry_size <= 0 &&
2147             dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN &&
2148             (!mlx4_is_mfunc(dev) ||
2149              (dev_cap->fs_max_num_qp_per_entry >=
2150              (dev->persist->num_vfs + 1))) &&
2151             choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >=
2152                 MLX4_MIN_MGM_LOG_ENTRY_SIZE) {
2153                 dev->oper_log_mgm_entry_size =
2154                         choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry);
2155                 dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED;
2156                 dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
2157                 dev->caps.fs_log_max_ucast_qp_range_size =
2158                         dev_cap->fs_log_max_ucast_qp_range_size;
2159         } else {
2160                 if (dev->caps.dmfs_high_steer_mode !=
2161                     MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
2162                         dev->caps.dmfs_high_steer_mode = MLX4_STEERING_DMFS_A0_DISABLE;
2163                 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER &&
2164                     dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
2165                         dev->caps.steering_mode = MLX4_STEERING_MODE_B0;
2166                 else {
2167                         dev->caps.steering_mode = MLX4_STEERING_MODE_A0;
2168
2169                         if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER ||
2170                             dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
2171                                 mlx4_warn(dev, "Must have both UC_STEER and MC_STEER flags set to use B0 steering - falling back to A0 steering mode\n");
2172                 }
2173                 dev->oper_log_mgm_entry_size =
2174                         mlx4_log_num_mgm_entry_size > 0 ?
2175                         mlx4_log_num_mgm_entry_size :
2176                         MLX4_DEFAULT_MGM_LOG_ENTRY_SIZE;
2177                 dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev);
2178         }
2179         mlx4_dbg(dev, "Steering mode is: %s, oper_log_mgm_entry_size = %d, modparam log_num_mgm_entry_size = %d\n",
2180                  mlx4_steering_mode_str(dev->caps.steering_mode),
2181                  dev->oper_log_mgm_entry_size,
2182                  mlx4_log_num_mgm_entry_size);
2183 }
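
/*
 * Informational note: a non-positive log_num_mgm_entry_size module
 * parameter doubles as a flag bitmask on its negated value; passing
 * -4, for instance, sets MLX4_DMFS_A0_STEERING (1UL << 2) above and
 * requests the static DMFS A0 optimization, while any value <= 0 also
 * opts in to device-managed flow steering when the firmware and VF
 * constraints allow it.
 */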
2184
2185 static void choose_tunnel_offload_mode(struct mlx4_dev *dev,
2186                                        struct mlx4_dev_cap *dev_cap)
2187 {
2188         if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED &&
2189             dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS)
2190                 dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_VXLAN;
2191         else
2192                 dev->caps.tunnel_offload_mode = MLX4_TUNNEL_OFFLOAD_MODE_NONE;
2193
2194         mlx4_dbg(dev, "Tunneling offload mode is: %s\n", (dev->caps.tunnel_offload_mode
2195                  == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) ? "vxlan" : "none");
2196 }
2197
2198 static int mlx4_validate_optimized_steering(struct mlx4_dev *dev)
2199 {
2200         int i;
2201         struct mlx4_port_cap port_cap;
2202
2203         if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_NOT_SUPPORTED)
2204                 return -EINVAL;
2205
2206         for (i = 1; i <= dev->caps.num_ports; i++) {
2207                 if (mlx4_dev_port(dev, i, &port_cap)) {
2208                         mlx4_err(dev,
2209                                  "QUERY_DEV_CAP command failed, can't verify DMFS high rate steering.\n");
2210                 } else if ((dev->caps.dmfs_high_steer_mode !=
2211                             MLX4_STEERING_DMFS_A0_DEFAULT) &&
2212                            (port_cap.dmfs_optimized_state ==
2213                             !!(dev->caps.dmfs_high_steer_mode ==
2214                             MLX4_STEERING_DMFS_A0_DISABLE))) {
2215                         mlx4_err(dev,
2216                                  "DMFS high rate steer mode differs: driver requested %s but it is %s in FW.\n",
2217                                  dmfs_high_rate_steering_mode_str(
2218                                         dev->caps.dmfs_high_steer_mode),
2219                                  (port_cap.dmfs_optimized_state ?
2220                                         "enabled" : "disabled"));
2221                 }
2222         }
2223
2224         return 0;
2225 }
2226
2227 static int mlx4_init_fw(struct mlx4_dev *dev)
2228 {
2229         struct mlx4_mod_stat_cfg   mlx4_cfg;
2230         int err = 0;
2231
2232         if (!mlx4_is_slave(dev)) {
2233                 err = mlx4_QUERY_FW(dev);
2234                 if (err) {
2235                         if (err == -EACCES)
2236                                 mlx4_info(dev, "non-primary physical function, skipping\n");
2237                         else
2238                                 mlx4_err(dev, "QUERY_FW command failed, aborting\n");
2239                         return err;
2240                 }
2241
2242                 err = mlx4_load_fw(dev);
2243                 if (err) {
2244                         mlx4_err(dev, "Failed to start FW, aborting\n");
2245                         return err;
2246                 }
2247
2248                 mlx4_cfg.log_pg_sz_m = 1;
2249                 mlx4_cfg.log_pg_sz = 0;
2250                 err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
2251                 if (err)
2252                         mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");
2253         }
2254
2255         return err;
2256 }
2257
2258 static int mlx4_init_hca(struct mlx4_dev *dev)
2259 {
2260         struct mlx4_priv          *priv = mlx4_priv(dev);
2261         struct mlx4_adapter        adapter;
2262         struct mlx4_dev_cap        dev_cap;
2263         struct mlx4_profile        profile;
2264         struct mlx4_init_hca_param init_hca;
2265         u64 icm_size;
2266         struct mlx4_config_dev_params params;
2267         int err;
2268
2269         if (!mlx4_is_slave(dev)) {
2270                 err = mlx4_dev_cap(dev, &dev_cap);
2271                 if (err) {
2272                         mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
2273                         return err;
2274                 }
2275
2276                 choose_steering_mode(dev, &dev_cap);
2277                 choose_tunnel_offload_mode(dev, &dev_cap);
2278
2279                 if (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC &&
2280                     mlx4_is_master(dev))
2281                         dev->caps.function_caps |= MLX4_FUNC_CAP_DMFS_A0_STATIC;
2282
2283                 err = mlx4_get_phys_port_id(dev);
2284                 if (err)
2285                         mlx4_err(dev, "Failed to get physical port id\n");
2286
2287                 if (mlx4_is_master(dev))
2288                         mlx4_parav_master_pf_caps(dev);
2289
2290                 if (mlx4_low_memory_profile()) {
2291                         mlx4_info(dev, "Running from within kdump kernel. Using low memory profile\n");
2292                         profile = low_mem_profile;
2293                 } else {
2294                         profile = default_profile;
2295                 }
2296                 if (dev->caps.steering_mode ==
2297                     MLX4_STEERING_MODE_DEVICE_MANAGED)
2298                         profile.num_mcg = MLX4_FS_NUM_MCG;
2299
2300                 icm_size = mlx4_make_profile(dev, &profile, &dev_cap,
2301                                              &init_hca);
2302                 if ((long long) icm_size < 0) {
2303                         err = icm_size;
2304                         return err;
2305                 }
2306
2307                 dev->caps.max_fmr_maps = (1 << (32 - ilog2(dev->caps.num_mpts))) - 1;
2308
2309                 if (enable_4k_uar || !dev->persist->num_vfs) {
2310                         init_hca.log_uar_sz = ilog2(dev->caps.num_uars) +
2311                                                     PAGE_SHIFT - DEFAULT_UAR_PAGE_SHIFT;
2312                         init_hca.uar_page_sz = DEFAULT_UAR_PAGE_SHIFT - 12;
2313                 } else {
2314                         init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
2315                         init_hca.uar_page_sz = PAGE_SHIFT - 12;
2316                 }
2317
2318                 init_hca.mw_enabled = 0;
2319                 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
2320                     dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN)
2321                         init_hca.mw_enabled = INIT_HCA_TPT_MW_ENABLE;
2322
2323                 err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
2324                 if (err)
2325                         return err;
2326
2327                 err = mlx4_INIT_HCA(dev, &init_hca);
2328                 if (err) {
2329                         mlx4_err(dev, "INIT_HCA command failed, aborting\n");
2330                         goto err_free_icm;
2331                 }
2332
2333                 if (dev_cap.flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS) {
2334                         err = mlx4_query_func(dev, &dev_cap);
2335                         if (err < 0) {
2336                                 mlx4_err(dev, "QUERY_FUNC command failed, aborting.\n");
2337                                 goto err_close;
2338                         } else if (err & MLX4_QUERY_FUNC_NUM_SYS_EQS) {
2339                                 dev->caps.num_eqs = dev_cap.max_eqs;
2340                                 dev->caps.reserved_eqs = dev_cap.reserved_eqs;
2341                                 dev->caps.reserved_uars = dev_cap.reserved_uars;
2342                         }
2343                 }
2344
2345                 /*
2346                  * If timestamping is supported by the FW, read the
2347                  * HCA core clock frequency via the QUERY_HCA command.
2348                  */
2349                 if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) {
2350                         memset(&init_hca, 0, sizeof(init_hca));
2351                         err = mlx4_QUERY_HCA(dev, &init_hca);
2352                         if (err) {
2353                                 mlx4_err(dev, "QUERY_HCA command failed, disabling timestamping\n");
2354                                 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
2355                         } else {
2356                                 dev->caps.hca_core_clock =
2357                                         init_hca.hca_core_clock;
2358                         }
2359
2360                         /* In case we got HCA frequency 0 - disable timestamping
2361                          * to avoid dividing by zero
2362                          */
2363                         if (!dev->caps.hca_core_clock) {
2364                                 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
2365                                 mlx4_err(dev,
2366                                          "HCA frequency is 0 - timestamping is not supported\n");
2367                         } else if (map_internal_clock(dev)) {
2368                                 /*
2369                                  * Mapping the internal clock failed;
2370                                  * disable timestamping.
2371                                  */
2372                                 dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS;
2373                                 mlx4_err(dev, "Failed to map internal clock. Timestamping is not supported\n");
2374                         }
2375                 }
2376
2377                 if (dev->caps.dmfs_high_steer_mode !=
2378                     MLX4_STEERING_DMFS_A0_NOT_SUPPORTED) {
2379                         if (mlx4_validate_optimized_steering(dev))
2380                                 mlx4_warn(dev, "Optimized steering validation failed\n");
2381
2382                         if (dev->caps.dmfs_high_steer_mode ==
2383                             MLX4_STEERING_DMFS_A0_DISABLE) {
2384                                 dev->caps.dmfs_high_rate_qpn_base =
2385                                         dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW];
2386                                 dev->caps.dmfs_high_rate_qpn_range =
2387                                         MLX4_A0_STEERING_TABLE_SIZE;
2388                         }
2389
2390                         mlx4_info(dev, "DMFS high rate steer mode is: %s\n",
2391                                   dmfs_high_rate_steering_mode_str(
2392                                         dev->caps.dmfs_high_steer_mode));
2393                 }
2394         } else {
2395                 err = mlx4_init_slave(dev);
2396                 if (err) {
2397                         if (err != -EPROBE_DEFER)
2398                                 mlx4_err(dev, "Failed to initialize slave\n");
2399                         return err;
2400                 }
2401
2402                 err = mlx4_slave_cap(dev);
2403                 if (err) {
2404                         mlx4_err(dev, "Failed to obtain slave caps\n");
2405                         goto err_close;
2406                 }
2407         }
2408
2409         if (map_bf_area(dev))
2410                 mlx4_dbg(dev, "Failed to map blue flame area\n");
2411
2412         /* Only the master sets the ports; all the rest get them from it. */
2413         if (!mlx4_is_slave(dev))
2414                 mlx4_set_port_mask(dev);
2415
2416         err = mlx4_QUERY_ADAPTER(dev, &adapter);
2417         if (err) {
2418                 mlx4_err(dev, "QUERY_ADAPTER command failed, aborting\n");
2419                 goto unmap_bf;
2420         }
2421
2422         /* Query CONFIG_DEV parameters */
2423         err = mlx4_config_dev_retrieval(dev, &params);
2424         if (err && err != -EOPNOTSUPP) {
2425                 mlx4_err(dev, "Failed to query CONFIG_DEV parameters\n");
2426         } else if (!err) {
2427                 dev->caps.rx_checksum_flags_port[1] = params.rx_csum_flags_port_1;
2428                 dev->caps.rx_checksum_flags_port[2] = params.rx_csum_flags_port_2;
2429         }
2430         priv->eq_table.inta_pin = adapter.inta_pin;
2431         memcpy(dev->board_id, adapter.board_id, sizeof(dev->board_id));
2432
2433         return 0;
2434
2435 unmap_bf:
2436         unmap_internal_clock(dev);
2437         unmap_bf_area(dev);
2438
2439         if (mlx4_is_slave(dev))
2440                 mlx4_slave_destroy_special_qp_cap(dev);
2441
2442 err_close:
2443         if (mlx4_is_slave(dev))
2444                 mlx4_slave_exit(dev);
2445         else
2446                 mlx4_CLOSE_HCA(dev, 0);
2447
2448 err_free_icm:
2449         if (!mlx4_is_slave(dev))
2450                 mlx4_free_icms(dev);
2451
2452         return err;
2453 }
2454
2455 static int mlx4_init_counters_table(struct mlx4_dev *dev)
2456 {
2457         struct mlx4_priv *priv = mlx4_priv(dev);
2458         int nent_pow2;
2459
2460         if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2461                 return -ENOENT;
2462
2463         if (!dev->caps.max_counters)
2464                 return -ENOSPC;
2465
2466         nent_pow2 = roundup_pow_of_two(dev->caps.max_counters);
2467         /* reserve last counter index for sink counter */
2468         return mlx4_bitmap_init(&priv->counters_bitmap, nent_pow2,
2469                                 nent_pow2 - 1, 0,
2470                                 nent_pow2 - dev->caps.max_counters + 1);
2471 }
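
/*
 * Worked example (illustrative): with max_counters = 100 the bitmap is
 * sized to nent_pow2 = 128 and the top 128 - 100 + 1 = 29 indices are
 * reserved, leaving indices 0..98 allocatable and index 99
 * (max_counters - 1) permanently held back as the sink counter.
 */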
2472
2473 static void mlx4_cleanup_counters_table(struct mlx4_dev *dev)
2474 {
2475         if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2476                 return;
2477
2478         if (!dev->caps.max_counters)
2479                 return;
2480
2481         mlx4_bitmap_cleanup(&mlx4_priv(dev)->counters_bitmap);
2482 }
2483
2484 static void mlx4_cleanup_default_counters(struct mlx4_dev *dev)
2485 {
2486         struct mlx4_priv *priv = mlx4_priv(dev);
2487         int port;
2488
2489         for (port = 0; port < dev->caps.num_ports; port++)
2490                 if (priv->def_counter[port] != -1)
2491                         mlx4_counter_free(dev,  priv->def_counter[port]);
2492 }
2493
2494 static int mlx4_allocate_default_counters(struct mlx4_dev *dev)
2495 {
2496         struct mlx4_priv *priv = mlx4_priv(dev);
2497         int port, err = 0;
2498         u32 idx;
2499
2500         for (port = 0; port < dev->caps.num_ports; port++)
2501                 priv->def_counter[port] = -1;
2502
2503         for (port = 0; port < dev->caps.num_ports; port++) {
2504                 err = mlx4_counter_alloc(dev, &idx, MLX4_RES_USAGE_DRIVER);
2505
2506                 if (!err || err == -ENOSPC) {
2507                         priv->def_counter[port] = idx;
2508                 } else if (err == -ENOENT) {
2509                         err = 0;
2510                         continue;
2511                 } else if (mlx4_is_slave(dev) && err == -EINVAL) {
2512                         priv->def_counter[port] = MLX4_SINK_COUNTER_INDEX(dev);
2513                         mlx4_warn(dev, "can't allocate counter from old PF driver, using index %d\n",
2514                                   MLX4_SINK_COUNTER_INDEX(dev));
2515                         err = 0;
2516                 } else {
2517                         mlx4_err(dev, "%s: failed to allocate default counter port %d err %d\n",
2518                                  __func__, port + 1, err);
2519                         mlx4_cleanup_default_counters(dev);
2520                         return err;
2521                 }
2522
2523                 mlx4_dbg(dev, "%s: default counter index %d for port %d\n",
2524                          __func__, priv->def_counter[port], port + 1);
2525         }
2526
2527         return err;
2528 }
2529
2530 int __mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx)
2531 {
2532         struct mlx4_priv *priv = mlx4_priv(dev);
2533
2534         if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2535                 return -ENOENT;
2536
2537         *idx = mlx4_bitmap_alloc(&priv->counters_bitmap);
2538         if (*idx == -1) {
2539                 *idx = MLX4_SINK_COUNTER_INDEX(dev);
2540                 return -ENOSPC;
2541         }
2542
2543         return 0;
2544 }
2545
2546 int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx, u8 usage)
2547 {
2548         u32 in_modifier = RES_COUNTER | (((u32)usage & 3) << 30);
2549         u64 out_param;
2550         int err;
2551
2552         if (mlx4_is_mfunc(dev)) {
2553                 err = mlx4_cmd_imm(dev, 0, &out_param, in_modifier,
2554                                    RES_OP_RESERVE, MLX4_CMD_ALLOC_RES,
2555                                    MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
2556                 if (!err)
2557                         *idx = get_param_l(&out_param);
2558
2559                 return err;
2560         }
2561         return __mlx4_counter_alloc(dev, idx);
2562 }
2563 EXPORT_SYMBOL_GPL(mlx4_counter_alloc);
2564
2565 static int __mlx4_clear_if_stat(struct mlx4_dev *dev,
2566                                 u8 counter_index)
2567 {
2568         struct mlx4_cmd_mailbox *if_stat_mailbox;
2569         int err;
2570         u32 if_stat_in_mod = (counter_index & 0xff) | MLX4_QUERY_IF_STAT_RESET;
2571
2572         if_stat_mailbox = mlx4_alloc_cmd_mailbox(dev);
2573         if (IS_ERR(if_stat_mailbox))
2574                 return PTR_ERR(if_stat_mailbox);
2575
2576         err = mlx4_cmd_box(dev, 0, if_stat_mailbox->dma, if_stat_in_mod, 0,
2577                            MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C,
2578                            MLX4_CMD_NATIVE);
2579
2580         mlx4_free_cmd_mailbox(dev, if_stat_mailbox);
2581         return err;
2582 }
2583
2584 void __mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
2585 {
2586         if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_COUNTERS))
2587                 return;
2588
2589         if (idx == MLX4_SINK_COUNTER_INDEX(dev))
2590                 return;
2591
2592         __mlx4_clear_if_stat(dev, idx);
2593
2594         mlx4_bitmap_free(&mlx4_priv(dev)->counters_bitmap, idx, MLX4_USE_RR);
2596 }
2597
2598 void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
2599 {
2600         u64 in_param = 0;
2601
2602         if (mlx4_is_mfunc(dev)) {
2603                 set_param_l(&in_param, idx);
2604                 mlx4_cmd(dev, in_param, RES_COUNTER, RES_OP_RESERVE,
2605                          MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
2606                          MLX4_CMD_WRAPPED);
2607                 return;
2608         }
2609         __mlx4_counter_free(dev, idx);
2610 }
2611 EXPORT_SYMBOL_GPL(mlx4_counter_free);
2612
2613 int mlx4_get_default_counter_index(struct mlx4_dev *dev, int port)
2614 {
2615         struct mlx4_priv *priv = mlx4_priv(dev);
2616
2617         return priv->def_counter[port - 1];
2618 }
2619 EXPORT_SYMBOL_GPL(mlx4_get_default_counter_index);
2620
2621 void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry, int port)
2622 {
2623         struct mlx4_priv *priv = mlx4_priv(dev);
2624
2625         priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
2626 }
2627 EXPORT_SYMBOL_GPL(mlx4_set_admin_guid);
2628
2629 __be64 mlx4_get_admin_guid(struct mlx4_dev *dev, int entry, int port)
2630 {
2631         struct mlx4_priv *priv = mlx4_priv(dev);
2632
2633         return priv->mfunc.master.vf_admin[entry].vport[port].guid;
2634 }
2635 EXPORT_SYMBOL_GPL(mlx4_get_admin_guid);
2636
2637 void mlx4_set_random_admin_guid(struct mlx4_dev *dev, int entry, int port)
2638 {
2639         struct mlx4_priv *priv = mlx4_priv(dev);
2640         __be64 guid;
2641
2642         /* hw GUID */
2643         if (entry == 0)
2644                 return;
2645
2646         get_random_bytes((char *)&guid, sizeof(guid));
2647         guid &= ~(cpu_to_be64(1ULL << 56));
2648         guid |= cpu_to_be64(1ULL << 57);
2649         priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
2650 }
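
/*
 * Informational note: the bit operations above act on the first byte
 * of the big-endian GUID, clearing the group/multicast bit (1 << 56)
 * and setting the locally-administered bit (1 << 57), the same EUI-64
 * convention used when generating random MAC addresses.
 */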
2651
2652 static int mlx4_setup_hca(struct mlx4_dev *dev)
2653 {
2654         struct mlx4_priv *priv = mlx4_priv(dev);
2655         int err;
2656         int port;
2657         __be32 ib_port_default_caps;
2658
2659         err = mlx4_init_uar_table(dev);
2660         if (err) {
2661                 mlx4_err(dev, "Failed to initialize user access region table, aborting\n");
2662                 return err;
2663         }
2664
2665         err = mlx4_uar_alloc(dev, &priv->driver_uar);
2666         if (err) {
2667                 mlx4_err(dev, "Failed to allocate driver access region, aborting\n");
2668                 goto err_uar_table_free;
2669         }
2670
2671         priv->kar = ioremap((phys_addr_t) priv->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
2672         if (!priv->kar) {
2673                 mlx4_err(dev, "Couldn't map kernel access region, aborting\n");
2674                 err = -ENOMEM;
2675                 goto err_uar_free;
2676         }
2677
2678         err = mlx4_init_pd_table(dev);
2679         if (err) {
2680                 mlx4_err(dev, "Failed to initialize protection domain table, aborting\n");
2681                 goto err_kar_unmap;
2682         }
2683
2684         err = mlx4_init_xrcd_table(dev);
2685         if (err) {
2686                 mlx4_err(dev, "Failed to initialize reliable connection domain table, aborting\n");
2687                 goto err_pd_table_free;
2688         }
2689
2690         err = mlx4_init_mr_table(dev);
2691         if (err) {
2692                 mlx4_err(dev, "Failed to initialize memory region table, aborting\n");
2693                 goto err_xrcd_table_free;
2694         }
2695
2696         if (!mlx4_is_slave(dev)) {
2697                 err = mlx4_init_mcg_table(dev);
2698                 if (err) {
2699                         mlx4_err(dev, "Failed to initialize multicast group table, aborting\n");
2700                         goto err_mr_table_free;
2701                 }
2702                 err = mlx4_config_mad_demux(dev);
2703                 if (err) {
2704                         mlx4_err(dev, "Failed in config_mad_demux, aborting\n");
2705                         goto err_mcg_table_free;
2706                 }
2707         }
2708
2709         err = mlx4_init_eq_table(dev);
2710         if (err) {
2711                 mlx4_err(dev, "Failed to initialize event queue table, aborting\n");
2712                 goto err_mcg_table_free;
2713         }
2714
2715         err = mlx4_cmd_use_events(dev);
2716         if (err) {
2717                 mlx4_err(dev, "Failed to switch to event-driven firmware commands, aborting\n");
2718                 goto err_eq_table_free;
2719         }
2720
2721         err = mlx4_NOP(dev);
2722         if (err) {
2723                 if (dev->flags & MLX4_FLAG_MSI_X) {
2724                         mlx4_warn(dev, "NOP command failed to generate MSI-X interrupt (IRQ %d)\n",
2725                                   priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
2726                         mlx4_warn(dev, "Trying again without MSI-X\n");
2727                 } else {
2728                         mlx4_err(dev, "NOP command failed to generate interrupt (IRQ %d), aborting\n",
2729                                  priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
2730                         mlx4_err(dev, "BIOS or ACPI interrupt routing problem?\n");
2731                 }
2732
2733                 goto err_cmd_poll;
2734         }
2735
2736         mlx4_dbg(dev, "NOP command IRQ test passed\n");
2737
2738         err = mlx4_init_cq_table(dev);
2739         if (err) {
2740                 mlx4_err(dev, "Failed to initialize completion queue table, aborting\n");
2741                 goto err_cmd_poll;
2742         }
2743
2744         err = mlx4_init_srq_table(dev);
2745         if (err) {
2746                 mlx4_err(dev, "Failed to initialize shared receive queue table, aborting\n");
2747                 goto err_cq_table_free;
2748         }
2749
2750         err = mlx4_init_qp_table(dev);
2751         if (err) {
2752                 mlx4_err(dev, "Failed to initialize queue pair table, aborting\n");
2753                 goto err_srq_table_free;
2754         }
2755
2756         if (!mlx4_is_slave(dev)) {
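                     /* -ENOENT here just means the device lacks the
                      * counters capability; only other errors are fatal.
                      */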
2757                 err = mlx4_init_counters_table(dev);
2758                 if (err && err != -ENOENT) {
2759                         mlx4_err(dev, "Failed to initialize counters table, aborting\n");
2760                         goto err_qp_table_free;
2761                 }
2762         }
2763
2764         err = mlx4_allocate_default_counters(dev);
2765         if (err) {
2766                 mlx4_err(dev, "Failed to allocate default counters, aborting\n");
2767                 goto err_counters_table_free;
2768         }
2769
2770         if (!mlx4_is_slave(dev)) {
2771                 for (port = 1; port <= dev->caps.num_ports; port++) {
2772                         ib_port_default_caps = 0;
2773                         err = mlx4_get_port_ib_caps(dev, port,
2774                                                     &ib_port_default_caps);
2775                         if (err)
2776                                 mlx4_warn(dev, "failed to get port %d default ib capabilities (%d). Continuing with caps = 0\n",
2777                                           port, err);
2778                         dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
2779
2780                         /* initialize per-slave default ib port capabilities */
2781                         if (mlx4_is_master(dev)) {
2782                                 int i;
2783                                 for (i = 0; i < dev->num_slaves; i++) {
2784                                         if (i == mlx4_master_func_num(dev))
2785                                                 continue;
2786                                         priv->mfunc.master.slave_state[i].ib_cap_mask[port] =
2787                                                 ib_port_default_caps;
2788                                 }
2789                         }
2790
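                             /* Multi-function (SR-IOV) devices cap the IB
                              * MTU at 2K; single-function devices default
                              * to the full 4K.
                              */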
2791                         if (mlx4_is_mfunc(dev))
2792                                 dev->caps.port_ib_mtu[port] = IB_MTU_2048;
2793                         else
2794                                 dev->caps.port_ib_mtu[port] = IB_MTU_4096;
2795
2796                         err = mlx4_SET_PORT(dev, port, mlx4_is_master(dev) ?
2797                                             dev->caps.pkey_table_len[port] : -1);
2798                         if (err) {
2799                                 mlx4_err(dev, "Failed to set port %d, aborting\n",
2800                                          port);
2801                                 goto err_default_counters_free;
2802                         }
2803                 }
2804         }
2805
2806         return 0;
2807
2808 err_default_counters_free:
2809         mlx4_cleanup_default_counters(dev);
2810
2811 err_counters_table_free:
2812         if (!mlx4_is_slave(dev))
2813                 mlx4_cleanup_counters_table(dev);
2814
2815 err_qp_table_free:
2816         mlx4_cleanup_qp_table(dev);
2817
2818 err_srq_table_free:
2819         mlx4_cleanup_srq_table(dev);
2820
2821 err_cq_table_free:
2822         mlx4_cleanup_cq_table(dev);
2823
2824 err_cmd_poll:
2825         mlx4_cmd_use_polling(dev);
2826
2827 err_eq_table_free:
2828         mlx4_cleanup_eq_table(dev);
2829
2830 err_mcg_table_free:
2831         if (!mlx4_is_slave(dev))
2832                 mlx4_cleanup_mcg_table(dev);
2833
2834 err_mr_table_free:
2835         mlx4_cleanup_mr_table(dev);
2836
2837 err_xrcd_table_free:
2838         mlx4_cleanup_xrcd_table(dev);
2839
2840 err_pd_table_free:
2841         mlx4_cleanup_pd_table(dev);
2842
2843 err_kar_unmap:
2844         iounmap(priv->kar);
2845
2846 err_uar_free:
2847         mlx4_uar_free(dev, &priv->driver_uar);
2848
2849 err_uar_table_free:
2850         mlx4_cleanup_uar_table(dev);
2851         return err;
2852 }
2853
2854 static int mlx4_init_affinity_hint(struct mlx4_dev *dev, int port, int eqn)
2855 {
2856         int requested_cpu = 0;
2857         struct mlx4_priv *priv = mlx4_priv(dev);
2858         struct mlx4_eq *eq;
2859         int off = 0;
2860         int i;
2861
2862         if (eqn > dev->caps.num_comp_vectors)
2863                 return -EINVAL;
2864
2865         for (i = 1; i < port; i++)
2866                 off += mlx4_get_eqs_per_port(dev, i);
2867
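             /* eqn - off is this EQ's index within its port's range of EQs;
              * subtract one more once past the async EQ's slot, so that
              * completion EQs map to consecutive CPUs starting at 0.
              */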
2868         requested_cpu = eqn - off - !!(eqn > MLX4_EQ_ASYNC);
2869
2870         /* A negative value means the EQs are shared and this call came from the second port */
2871         if (requested_cpu < 0)
2872                 return 0;
2873
2874         eq = &priv->eq_table.eq[eqn];
2875
2876         if (!zalloc_cpumask_var(&eq->affinity_mask, GFP_KERNEL))
2877                 return -ENOMEM;
2878
2879         cpumask_set_cpu(requested_cpu, eq->affinity_mask);
2880
2881         return 0;
2882 }
2883
2884 static void mlx4_enable_msi_x(struct mlx4_dev *dev)
2885 {
2886         struct mlx4_priv *priv = mlx4_priv(dev);
2887         struct msix_entry *entries;
2888         int i;
2889         int port = 0;
2890
2891         if (msi_x) {
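                     /* Request up to one vector per online CPU per port
                      * plus one for the async EQ, bounded by the EQs the
                      * device exposes beyond its reserved ones and by the
                      * driver's MAX_MSIX limit.
                      */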
2892                 int nreq = min3(dev->caps.num_ports *
2893                                 (int)num_online_cpus() + 1,
2894                                 dev->caps.num_eqs - dev->caps.reserved_eqs,
2895                                 MAX_MSIX);
2896
2897                 entries = kcalloc(nreq, sizeof(*entries), GFP_KERNEL);
2898                 if (!entries)
2899                         goto no_msi;
2900
2901                 for (i = 0; i < nreq; ++i)
2902                         entries[i].entry = i;
2903
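                     /* Fall back to INTx unless at least two vectors are
                      * granted: one for the async EQ plus one completion
                      * vector.
                      */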
2904                 nreq = pci_enable_msix_range(dev->persist->pdev, entries, 2,
2905                                              nreq);
2906
2907                 if (nreq < 0 || nreq < MLX4_EQ_ASYNC) {
2908                         kfree(entries);
2909                         goto no_msi;
2910                 }
2911                 /* 1 vector is reserved for events (the asynchronous EQ) */
2912                 dev->caps.num_comp_vectors = nreq - 1;
2913
2914                 priv->eq_table.eq[MLX4_EQ_ASYNC].irq = entries[0].vector;
2915                 bitmap_zero(priv->eq_table.eq[MLX4_EQ_ASYNC].actv_ports.ports,
2916                             dev->caps.num_ports);
2917
2918                 for (i = 0; i < dev->caps.num_comp_vectors + 1; i++) {
2919                         if (i == MLX4_EQ_ASYNC)
2920                                 continue;
2921
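                             /* entries[0] is the async EQ's vector;
                              * completion EQ i takes MSI-X slot i + 1 if it
                              * precedes MLX4_EQ_ASYNC and slot i once past it.
                              */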
2922                         priv->eq_table.eq[i].irq =
2923                                 entries[i + 1 - !!(i > MLX4_EQ_ASYNC)].vector;
2924
2925                         if (MLX4_IS_LEGACY_EQ_MODE(dev->caps)) {
2926                                 bitmap_fill(priv->eq_table.eq[i].actv_ports.ports,
2927                                             dev->caps.num_ports);
2928                                 /* We don't set affinity hint when there
2929                                  * aren't enough EQs
2930                                  */
2931                         } else {
2932                                 set_bit(port,
2933                                         priv->eq_table.eq[i].actv_ports.ports);
2934                                 if (mlx4_init_affinity_hint(dev, port + 1, i))
2935                                         mlx4_warn(dev, "Couldn't init hint cpumask for EQ %d\n",
2936                                                   i);
2937                         }
2938                         /* We divide the EQs evenly between the ports.
2939                          * (dev->caps.num_comp_vectors / dev->caps.num_ports)
2940                          * is the number of EQs per port
2941                          * (i.e. eqs_per_port). Ideally we would write
2942                          * (i + 1) % eqs_per_port == 0; however, since
2943                          * there is an asynchronous EQ, we have to skip
2944                          * over it by comparing this condition to
2945                          * !!((i + 1) > MLX4_EQ_ASYNC).
2946                          */
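                             /* For example: with 2 ports and 8 completion
                              * vectors, eqs_per_port is 4, so EQs 1-4 serve
                              * port 1 and EQs 5-8 serve port 2.
                              */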
2947                         if ((dev->caps.num_comp_vectors > dev->caps.num_ports) &&
2948                             ((i + 1) %
2949                              (dev->caps.num_comp_vectors / dev->caps.num_ports)) ==
2950                             !!((i + 1) > MLX4_EQ_ASYNC))
2951                                 /* If dev->caps.num_comp_vectors < dev->caps.num_ports,
2952                                  * everything is shared anyway.
2953                                  */
2954                                 port++;
2955                 }
2956
2957                 dev->flags |= MLX4_FLAG_MSI_X;
2958
2959                 kfree(entries);
2960                 return;
2961         }
2962
2963 no_msi:
2964         dev->caps.num_comp_vectors = 1;
2965
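             /* INTx fallback: only eq[0] and eq[1] are set up here (the
              * async EQ plus one completion EQ), so the async EQ index
              * must be 0 or 1.
              */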
2966         BUG_ON(MLX4_EQ_ASYNC >= 2);
2967         for (i = 0; i < 2; ++i) {
2968                 priv->eq_table.eq[i].irq = dev->persist->pdev->irq;
2969                 if (i != MLX4_EQ_ASYNC) {
2970                         bitmap_fill(priv->eq_table.eq[i].actv_ports.ports,
2971                                     dev->caps.num_ports);
2972                 }
2973         }
2974 }
2975
2976 static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
2977 {
2978         struct devlink *devlink = priv_to_devlink(mlx4_priv(dev));
2979         struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
2980         int err;
2981
2982         err = devlink_port_register(devlink, &info->devlink_port, port);
2983         if (err)
2984                 return err;
2985
2986         info->dev = dev;
2987         info->port = port;
2988         if (!mlx4_is_slave(dev)) {
2989                 mlx4_init_mac_table(dev, &info->mac_table);
2990                 mlx4_init_vlan_table(dev, &info->vlan_table);
2991                 mlx4_init_roce_gid_table(dev, &info->gid_table);
2992                 info->base_qpn = mlx4_get_base_qpn(dev, port);
2993         }
2994
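             /* In multi-function mode the port type attribute is created
              * read-only with no store method, so the port type cannot be
              * changed from this sysfs node.
              */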
2995         sprintf(info->dev_name, "mlx4_port%d", port);
2996         info->port_attr.attr.name = info->dev_name;
2997         if (mlx4_is_mfunc(dev)) {
2998                 info->port_attr.attr.mode = 0444;
2999         } else {
3000                 info->port_attr.attr.mode = 0644;
3001                 info->port_attr.store     = set_port_type;
3002         }
3003         info->port_attr.show      = show_port_type;
3004         sysfs_attr_init(&info->port_attr.attr);
3005
3006         err = device_create_file(&dev->persist->pdev->dev, &info->port_attr);
3007         if (err) {
3008                 mlx4_err(dev, "Failed to create file for port %d\n", port);
3009                 devlink_port_unregister(&info->devlink_port);
3010                 info->port = -1;
                     return err;
3011         }
3012
3013         sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port);
3014         info->port_mtu_attr.attr.name = info->dev_mtu_name;
3015         if (mlx4_is_mfunc(dev)) {
3016                 info->port_mtu_attr.attr.mode = 0444;
3017         } else {
3018                 info->port_mtu_attr.attr.mode = 0644;
3019                 info->port_mtu_attr.store     = set_port_ib_mtu;
3020         }
3021         info->port_mtu_attr.show      = show_port_ib_mtu;
3022         sysfs_attr_init(&info->port_mtu_attr.attr);
3023
3024         err = device_create_file(&dev->persist->pdev->dev,
3025                                  &info->port_mtu_attr);
3026         if (err) {
3027                 mlx4_err(dev, "Failed to create mtu file for port %d\n", port);
3028                 device_remove_file(&info->dev->persist->pdev->dev,
3029                                    &info->port_attr);
3030                 devlink_port_unregister(&info->devlink_port);
3031                 info->port = -1;
3032         }
3033
3034         return err;
3035 }
3036
3037 static void mlx4_cleanup_port_info(struct mlx4_port_info *info)
3038 {
3039         if (info->port < 0)
3040                 return;
3041
3042         device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr);
3043         device_remove_file(&info->dev->persist->pdev->dev,
3044                            &info->port_mtu_attr);
3045         devlink_port_unregister(&info->devlink_port);
3046
3047 #ifdef CONFIG_RFS_ACCEL
3048         free_irq_cpu_rmap(info->rmap);
3049         info->rmap = NULL;
3050 #endif
3051 }
3052
3053 static int mlx4_init_steering(struct mlx4_dev *dev)
3054 {
3055         struct mlx4_priv *priv = mlx4_priv(dev);
3056         int num_entries = dev->caps.num_ports;
3057         int i, j;
3058
3059         priv->steer = kcalloc(num_entries, sizeof(struct mlx4_steer), GFP_KERNEL);
3060         if (!priv->steer)
3061                 return -ENOMEM;
3062
3063         for (i = 0; i < num_entries; i++)
3064                 for (j = 0; j < MLX4_NUM_STEERS; j++) {
3065                         INIT_LIST_HEAD(&priv->steer[i].promisc_qps[j]);
3066                         INIT_LIST_HEAD(&priv->steer[i].steer_entries[j]);
3067                 }
3068         return 0;
3069 }
3070
3071 static void mlx4_clear_steering(struct mlx4_dev *dev)
3072 {
3073         struct mlx4_priv *priv = mlx4_priv(dev);
3074         struct mlx4_steer_index *entry, *tmp_entry;
3075         struct mlx4_promisc_qp *pqp, *tmp_pqp;
3076         int num_entries = dev->caps.num_ports;
3077         int i, j;
3078
3079         for (i = 0; i < num_entries; i++) {
3080                 for (j = 0; j < MLX4_NUM_STEERS; j++) {
3081                         list_for_each_entry_safe(pqp, tmp_pqp,
3082                                                  &priv->steer[i].promisc_qps[j],
3083                                                  list) {
3084                                 list_del(&pqp->list);
3085                                 kfree(pqp);
3086                         }
3087                         list_for_each_entry_safe(entry, tmp_entry,
3088                                                  &priv->steer[i].steer_entries[j],
3089                                                  list) {
3090                                 list_del(&entry->list);
3091                                 list_for_each_entry_safe(pqp, tmp_pqp,
3092                                                          &entry->duplicates,
3093                                                          list) {
3094                                         list_del(&pqp->list);
3095                                         kfree(pqp);
3096                                 }
3097                                 kfree(entry);
3098                         }
3099