/* QLogic qedr NIC Driver
 * Copyright (c) 2015-2016  QLogic Corporation
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and /or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/dma-mapping.h>
#include <linux/crc32.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/udp.h>
#include <linux/iommu.h>

#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/iw_cm.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>

#include <linux/qed/common_hsi.h>
#include "qedr_hsi_rdma.h"
#include <linux/qed/qed_if.h>
#include "qedr.h"
#include "verbs.h"
#include <rdma/qedr-abi.h>
#include "qedr_roce_cm.h"

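/* The DQ_PWM_OFFSET_* values used throughout this file are PWM doorbell
 * offsets; DB_ADDR_SHIFT() converts one into a byte offset within the
 * doorbell BAR (by DB_PWM_ADDR_OFFSET_SHIFT), e.g. when forming
 * db_addr = dev->db_addr + DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD)
 * further down.
 */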
#define DB_ADDR_SHIFT(addr)		((addr) << DB_PWM_ADDR_OFFSET_SHIFT)

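/* Clamp the response length to what the caller can receive, so older
 * user-space libraries with a shorter response struct still work.
 */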
static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
					size_t len)
{
	size_t min_len = min_t(size_t, len, udata->outlen);

	return ib_copy_to_udata(udata, src, min_len);
}

int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
{
	if (index >= QEDR_ROCE_PKEY_TABLE_LEN)
		return -EINVAL;

	*pkey = QEDR_ROCE_PKEY_DEFAULT;
	return 0;
}

int qedr_iw_query_gid(struct ib_device *ibdev, u8 port,
		      int index, union ib_gid *sgid)
{
	struct qedr_dev *dev = get_qedr_dev(ibdev);

	memset(sgid->raw, 0, sizeof(sgid->raw));
	ether_addr_copy(sgid->raw, dev->ndev->dev_addr);

	DP_DEBUG(dev, QEDR_MSG_INIT, "QUERY sgid[%d]=%llx:%llx\n", index,
		 sgid->global.interface_id, sgid->global.subnet_prefix);

	return 0;
}

int qedr_query_device(struct ib_device *ibdev,
		      struct ib_device_attr *attr, struct ib_udata *udata)
{
	struct qedr_dev *dev = get_qedr_dev(ibdev);
	struct qedr_device_attr *qattr = &dev->attr;

	if (!dev->rdma_ctx) {
		DP_ERR(dev,
		       "qedr_query_device called with invalid params rdma_ctx=%p\n",
		       dev->rdma_ctx);
		return -EINVAL;
	}

	memset(attr, 0, sizeof(*attr));

	attr->fw_ver = qattr->fw_ver;
	attr->sys_image_guid = qattr->sys_image_guid;
	attr->max_mr_size = qattr->max_mr_size;
	attr->page_size_cap = qattr->page_size_caps;
	attr->vendor_id = qattr->vendor_id;
	attr->vendor_part_id = qattr->vendor_part_id;
	attr->hw_ver = qattr->hw_ver;
	attr->max_qp = qattr->max_qp;
	attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
	    IB_DEVICE_RC_RNR_NAK_GEN |
	    IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;

	attr->max_sge = qattr->max_sge;
	attr->max_sge_rd = qattr->max_sge;
	attr->max_cq = qattr->max_cq;
	attr->max_cqe = qattr->max_cqe;
	attr->max_mr = qattr->max_mr;
	attr->max_mw = qattr->max_mw;
	attr->max_pd = qattr->max_pd;
	attr->atomic_cap = dev->atomic_cap;
	attr->max_fmr = qattr->max_fmr;
	attr->max_map_per_fmr = 16;
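	/* 1 << (fls(n) - 1) rounds n down to the nearest power of two,
	 * e.g. fls(24) - 1 == 4, so 24 supported resources are reported
	 * as 16.
	 */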
	attr->max_qp_init_rd_atom =
	    1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
	attr->max_qp_rd_atom =
	    min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
		attr->max_qp_init_rd_atom);

	attr->max_srq = qattr->max_srq;
	attr->max_srq_sge = qattr->max_srq_sge;
	attr->max_srq_wr = qattr->max_srq_wr;

	attr->local_ca_ack_delay = qattr->dev_ack_delay;
	attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
	attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
	attr->max_ah = qattr->max_ah;

	return 0;
}

#define QEDR_SPEED_SDR		(1)
#define QEDR_SPEED_DDR		(2)
#define QEDR_SPEED_QDR		(4)
#define QEDR_SPEED_FDR10	(8)
#define QEDR_SPEED_FDR		(16)
#define QEDR_SPEED_EDR		(32)
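/* These values mirror the IB_SPEED_* encoding from <rdma/ib_verbs.h>
 * (SDR=1, DDR=2, QDR=4, FDR10=8, FDR=16, EDR=32), so they can be stored
 * directly into ib_port_attr.active_speed.
 */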

static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
					    u8 *ib_width)
{
	switch (speed) {
	case 1000:
		*ib_speed = QEDR_SPEED_SDR;
		*ib_width = IB_WIDTH_1X;
		break;
	case 10000:
		*ib_speed = QEDR_SPEED_QDR;
		*ib_width = IB_WIDTH_1X;
		break;

	case 20000:
		*ib_speed = QEDR_SPEED_DDR;
		*ib_width = IB_WIDTH_4X;
		break;

	case 25000:
		*ib_speed = QEDR_SPEED_EDR;
		*ib_width = IB_WIDTH_1X;
		break;

	case 40000:
		*ib_speed = QEDR_SPEED_QDR;
		*ib_width = IB_WIDTH_4X;
		break;

	case 50000:
		*ib_speed = QEDR_SPEED_QDR;
		*ib_width = IB_WIDTH_4X;
		break;

	case 100000:
		*ib_speed = QEDR_SPEED_EDR;
		*ib_width = IB_WIDTH_4X;
		break;

	default:
		/* Unsupported */
		*ib_speed = QEDR_SPEED_SDR;
		*ib_width = IB_WIDTH_1X;
	}
}

int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
{
	struct qedr_dev *dev;
	struct qed_rdma_port *rdma_port;

	dev = get_qedr_dev(ibdev);
	if (port > 1) {
		DP_ERR(dev, "invalid_port=0x%x\n", port);
		return -EINVAL;
	}

	if (!dev->rdma_ctx) {
		DP_ERR(dev, "rdma_ctx is NULL\n");
		return -EINVAL;
	}

	rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);

	/* *attr being zeroed by the caller, avoid zeroing it here */
	if (rdma_port->port_state == QED_RDMA_PORT_UP) {
		attr->state = IB_PORT_ACTIVE;
		attr->phys_state = 5;
	} else {
		attr->state = IB_PORT_DOWN;
		attr->phys_state = 3;
	}
	attr->max_mtu = IB_MTU_4096;
	attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
	attr->lid = 0;
	attr->lmc = 0;
	attr->sm_lid = 0;
	attr->sm_sl = 0;
	attr->port_cap_flags = IB_PORT_IP_BASED_GIDS;
	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
		attr->gid_tbl_len = 1;
		attr->pkey_tbl_len = 1;
	} else {
		attr->gid_tbl_len = QEDR_MAX_SGID;
		attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
	}
	attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
	attr->qkey_viol_cntr = 0;
	get_link_speed_and_width(rdma_port->link_speed,
				 &attr->active_speed, &attr->active_width);
	attr->max_msg_sz = rdma_port->max_msg_size;
	attr->max_vl_num = 4;

	return 0;
}

int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
		     struct ib_port_modify *props)
{
	struct qedr_dev *dev;

	dev = get_qedr_dev(ibdev);
	if (port > 1) {
		DP_ERR(dev, "invalid_port=0x%x\n", port);
		return -EINVAL;
	}

	return 0;
}

static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
			 unsigned long len)
{
	struct qedr_mm *mm;

	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
	if (!mm)
		return -ENOMEM;

	mm->key.phy_addr = phy_addr;
	/* This function might be called with a length which is not a multiple
	 * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel
	 * forces this granularity by increasing the requested size if needed.
	 * When qedr_mmap is called, it will search the list with the updated
	 * length as a key. To prevent search failures, the length is rounded up
	 * in advance to PAGE_SIZE.
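	 * For example, with 4 KiB pages a request of len 0x1100 is stored
	 * (and later looked up) as 0x2000.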
	 */
	mm->key.len = roundup(len, PAGE_SIZE);
	INIT_LIST_HEAD(&mm->entry);

	mutex_lock(&uctx->mm_list_lock);
	list_add(&mm->entry, &uctx->mm_head);
	mutex_unlock(&uctx->mm_list_lock);

	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
		 "added (addr=0x%llx,len=0x%lx) for ctx=%p\n",
		 (unsigned long long)mm->key.phy_addr,
		 (unsigned long)mm->key.len, uctx);

	return 0;
}

static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
			     unsigned long len)
{
	bool found = false;
	struct qedr_mm *mm;

	mutex_lock(&uctx->mm_list_lock);
	list_for_each_entry(mm, &uctx->mm_head, entry) {
		if (len != mm->key.len || phy_addr != mm->key.phy_addr)
			continue;

		found = true;
		break;
	}
	mutex_unlock(&uctx->mm_list_lock);
	/* Don't dereference the loop cursor here: when nothing matched it
	 * does not point at a valid entry, so log the searched-for key.
	 */
	DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
		 "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n",
		 phy_addr, len, uctx, found);

	return found;
}

struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev,
					struct ib_udata *udata)
{
	int rc;
	struct qedr_ucontext *ctx;
	struct qedr_alloc_ucontext_resp uresp;
	struct qedr_dev *dev = get_qedr_dev(ibdev);
	struct qed_rdma_add_user_out_params oparams;

	if (!udata)
		return ERR_PTR(-EFAULT);

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return ERR_PTR(-ENOMEM);

	rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
	if (rc) {
		DP_ERR(dev,
		       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this, consider increasing the number of DPIs, increasing the doorbell BAR size, or closing unnecessary RoCE applications. For instructions on increasing the number of DPIs, consult the qedr readme\n",
		       rc);
		goto err;
	}

	ctx->dpi = oparams.dpi;
	ctx->dpi_addr = oparams.dpi_addr;
	ctx->dpi_phys_addr = oparams.dpi_phys_addr;
	ctx->dpi_size = oparams.dpi_size;
	INIT_LIST_HEAD(&ctx->mm_head);
	mutex_init(&ctx->mm_list_lock);

	memset(&uresp, 0, sizeof(uresp));

	uresp.dpm_enabled = dev->user_dpm_enabled;
	uresp.wids_enabled = 1;
	uresp.wid_count = oparams.wid_count;
	uresp.db_pa = ctx->dpi_phys_addr;
	uresp.db_size = ctx->dpi_size;
	uresp.max_send_wr = dev->attr.max_sqe;
	uresp.max_recv_wr = dev->attr.max_rqe;
	uresp.max_srq_wr = dev->attr.max_srq_wr;
	uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
	uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
	uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
	uresp.max_cqes = QEDR_MAX_CQES;

	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
	if (rc)
		goto err_remove_user;

	ctx->dev = dev;

	rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size);
	if (rc)
		goto err_remove_user;

	DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
		 &ctx->ibucontext);
	return &ctx->ibucontext;

err_remove_user:
	/* Release the DPI taken by rdma_add_user() above so a failed
	 * context does not leak it.
	 */
	dev->ops->rdma_remove_user(dev->rdma_ctx, ctx->dpi);
err:
	kfree(ctx);
	return ERR_PTR(rc);
}

int qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
{
	struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
	struct qedr_mm *mm, *tmp;
	int status = 0;

	DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
		 uctx);
	uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi);

	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
		DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
			 "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n",
			 mm->key.phy_addr, mm->key.len, uctx);
		list_del(&mm->entry);
		kfree(mm);
	}

	kfree(uctx);
	return status;
}

int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
	struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
	struct qedr_dev *dev = get_qedr_dev(context->device);
	unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
	u64 unmapped_db = dev->db_phys_addr;
	unsigned long len = (vma->vm_end - vma->vm_start);
	int rc = 0;
	bool found;

	DP_DEBUG(dev, QEDR_MSG_INIT,
		 "qedr_mmap called vm_page=0x%lx vm_pgoff=0x%lx unmapped_db=0x%llx db_size=%x, len=%lx\n",
		 vm_page, vma->vm_pgoff, unmapped_db, dev->db_size, len);
	if (vma->vm_start & (PAGE_SIZE - 1)) {
		DP_ERR(dev, "Vma_start not page aligned = %ld\n",
		       vma->vm_start);
		return -EINVAL;
	}

	found = qedr_search_mmap(ucontext, vm_page, len);
	if (!found) {
		DP_ERR(dev, "Vma_pgoff not found in mapped array = %ld\n",
		       vma->vm_pgoff);
		return -EINVAL;
	}

	if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
						     dev->db_size))) {
		DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
		if (vma->vm_flags & VM_READ) {
			DP_ERR(dev, "Trying to map doorbell bar for read\n");
			return -EPERM;
		}

		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);

		rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
					PAGE_SIZE, vma->vm_page_prot);
	} else {
		DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping chains\n");
		rc = remap_pfn_range(vma, vma->vm_start,
				     vma->vm_pgoff, len, vma->vm_page_prot);
	}
	DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_mmap return code: %d\n", rc);
	return rc;
}

struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
			    struct ib_ucontext *context, struct ib_udata *udata)
{
	struct qedr_dev *dev = get_qedr_dev(ibdev);
	struct qedr_pd *pd;
	u16 pd_id;
	int rc;

	DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
		 (udata && context) ? "User Lib" : "Kernel");

	if (!dev->rdma_ctx) {
		DP_ERR(dev, "invalid RDMA context\n");
		return ERR_PTR(-EINVAL);
	}

	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
	if (!pd)
		return ERR_PTR(-ENOMEM);

	rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
	if (rc)
		goto err;

	pd->pd_id = pd_id;

	if (udata && context) {
		struct qedr_alloc_pd_uresp uresp = {
			.pd_id = pd_id,
		};

		rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
		if (rc) {
			DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
			dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
			goto err;
		}

		pd->uctx = get_qedr_ucontext(context);
		pd->uctx->pd = pd;
	}

	return &pd->ibpd;

err:
	kfree(pd);
	return ERR_PTR(rc);
}

int qedr_dealloc_pd(struct ib_pd *ibpd)
{
	struct qedr_dev *dev = get_qedr_dev(ibpd->device);
	struct qedr_pd *pd = get_qedr_pd(ibpd);

	if (!pd) {
		pr_err("Invalid PD received in dealloc_pd\n");
		return -EINVAL;
	}

	DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
	dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);

	kfree(pd);

	return 0;
}

static void qedr_free_pbl(struct qedr_dev *dev,
			  struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
{
	struct pci_dev *pdev = dev->pdev;
	int i;

	for (i = 0; i < pbl_info->num_pbls; i++) {
		if (!pbl[i].va)
			continue;
		dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
				  pbl[i].va, pbl[i].pa);
	}

	kfree(pbl);
}

#define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
#define MAX_FW_PBL_PAGE_SIZE (64 * 1024)

#define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64))
#define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
#define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)
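/* Each PBE is a u64, so a 4 KiB PBL page holds 512 entries and a 64 KiB
 * page holds 8192. A two-layer table can therefore address up to
 * 8192 * 8192 = 67,108,864 pages (256 GiB when the FW page size is 4 KiB).
 */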

static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
					   struct qedr_pbl_info *pbl_info,
					   gfp_t flags)
{
	struct pci_dev *pdev = dev->pdev;
	struct qedr_pbl *pbl_table;
	dma_addr_t *pbl_main_tbl;
	dma_addr_t pa;
	void *va;
	int i;

	pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
	if (!pbl_table)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < pbl_info->num_pbls; i++) {
		va = dma_zalloc_coherent(&pdev->dev, pbl_info->pbl_size,
					 &pa, flags);
		if (!va)
			goto err;

		pbl_table[i].va = va;
		pbl_table[i].pa = pa;
	}

	/* Two-layer PBLs: if we have more than one pbl, we need to initialize
	 * the first one with physical pointers to all of the rest.
	 */
	pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
	for (i = 0; i < pbl_info->num_pbls - 1; i++)
		pbl_main_tbl[i] = pbl_table[i + 1].pa;

	return pbl_table;

err:
	/* qedr_free_pbl() skips the entries that are still zeroed from
	 * kcalloc(), frees every successfully allocated page, and then
	 * frees the table itself, so no separate unwind loop is needed
	 * (a manual loop here would double-free the pages).
	 */
	qedr_free_pbl(dev, pbl_info, pbl_table);

	return ERR_PTR(-ENOMEM);
}

static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
				struct qedr_pbl_info *pbl_info,
				u32 num_pbes, int two_layer_capable)
{
	u32 pbl_capacity;
	u32 pbl_size;
	u32 num_pbls;

	if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
		if (num_pbes > MAX_PBES_TWO_LAYER) {
			DP_ERR(dev, "prepare pbl table: too many pages %d\n",
			       num_pbes);
			return -EINVAL;
		}

		/* calculate required pbl page size */
		pbl_size = MIN_FW_PBL_PAGE_SIZE;
		pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
			       NUM_PBES_ON_PAGE(pbl_size);

		while (pbl_capacity < num_pbes) {
			pbl_size *= 2;
			pbl_capacity = pbl_size / sizeof(u64);
			pbl_capacity = pbl_capacity * pbl_capacity;
		}

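		/* Worked example: num_pbes = 300000 exceeds the 512 * 512
		 * two-layer capacity of 4 KiB pages, so pbl_size doubles to
		 * 8 KiB (1024 * 1024 capacity) and num_pbls below becomes
		 * DIV_ROUND_UP(300000, 1024) + 1 = 294.
		 */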
		num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
		num_pbls++;	/* One extra for layer 0, which points to the pbls */
		pbl_info->two_layered = true;
	} else {
		/* One layered PBL */
		num_pbls = 1;
		pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
				 roundup_pow_of_two((num_pbes * sizeof(u64))));
		pbl_info->two_layered = false;
	}

	pbl_info->num_pbls = num_pbls;
	pbl_info->pbl_size = pbl_size;
	pbl_info->num_pbes = num_pbes;

	DP_DEBUG(dev, QEDR_MSG_MR,
		 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
		 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);

	return 0;
}

static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
			       struct qedr_pbl *pbl,
			       struct qedr_pbl_info *pbl_info, u32 pg_shift)
{
	int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
	u32 fw_pg_cnt, fw_pg_per_umem_pg;
	struct qedr_pbl *pbl_tbl;
	struct scatterlist *sg;
	struct regpair *pbe;
	u64 pg_addr;
	int entry;

	if (!pbl_info->num_pbes)
		return;

	/* If we have a two-layered pbl, the first pbl points to the rest
	 * of the pbls and the first entry lies in the second pbl of the table.
	 */
	if (pbl_info->two_layered)
		pbl_tbl = &pbl[1];
	else
		pbl_tbl = pbl;

	pbe = (struct regpair *)pbl_tbl->va;
	if (!pbe) {
		DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
		return;
	}

	pbe_cnt = 0;

	shift = umem->page_shift;

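	/* Each umem page may span several FW pages, e.g. a 2 MiB huge page
	 * (page_shift 21) with 4 KiB FW pages (pg_shift 12) expands to
	 * BIT(21 - 12) = 512 PBEs.
	 */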
	fw_pg_per_umem_pg = BIT(umem->page_shift - pg_shift);

	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
		pages = sg_dma_len(sg) >> shift;
		pg_addr = sg_dma_address(sg);
		for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
			for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
				pbe->lo = cpu_to_le32(pg_addr);
				pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));

				pg_addr += BIT(pg_shift);
				pbe_cnt++;
				total_num_pbes++;
				pbe++;

				if (total_num_pbes == pbl_info->num_pbes)
					return;

				/* If the given pbl is full storing the pbes,
				 * move to next pbl.
				 */
				if (pbe_cnt ==
				    (pbl_info->pbl_size / sizeof(u64))) {
					pbl_tbl++;
					pbe = (struct regpair *)pbl_tbl->va;
					pbe_cnt = 0;
				}

				fw_pg_cnt++;
			}
		}
	}
}

static int qedr_copy_cq_uresp(struct qedr_dev *dev,
			      struct qedr_cq *cq, struct ib_udata *udata)
{
	struct qedr_create_cq_uresp uresp;
	int rc;

	memset(&uresp, 0, sizeof(uresp));

	uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
	uresp.icid = cq->icid;

	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
	if (rc)
		DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);

	return rc;
}

static void consume_cqe(struct qedr_cq *cq)
{
	if (cq->latest_cqe == cq->toggle_cqe)
		cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;

	cq->latest_cqe = qed_chain_consume(&cq->pbl);
}

static inline int qedr_align_cq_entries(int entries)
{
	u64 size, aligned_size;

	/* We allocate an extra entry that we don't report to the FW. */
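	/* Worked example (assuming a 32-byte QEDR_CQE_SIZE and 4 KiB pages):
	 * entries = 128 gives size = 129 * 32 = 4128, which aligns up to
	 * 8192, so 256 CQEs are actually allocated.
	 */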
	size = (entries + 1) * QEDR_CQE_SIZE;
	aligned_size = ALIGN(size, PAGE_SIZE);

	return aligned_size / QEDR_CQE_SIZE;
}

static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
				       struct qedr_dev *dev,
				       struct qedr_userq *q,
				       u64 buf_addr, size_t buf_len,
				       int access, int dmasync,
				       int alloc_and_init)
{
	u32 fw_pages;
	int rc;

	q->buf_addr = buf_addr;
	q->buf_len = buf_len;
	q->umem = ib_umem_get(ib_ctx, q->buf_addr, q->buf_len, access, dmasync);
	if (IS_ERR(q->umem)) {
		DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
		       PTR_ERR(q->umem));
		return PTR_ERR(q->umem);
	}

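	/* Convert the umem page count into FW page units, e.g. 16 pages of
	 * 2 MiB (page_shift 21) with a 4 KiB FW page (FW_PAGE_SHIFT 12)
	 * yield 16 << 9 = 8192 FW pages.
	 */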
	fw_pages = ib_umem_page_count(q->umem) <<
	    (q->umem->page_shift - FW_PAGE_SHIFT);

	rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
	if (rc)
		goto err0;

	if (alloc_and_init) {
		q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
		if (IS_ERR(q->pbl_tbl)) {
			rc = PTR_ERR(q->pbl_tbl);
			goto err0;
		}
		qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
				   FW_PAGE_SHIFT);
	} else {
		q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL);
		if (!q->pbl_tbl) {
			rc = -ENOMEM;
			goto err0;
		}
	}

	return 0;

err0:
	ib_umem_release(q->umem);
	q->umem = NULL;

	return rc;
}

static inline void qedr_init_cq_params(struct qedr_cq *cq,
				       struct qedr_ucontext *ctx,
				       struct qedr_dev *dev, int vector,
				       int chain_entries, int page_cnt,
				       u64 pbl_ptr,
				       struct qed_rdma_create_cq_in_params
				       *params)
{
	memset(params, 0, sizeof(*params));
	params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
	params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
	params->cnq_id = vector;
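	/* The chain was sized with one extra entry that the FW is not told
	 * about (see qedr_align_cq_entries()), hence the minus one.
	 */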
	params->cq_size = chain_entries - 1;
	params->dpi = (ctx) ? ctx->dpi : dev->dpi;
	params->pbl_num_pages = page_cnt;
	params->pbl_ptr = pbl_ptr;
	params->pbl_two_level = 0;
}

static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
{
	cq->db.data.agg_flags = flags;
	cq->db.data.value = cpu_to_le32(cons);
	writeq(cq->db.raw, cq->db_addr);

	/* Make sure write would stick */
	mmiowb();
}

int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct qedr_cq *cq = get_qedr_cq(ibcq);
	unsigned long sflags;
	struct qedr_dev *dev;

	dev = get_qedr_dev(ibcq->device);

	if (cq->destroyed) {
		DP_ERR(dev,
		       "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
		       cq, cq->icid);
		return -EINVAL;
	}

	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
		return 0;

	spin_lock_irqsave(&cq->cq_lock, sflags);

	cq->arm_flags = 0;

	if (flags & IB_CQ_SOLICITED)
		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;

	if (flags & IB_CQ_NEXT_COMP)
		cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;

	doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);

	spin_unlock_irqrestore(&cq->cq_lock, sflags);

	return 0;
}

struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
			     const struct ib_cq_init_attr *attr,
			     struct ib_ucontext *ib_ctx, struct ib_udata *udata)
{
	struct qedr_ucontext *ctx = get_qedr_ucontext(ib_ctx);
	struct qed_rdma_destroy_cq_out_params destroy_oparams;
	struct qed_rdma_destroy_cq_in_params destroy_iparams;
	struct qedr_dev *dev = get_qedr_dev(ibdev);
	struct qed_rdma_create_cq_in_params params;
	struct qedr_create_cq_ureq ureq;
	int vector = attr->comp_vector;
	int entries = attr->cqe;
	struct qedr_cq *cq;
	int chain_entries;
	int page_cnt;
	u64 pbl_ptr;
	u16 icid;
	int rc;

	DP_DEBUG(dev, QEDR_MSG_INIT,
		 "create_cq: called from %s. entries=%d, vector=%d\n",
		 udata ? "User Lib" : "Kernel", entries, vector);

	if (entries > QEDR_MAX_CQES) {
		DP_ERR(dev,
		       "create cq: the number of entries %d is too high. Must be equal or below %d.\n",
		       entries, QEDR_MAX_CQES);
		return ERR_PTR(-EINVAL);
	}

	chain_entries = qedr_align_cq_entries(entries);
	chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);

	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq)
		return ERR_PTR(-ENOMEM);

	if (udata) {
		memset(&ureq, 0, sizeof(ureq));
		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
			DP_ERR(dev,
			       "create cq: problem copying data from user space\n");
			goto err0;
		}

		if (!ureq.len) {
			DP_ERR(dev,
			       "create cq: cannot create a cq with 0 entries\n");
			goto err0;
		}

		cq->cq_type = QEDR_CQ_TYPE_USER;

		rc = qedr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr,
					  ureq.len, IB_ACCESS_LOCAL_WRITE,
					  1, 1);
		if (rc)
			goto err0;

		pbl_ptr = cq->q.pbl_tbl->pa;
		page_cnt = cq->q.pbl_info.num_pbes;

		cq->ibcq.cqe = chain_entries;
	} else {
		cq->cq_type = QEDR_CQ_TYPE_KERNEL;

		rc = dev->ops->common->chain_alloc(dev->cdev,
						   QED_CHAIN_USE_TO_CONSUME,
						   QED_CHAIN_MODE_PBL,
						   QED_CHAIN_CNT_TYPE_U32,
						   chain_entries,
						   sizeof(union rdma_cqe),
						   &cq->pbl, NULL);
		if (rc)
			goto err1;

		page_cnt = qed_chain_get_page_cnt(&cq->pbl);
		pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
		cq->ibcq.cqe = cq->pbl.capacity;
	}

	qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
			    pbl_ptr, &params);

	rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
	if (rc)
		goto err2;

	cq->icid = icid;
	cq->sig = QEDR_CQ_MAGIC_NUMBER;
	spin_lock_init(&cq->cq_lock);

	if (ib_ctx) {
		rc = qedr_copy_cq_uresp(dev, cq, udata);
		if (rc)
			goto err3;
	} else {
		/* Generate doorbell address. */
		cq->db_addr = dev->db_addr +
		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
		cq->db.data.icid = cq->icid;
		cq->db.data.params = DB_AGG_CMD_SET <<
		    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;

		/* Point to the very last element; once we pass it, the toggle bit flips. */
		cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
		cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
		cq->latest_cqe = NULL;
		consume_cqe(cq);
		cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
	}

	DP_DEBUG(dev, QEDR_MSG_CQ,
		 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
		 cq->icid, cq, params.cq_size);

	return &cq->ibcq;

err3:
	destroy_iparams.icid = cq->icid;
	dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
				  &destroy_oparams);
err2:
	if (udata)
		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
	else
		dev->ops->common->chain_free(dev->cdev, &cq->pbl);
err1:
	if (udata)
		ib_umem_release(cq->q.umem);
err0:
	kfree(cq);
	return ERR_PTR(-EINVAL);
}

int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
{
	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
	struct qedr_cq *cq = get_qedr_cq(ibcq);

	DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);

	return 0;
}

#define QEDR_DESTROY_CQ_MAX_ITERATIONS		(10)
#define QEDR_DESTROY_CQ_ITER_DURATION		(10)
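/* With the values above, destroy waits at most ~100 us busy-polling
 * (10 iterations of udelay(10)) plus ~100 ms sleeping (10 iterations of
 * msleep(10)) before declaring the missing CNQ notifications an error.
 */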

int qedr_destroy_cq(struct ib_cq *ibcq)
{
	struct qedr_dev *dev = get_qedr_dev(ibcq->device);
	struct qed_rdma_destroy_cq_out_params oparams;
	struct qed_rdma_destroy_cq_in_params iparams;
	struct qedr_cq *cq = get_qedr_cq(ibcq);
	int iter;
	int rc;

	DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);

	cq->destroyed = 1;

	/* GSI CQs are handled by the driver, so they don't exist in the FW */
	if (cq->cq_type == QEDR_CQ_TYPE_GSI)
		goto done;

	iparams.icid = cq->icid;
	rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
	if (rc)
		return rc;

	dev->ops->common->chain_free(dev->cdev, &cq->pbl);

	if (ibcq->uobject && ibcq->uobject->context) {
		qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
		ib_umem_release(cq->q.umem);
	}

	/* We don't want the IRQ handler to handle a non-existing CQ so we
	 * wait until all CNQ interrupts, if any, are received. This will always
	 * happen and will always happen very fast. If not, then a serious error
	 * has occurred. That is why we can use a long delay.
	 * We spin for a short time so we don't lose time on context switching
	 * in case all the completions are handled in that span. Otherwise
	 * we sleep for a while and check again. Since the CNQ may be
	 * associated with (only) the current CPU we use msleep to allow the
	 * current CPU to be freed.
	 * The CNQ notification is increased in qedr_irq_handler().
	 */
	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
		udelay(QEDR_DESTROY_CQ_ITER_DURATION);
		iter--;
	}

	iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
	while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
		msleep(QEDR_DESTROY_CQ_ITER_DURATION);
		iter--;
	}

	if (oparams.num_cq_notif != cq->cnq_notif)
		goto err;

	/* Note that we don't need to have explicit code to wait for the
	 * completion of the event handler because it is invoked from the EQ.
	 * Since the destroy CQ ramrod has also been received on the EQ we can
	 * be certain that there's no event handler in process.
	 */
done:
	cq->sig = ~cq->sig;

	kfree(cq);

	return 0;

err:
	DP_ERR(dev,
	       "CQ %p (icid=%d) not freed, expecting %d ints but got %d ints\n",
	       cq, cq->icid, oparams.num_cq_notif, cq->cnq_notif);

	return -EINVAL;
}

static inline int get_gid_info_from_table(struct ib_qp *ibqp,
					  struct ib_qp_attr *attr,
					  int attr_mask,
					  struct qed_rdma_modify_qp_in_params
					  *qp_params)
{
	enum rdma_network_type nw_type;
	struct ib_gid_attr gid_attr;
	const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
	union ib_gid gid;
	u32 ipv4_addr;
	int rc = 0;
	int i;

	rc = ib_get_cached_gid(ibqp->device,
			       rdma_ah_get_port_num(&attr->ah_attr),
			       grh->sgid_index, &gid, &gid_attr);
	if (rc)
		return rc;

	qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev);

	dev_put(gid_attr.ndev);
	nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid);
	switch (nw_type) {
	case RDMA_NETWORK_IPV6:
		memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
		       sizeof(qp_params->sgid));
		memcpy(&qp_params->dgid.bytes[0],
		       &grh->dgid,
		       sizeof(qp_params->dgid));
		qp_params->roce_mode = ROCE_V2_IPV6;
		SET_FIELD(qp_params->modify_flags,
			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
		break;
	case RDMA_NETWORK_IB:
		memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
		       sizeof(qp_params->sgid));
		memcpy(&qp_params->dgid.bytes[0],
		       &grh->dgid,
		       sizeof(qp_params->dgid));
		qp_params->roce_mode = ROCE_V1;
		break;
	case RDMA_NETWORK_IPV4:
		memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
		memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
		ipv4_addr = qedr_get_ipv4_from_gid(gid.raw);
		qp_params->sgid.ipv4_addr = ipv4_addr;
		ipv4_addr =
		    qedr_get_ipv4_from_gid(grh->dgid.raw);
		qp_params->dgid.ipv4_addr = ipv4_addr;
		SET_FIELD(qp_params->modify_flags,
			  QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
		qp_params->roce_mode = ROCE_V2_IPV4;
		break;
	}

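	/* GIDs are carried in network byte order; convert each of the four
	 * 32-bit words to host order for the FW interface.
	 */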
	for (i = 0; i < 4; i++) {
		qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
		qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
	}

	if (qp_params->vlan_id >= VLAN_CFI_MASK)
		qp_params->vlan_id = 0;

	return 0;
}

static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
			       struct ib_qp_init_attr *attrs)
{
	struct qedr_device_attr *qattr = &dev->attr;

	/* QP0... attrs->qp_type == IB_QPT_GSI */
	if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
		DP_DEBUG(dev, QEDR_MSG_QP,
			 "create qp: unsupported qp type=0x%x requested\n",
			 attrs->qp_type);
		return -EINVAL;
	}

	if (attrs->cap.max_send_wr > qattr->max_sqe) {
		DP_ERR(dev,
		       "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
		       attrs->cap.max_send_wr, qattr->max_sqe);
		return -EINVAL;
	}

	if (attrs->cap.max_inline_data > qattr->max_inline) {
		DP_ERR(dev,
		       "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
		       attrs->cap.max_inline_data, qattr->max_inline);
		return -EINVAL;
	}

	if (attrs->cap.max_send_sge > qattr->max_sge) {
		DP_ERR(dev,
		       "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
		       attrs->cap.max_send_sge, qattr->max_sge);
		return -EINVAL;
	}

	if (attrs->cap.max_recv_sge > qattr->max_sge) {
		DP_ERR(dev,
		       "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
		       attrs->cap.max_recv_sge, qattr->max_sge);
		return -EINVAL;
	}

	/* Unprivileged user space cannot create special QP */
	if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
		DP_ERR(dev,
		       "create qp: userspace can't create special QPs of type=0x%x\n",
		       attrs->qp_type);
		return -EINVAL;
	}

	return 0;
}

static void qedr_copy_rq_uresp(struct qedr_dev *dev,
			       struct qedr_create_qp_uresp *uresp,
			       struct qedr_qp *qp)
{
	/* iWARP requires two doorbells per RQ. */
	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
		uresp->rq_db_offset =
		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
		uresp->rq_db2_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
	} else {
		uresp->rq_db_offset =
		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
	}

	uresp->rq_icid = qp->icid;
}

static void qedr_copy_sq_uresp(struct qedr_dev *dev,
			       struct qedr_create_qp_uresp *uresp,
			       struct qedr_qp *qp)
{
	uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);

	/* iWARP uses the same cid for rq and sq */
	if (rdma_protocol_iwarp(&dev->ibdev, 1))
		uresp->sq_icid = qp->icid;
	else
		uresp->sq_icid = qp->icid + 1;
}

static int qedr_copy_qp_uresp(struct qedr_dev *dev,
			      struct qedr_qp *qp, struct ib_udata *udata)
{
	struct qedr_create_qp_uresp uresp;
	int rc;

	memset(&uresp, 0, sizeof(uresp));
	qedr_copy_sq_uresp(dev, &uresp, qp);
	qedr_copy_rq_uresp(dev, &uresp, qp);

	uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
	uresp.qp_id = qp->qp_id;

	rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
	if (rc)
		DP_ERR(dev,
		       "create qp: failed a copy to user space with qp icid=0x%x.\n",
		       qp->icid);

	return rc;
}

static void qedr_set_common_qp_params(struct qedr_dev *dev,
				      struct qedr_qp *qp,
				      struct qedr_pd *pd,
				      struct ib_qp_init_attr *attrs)
{
	spin_lock_init(&qp->q_lock);
	atomic_set(&qp->refcnt, 1);
	qp->pd = pd;
	qp->qp_type = attrs->qp_type;
	qp->max_inline_data = attrs->cap.max_inline_data;
	qp->sq.max_sges = attrs->cap.max_send_sge;
	qp->state = QED_ROCE_QP_STATE_RESET;
	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
	qp->sq_cq = get_qedr_cq(attrs->send_cq);
	qp->rq_cq = get_qedr_cq(attrs->recv_cq);
	qp->dev = dev;
	qp->rq.max_sges = attrs->cap.max_recv_sge;

	DP_DEBUG(dev, QEDR_MSG_QP,
		 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
		 qp->rq.max_sges, qp->rq_cq->icid);
	DP_DEBUG(dev, QEDR_MSG_QP,
		 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
		 pd->pd_id, qp->qp_type, qp->max_inline_data,
		 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
	DP_DEBUG(dev, QEDR_MSG_QP,
		 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
		 qp->sq.max_sges, qp->sq_cq->icid);
}

static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
{
	qp->sq.db = dev->db_addr +
		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
	qp->sq.db_data.data.icid = qp->icid + 1;
	qp->rq.db = dev->db_addr +
		    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
	qp->rq.db_data.data.icid = qp->icid;
}

static inline void
qedr_init_common_qp_in_params(struct qedr_dev *dev,
			      struct qedr_pd *pd,
			      struct qedr_qp *qp,
			      struct ib_qp_init_attr *attrs,
			      bool fmr_and_reserved_lkey,
			      struct qed_rdma_create_qp_in_params *params)
{
	/* QP handle to be written in an async event */
	params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
	params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);

	params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
	params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
	params->pd = pd->pd_id;
	params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
	params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
	params->stats_queue = 0;
	params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
	params->srq_id = 0;
	params->use_srq = false;
}

static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
{
	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: successfully created user QP. "
		 "qp=%p. "
		 "sq_addr=0x%llx, "
		 "sq_len=%zd, "
		 "rq_addr=0x%llx, "
		 "rq_len=%zd"
		 "\n",
		 qp,
		 qp->usq.buf_addr,
		 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
}

static int qedr_idr_add(struct qedr_dev *dev, void *ptr, u32 id)
{
	int rc;

	if (!rdma_protocol_iwarp(&dev->ibdev, 1))
		return 0;

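	/* Preload outside the spinlock so the GFP_ATOMIC allocation below
	 * does not fail in the common case; only iWARP needs the idr, since
	 * it looks QPs up by id.
	 */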
	idr_preload(GFP_KERNEL);
	spin_lock_irq(&dev->idr_lock);

	rc = idr_alloc(&dev->qpidr, ptr, id, id + 1, GFP_ATOMIC);

	spin_unlock_irq(&dev->idr_lock);
	idr_preload_end();

	return rc < 0 ? rc : 0;
}

static void qedr_idr_remove(struct qedr_dev *dev, u32 id)
{
	if (!rdma_protocol_iwarp(&dev->ibdev, 1))
		return;

	spin_lock_irq(&dev->idr_lock);
	idr_remove(&dev->qpidr, id);
	spin_unlock_irq(&dev->idr_lock);
}

static inline void
qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
			    struct qedr_qp *qp,
			    struct qed_rdma_create_qp_out_params *out_params)
{
	qp->usq.pbl_tbl->va = out_params->sq_pbl_virt;
	qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys;

	qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl,
			   &qp->usq.pbl_info, FW_PAGE_SHIFT);

	qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
	qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;

	qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl,
			   &qp->urq.pbl_info, FW_PAGE_SHIFT);
}

static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp)
{
	if (qp->usq.umem)
		ib_umem_release(qp->usq.umem);
	qp->usq.umem = NULL;

	if (qp->urq.umem)
		ib_umem_release(qp->urq.umem);
	qp->urq.umem = NULL;
}

static int qedr_create_user_qp(struct qedr_dev *dev,
			       struct qedr_qp *qp,
			       struct ib_pd *ibpd,
			       struct ib_udata *udata,
			       struct ib_qp_init_attr *attrs)
{
	struct qed_rdma_create_qp_in_params in_params;
	struct qed_rdma_create_qp_out_params out_params;
	struct qedr_pd *pd = get_qedr_pd(ibpd);
	struct ib_ucontext *ib_ctx = NULL;
	struct qedr_create_qp_ureq ureq;
	int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
	int rc = -EINVAL;

	ib_ctx = ibpd->uobject->context;

	memset(&ureq, 0, sizeof(ureq));
	rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
	if (rc) {
		DP_ERR(dev, "Problem copying data from user space\n");
		return rc;
	}

	/* SQ - read access only (0), dma sync not required (0) */
	rc = qedr_init_user_queue(ib_ctx, dev, &qp->usq, ureq.sq_addr,
				  ureq.sq_len, 0, 0, alloc_and_init);
	if (rc)
		return rc;

	/* RQ - read access only (0), dma sync not required (0) */
	rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr,
				  ureq.rq_len, 0, 0, alloc_and_init);
	if (rc)
		return rc;

	memset(&in_params, 0, sizeof(in_params));
	qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
	in_params.qp_handle_lo = ureq.qp_handle_lo;
	in_params.qp_handle_hi = ureq.qp_handle_hi;
	in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
	in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
	in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
	in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;

	qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
					      &in_params, &out_params);

	if (!qp->qed_qp) {
		rc = -ENOMEM;
		goto err1;
	}

	if (rdma_protocol_iwarp(&dev->ibdev, 1))
		qedr_iwarp_populate_user_qp(dev, qp, &out_params);

	qp->qp_id = out_params.qp_id;
	qp->icid = out_params.icid;

	rc = qedr_copy_qp_uresp(dev, qp, udata);
	if (rc)
		goto err;

	qedr_qp_user_print(dev, qp);

	return 0;
err:
	rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
	if (rc)
		DP_ERR(dev, "create qp: fatal fault. rc=%d\n", rc);
1449
1450 err1:
1451         qedr_cleanup_user(dev, qp);
1452         return rc;
1453 }
1454
1455 static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
1456 {
1457         qp->sq.db = dev->db_addr +
1458             DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
1459         qp->sq.db_data.data.icid = qp->icid;
1460
1461         qp->rq.db = dev->db_addr +
1462                     DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
1463         qp->rq.db_data.data.icid = qp->icid;
1464         qp->rq.iwarp_db2 = dev->db_addr +
1465                            DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
1466         qp->rq.iwarp_db2_data.data.icid = qp->icid;
1467         qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD;
1468 }
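
/* Illustrative sketch, not driver code: once the doorbell addresses above
 * are set up, the fast path rings a doorbell by writing the prepared
 * record to the mapped address, along the lines of:
 *
 *	writel(qp->rq.db_data.raw, qp->rq.db);
 *	mmiowb();
 *
 * mirroring the writel()/mmiowb() pair used in qedr_update_qp_state()
 * below; how db_data.data.value is advanced beforehand is left out here.
 */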
1469
1470 static int
1471 qedr_roce_create_kernel_qp(struct qedr_dev *dev,
1472                            struct qedr_qp *qp,
1473                            struct qed_rdma_create_qp_in_params *in_params,
1474                            u32 n_sq_elems, u32 n_rq_elems)
1475 {
1476         struct qed_rdma_create_qp_out_params out_params;
1477         int rc;
1478
1479         rc = dev->ops->common->chain_alloc(dev->cdev,
1480                                            QED_CHAIN_USE_TO_PRODUCE,
1481                                            QED_CHAIN_MODE_PBL,
1482                                            QED_CHAIN_CNT_TYPE_U32,
1483                                            n_sq_elems,
1484                                            QEDR_SQE_ELEMENT_SIZE,
1485                                            &qp->sq.pbl, NULL);
1486
1487         if (rc)
1488                 return rc;
1489
1490         in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
1491         in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);
1492
1493         rc = dev->ops->common->chain_alloc(dev->cdev,
1494                                            QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1495                                            QED_CHAIN_MODE_PBL,
1496                                            QED_CHAIN_CNT_TYPE_U32,
1497                                            n_rq_elems,
1498                                            QEDR_RQE_ELEMENT_SIZE,
1499                                            &qp->rq.pbl, NULL);
1500         if (rc)
1501                 return rc;
1502
1503         in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
1504         in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);
1505
1506         qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1507                                               in_params, &out_params);
1508
1509         if (!qp->qed_qp)
1510                 return -EINVAL;
1511
1512         qp->qp_id = out_params.qp_id;
1513         qp->icid = out_params.icid;
1514
1515         qedr_set_roce_db_info(dev, qp);
1516         return rc;
1517 }
1518
1519 static int
1520 qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
1521                             struct qedr_qp *qp,
1522                             struct qed_rdma_create_qp_in_params *in_params,
1523                             u32 n_sq_elems, u32 n_rq_elems)
1524 {
1525         struct qed_rdma_create_qp_out_params out_params;
1526         struct qed_chain_ext_pbl ext_pbl;
1527         int rc;
1528
1529         in_params->sq_num_pages = QED_CHAIN_PAGE_CNT(n_sq_elems,
1530                                                      QEDR_SQE_ELEMENT_SIZE,
1531                                                      QED_CHAIN_MODE_PBL);
1532         in_params->rq_num_pages = QED_CHAIN_PAGE_CNT(n_rq_elems,
1533                                                      QEDR_RQE_ELEMENT_SIZE,
1534                                                      QED_CHAIN_MODE_PBL);
1535
1536         qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
1537                                               in_params, &out_params);
1538
1539         if (!qp->qed_qp)
1540                 return -EINVAL;
1541
1542         /* Now we allocate the chain on top of the PBL the qed layer returned */
1543         ext_pbl.p_pbl_virt = out_params.sq_pbl_virt;
1544         ext_pbl.p_pbl_phys = out_params.sq_pbl_phys;
1545
1546         rc = dev->ops->common->chain_alloc(dev->cdev,
1547                                            QED_CHAIN_USE_TO_PRODUCE,
1548                                            QED_CHAIN_MODE_PBL,
1549                                            QED_CHAIN_CNT_TYPE_U32,
1550                                            n_sq_elems,
1551                                            QEDR_SQE_ELEMENT_SIZE,
1552                                            &qp->sq.pbl, &ext_pbl);
1553
1554         if (rc)
1555                 goto err;
1556
1557         ext_pbl.p_pbl_virt = out_params.rq_pbl_virt;
1558         ext_pbl.p_pbl_phys = out_params.rq_pbl_phys;
1559
1560         rc = dev->ops->common->chain_alloc(dev->cdev,
1561                                            QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1562                                            QED_CHAIN_MODE_PBL,
1563                                            QED_CHAIN_CNT_TYPE_U32,
1564                                            n_rq_elems,
1565                                            QEDR_RQE_ELEMENT_SIZE,
1566                                            &qp->rq.pbl, &ext_pbl);
1567
1568         if (rc)
1569                 goto err;
1570
1571         qp->qp_id = out_params.qp_id;
1572         qp->icid = out_params.icid;
1573
1574         qedr_set_iwarp_db_info(dev, qp);
1575         return rc;
1576
1577 err:
1578         dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1579
1580         return rc;
1581 }
1582
1583 static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
1584 {
1585         dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
1586         kfree(qp->wqe_wr_id);
1587
1588         dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
1589         kfree(qp->rqe_wr_id);
1590 }
1591
1592 static int qedr_create_kernel_qp(struct qedr_dev *dev,
1593                                  struct qedr_qp *qp,
1594                                  struct ib_pd *ibpd,
1595                                  struct ib_qp_init_attr *attrs)
1596 {
1597         struct qed_rdma_create_qp_in_params in_params;
1598         struct qedr_pd *pd = get_qedr_pd(ibpd);
1599         int rc = -EINVAL;
1600         u32 n_rq_elems;
1601         u32 n_sq_elems;
1602         u32 n_sq_entries;
1603
1604         memset(&in_params, 0, sizeof(in_params));
1605
1606         /* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
1607          * the ring. The ring should allow at least a single WR, even if the
1608          * user requested none, due to allocation issues.
1609          * We should add an extra WR since the prod and cons indices of
1610          * wqe_wr_id are managed in such a way that the WQ is considered full
1611          * when (prod+1)%max_wr==cons. We currently don't do that because we
1612          * double the number of entries due to an iSER issue that pushes far
1613          * more WRs than indicated. If we declined its ib_post_send() we would
1614          * get error prints in dmesg that we'd like to avoid.
1615          */
1616         qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
1617                               dev->attr.max_sqe);
1618
1619         qp->wqe_wr_id = kcalloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id),
1620                                 GFP_KERNEL);
1621         if (!qp->wqe_wr_id) {
1622                 DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
1623                 return -ENOMEM;
1624         }
1625
1626         /* QP handle to be written in CQE */
1627         in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
1628         in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
1629
1630         /* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
1631          * the ring. There ring should allow at least a single WR, even if the
1632          * the ring. The ring should allow at least a single WR, even if the
1633          */
1634         qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
1635
1636         /* Allocate driver internal RQ array */
1637         qp->rqe_wr_id = kcalloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id),
1638                                 GFP_KERNEL);
1639         if (!qp->rqe_wr_id) {
1640                 DP_ERR(dev,
1641                        "create qp: failed RQ shadow memory allocation\n");
1642                 kfree(qp->wqe_wr_id);
1643                 return -ENOMEM;
1644         }
1645
1646         qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
1647
1648         n_sq_entries = attrs->cap.max_send_wr;
1649         n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
1650         n_sq_entries = max_t(u32, n_sq_entries, 1);
1651         n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
1652
1653         n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
1654
1655         if (rdma_protocol_iwarp(&dev->ibdev, 1))
1656                 rc = qedr_iwarp_create_kernel_qp(dev, qp, &in_params,
1657                                                  n_sq_elems, n_rq_elems);
1658         else
1659                 rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
1660                                                 n_sq_elems, n_rq_elems);
1661         if (rc)
1662                 qedr_cleanup_kernel(dev, qp);
1663
1664         return rc;
1665 }
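
/* Worked example of the sizing above (illustrative values): with
 * attrs->cap.max_send_wr = 64 and dev->wq_multiplier = 2, the shadow
 * array gets qp->sq.max_wr = min(64 * 2, dev->attr.max_sqe) = 128
 * entries, while the chain itself is sized from the un-multiplied
 * request: n_sq_elems = clamp(64, 1, max_sqe) *
 * QEDR_MAX_SQE_ELEMENTS_PER_SQE.
 */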
1666
1667 struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
1668                              struct ib_qp_init_attr *attrs,
1669                              struct ib_udata *udata)
1670 {
1671         struct qedr_dev *dev = get_qedr_dev(ibpd->device);
1672         struct qedr_pd *pd = get_qedr_pd(ibpd);
1673         struct qedr_qp *qp;
1674         struct ib_qp *ibqp;
1675         int rc = 0;
1676
1677         DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
1678                  udata ? "user library" : "kernel", pd);
1679
1680         rc = qedr_check_qp_attrs(ibpd, dev, attrs);
1681         if (rc)
1682                 return ERR_PTR(rc);
1683
1684         if (attrs->srq)
1685                 return ERR_PTR(-EINVAL);
1686
1687         DP_DEBUG(dev, QEDR_MSG_QP,
1688                  "create qp: called from %s, event_handler=%p, pd=%p sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
1689                  udata ? "user library" : "kernel", attrs->event_handler, pd,
1690                  get_qedr_cq(attrs->send_cq),
1691                  get_qedr_cq(attrs->send_cq)->icid,
1692                  get_qedr_cq(attrs->recv_cq),
1693                  get_qedr_cq(attrs->recv_cq)->icid);
1694
1695         qp = kzalloc(sizeof(*qp), GFP_KERNEL);
1696         if (!qp) {
1697                 DP_ERR(dev, "create qp: failed allocating memory\n");
1698                 return ERR_PTR(-ENOMEM);
1699         }
1700
1701         qedr_set_common_qp_params(dev, qp, pd, attrs);
1702
1703         if (attrs->qp_type == IB_QPT_GSI) {
1704                 ibqp = qedr_create_gsi_qp(dev, attrs, qp);
1705                 if (IS_ERR(ibqp))
1706                         kfree(qp);
1707                 return ibqp;
1708         }
1709
1710         if (udata)
1711                 rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
1712         else
1713                 rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
1714
1715         if (rc)
1716                 goto err;
1717
1718         qp->ibqp.qp_num = qp->qp_id;
1719
1720         rc = qedr_idr_add(dev, qp, qp->qp_id);
1721         if (rc)
1722                 goto err;
1723
1724         return &qp->ibqp;
1725
1726 err:
1727         kfree(qp);
1728
1729         return ERR_PTR(rc);
1730 }
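
/* Illustrative caller-side sketch, not part of this file: a kernel ULP
 * reaches qedr_create_qp() through the core verbs API, roughly:
 *
 *	struct ib_qp_init_attr init_attr = {
 *		.qp_type	= IB_QPT_RC,
 *		.send_cq	= send_cq,
 *		.recv_cq	= recv_cq,
 *		.cap		= { .max_send_wr = 64, .max_recv_wr = 64,
 *				    .max_send_sge = 1, .max_recv_sge = 1 },
 *	};
 *	struct ib_qp *qp = ib_create_qp(pd, &init_attr);
 *
 * The pd/cq variables and the capacity values are assumptions made for
 * the example.
 */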
1731
1732 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
1733 {
1734         switch (qp_state) {
1735         case QED_ROCE_QP_STATE_RESET:
1736                 return IB_QPS_RESET;
1737         case QED_ROCE_QP_STATE_INIT:
1738                 return IB_QPS_INIT;
1739         case QED_ROCE_QP_STATE_RTR:
1740                 return IB_QPS_RTR;
1741         case QED_ROCE_QP_STATE_RTS:
1742                 return IB_QPS_RTS;
1743         case QED_ROCE_QP_STATE_SQD:
1744                 return IB_QPS_SQD;
1745         case QED_ROCE_QP_STATE_ERR:
1746                 return IB_QPS_ERR;
1747         case QED_ROCE_QP_STATE_SQE:
1748                 return IB_QPS_SQE;
1749         }
1750         return IB_QPS_ERR;
1751 }
1752
1753 static enum qed_roce_qp_state qedr_get_state_from_ibqp(
1754                                         enum ib_qp_state qp_state)
1755 {
1756         switch (qp_state) {
1757         case IB_QPS_RESET:
1758                 return QED_ROCE_QP_STATE_RESET;
1759         case IB_QPS_INIT:
1760                 return QED_ROCE_QP_STATE_INIT;
1761         case IB_QPS_RTR:
1762                 return QED_ROCE_QP_STATE_RTR;
1763         case IB_QPS_RTS:
1764                 return QED_ROCE_QP_STATE_RTS;
1765         case IB_QPS_SQD:
1766                 return QED_ROCE_QP_STATE_SQD;
1767         case IB_QPS_ERR:
1768                 return QED_ROCE_QP_STATE_ERR;
1769         default:
1770                 return QED_ROCE_QP_STATE_ERR;
1771         }
1772 }
1773
1774 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
1775 {
1776         qed_chain_reset(&qph->pbl);
1777         qph->prod = 0;
1778         qph->cons = 0;
1779         qph->wqe_cons = 0;
1780         qph->db_data.data.value = cpu_to_le16(0);
1781 }
1782
1783 static int qedr_update_qp_state(struct qedr_dev *dev,
1784                                 struct qedr_qp *qp,
1785                                 enum qed_roce_qp_state cur_state,
1786                                 enum qed_roce_qp_state new_state)
1787 {
1788         int status = 0;
1789
1790         if (new_state == cur_state)
1791                 return 0;
1792
1793         switch (cur_state) {
1794         case QED_ROCE_QP_STATE_RESET:
1795                 switch (new_state) {
1796                 case QED_ROCE_QP_STATE_INIT:
1797                         qp->prev_wqe_size = 0;
1798                         qedr_reset_qp_hwq_info(&qp->sq);
1799                         qedr_reset_qp_hwq_info(&qp->rq);
1800                         break;
1801                 default:
1802                         status = -EINVAL;
1803                         break;
1804                 }
1805                 break;
1806         case QED_ROCE_QP_STATE_INIT:
1807                 switch (new_state) {
1808                 case QED_ROCE_QP_STATE_RTR:
1809                         /* Update doorbell (in case post_recv was
1810                          * done before move to RTR)
1811                          */
1812
1813                         if (rdma_protocol_roce(&dev->ibdev, 1)) {
1814                                 writel(qp->rq.db_data.raw, qp->rq.db);
1815                                 /* Make sure write takes effect */
1816                                 mmiowb();
1817                         }
1818                         break;
1819                 case QED_ROCE_QP_STATE_ERR:
1820                         break;
1821                 default:
1822                         /* Invalid state change. */
1823                         status = -EINVAL;
1824                         break;
1825                 }
1826                 break;
1827         case QED_ROCE_QP_STATE_RTR:
1828                 /* RTR->XXX */
1829                 switch (new_state) {
1830                 case QED_ROCE_QP_STATE_RTS:
1831                         break;
1832                 case QED_ROCE_QP_STATE_ERR:
1833                         break;
1834                 default:
1835                         /* Invalid state change. */
1836                         status = -EINVAL;
1837                         break;
1838                 }
1839                 break;
1840         case QED_ROCE_QP_STATE_RTS:
1841                 /* RTS->XXX */
1842                 switch (new_state) {
1843                 case QED_ROCE_QP_STATE_SQD:
1844                         break;
1845                 case QED_ROCE_QP_STATE_ERR:
1846                         break;
1847                 default:
1848                         /* Invalid state change. */
1849                         status = -EINVAL;
1850                         break;
1851                 }
1852                 break;
1853         case QED_ROCE_QP_STATE_SQD:
1854                 /* SQD->XXX */
1855                 switch (new_state) {
1856                 case QED_ROCE_QP_STATE_RTS:
1857                 case QED_ROCE_QP_STATE_ERR:
1858                         break;
1859                 default:
1860                         /* Invalid state change. */
1861                         status = -EINVAL;
1862                         break;
1863                 }
1864                 break;
1865         case QED_ROCE_QP_STATE_ERR:
1866                 /* ERR->XXX */
1867                 switch (new_state) {
1868                 case QED_ROCE_QP_STATE_RESET:
1869                         if ((qp->rq.prod != qp->rq.cons) ||
1870                             (qp->sq.prod != qp->sq.cons)) {
1871                                 DP_NOTICE(dev,
1872                                           "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
1873                                           qp->rq.prod, qp->rq.cons, qp->sq.prod,
1874                                           qp->sq.cons);
1875                                 status = -EINVAL;
1876                         }
1877                         break;
1878                 default:
1879                         status = -EINVAL;
1880                         break;
1881                 }
1882                 break;
1883         default:
1884                 status = -EINVAL;
1885                 break;
1886         }
1887
1888         return status;
1889 }
1890
1891 int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1892                    int attr_mask, struct ib_udata *udata)
1893 {
1894         struct qedr_qp *qp = get_qedr_qp(ibqp);
1895         struct qed_rdma_modify_qp_in_params qp_params = { 0 };
1896         struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
1897         const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1898         enum ib_qp_state old_qp_state, new_qp_state;
1899         enum qed_roce_qp_state cur_state;
1900         int rc = 0;
1901
1902         DP_DEBUG(dev, QEDR_MSG_QP,
1903                  "modify qp: qp %p attr_mask=0x%x, state=%d\n", qp, attr_mask,
1904                  attr->qp_state);
1905
1906         old_qp_state = qedr_get_ibqp_state(qp->state);
1907         if (attr_mask & IB_QP_STATE)
1908                 new_qp_state = attr->qp_state;
1909         else
1910                 new_qp_state = old_qp_state;
1911
1912         if (rdma_protocol_roce(&dev->ibdev, 1)) {
1913                 if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state,
1914                                         ibqp->qp_type, attr_mask,
1915                                         IB_LINK_LAYER_ETHERNET)) {
1916                         DP_ERR(dev,
1917                                "modify qp: invalid attribute mask=0x%x specified for\n"
1918                                "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
1919                                attr_mask, qp->qp_id, ibqp->qp_type,
1920                                old_qp_state, new_qp_state);
1921                         rc = -EINVAL;
1922                         goto err;
1923                 }
1924         }
1925
1926         /* Translate the masks... */
1927         if (attr_mask & IB_QP_STATE) {
1928                 SET_FIELD(qp_params.modify_flags,
1929                           QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
1930                 qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
1931         }
1932
1933         if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
1934                 qp_params.sqd_async = true;
1935
1936         if (attr_mask & IB_QP_PKEY_INDEX) {
1937                 SET_FIELD(qp_params.modify_flags,
1938                           QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
1939                 if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
1940                         rc = -EINVAL;
1941                         goto err;
1942                 }
1943
1944                 qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
1945         }
1946
1947         if (attr_mask & IB_QP_QKEY)
1948                 qp->qkey = attr->qkey;
1949
1950         if (attr_mask & IB_QP_ACCESS_FLAGS) {
1951                 SET_FIELD(qp_params.modify_flags,
1952                           QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
1953                 qp_params.incoming_rdma_read_en = attr->qp_access_flags &
1954                                                   IB_ACCESS_REMOTE_READ;
1955                 qp_params.incoming_rdma_write_en = attr->qp_access_flags &
1956                                                    IB_ACCESS_REMOTE_WRITE;
1957                 qp_params.incoming_atomic_en = attr->qp_access_flags &
1958                                                IB_ACCESS_REMOTE_ATOMIC;
1959         }
1960
1961         if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
1962                 if (attr_mask & IB_QP_PATH_MTU) {
1963                         if (attr->path_mtu < IB_MTU_256 ||
1964                             attr->path_mtu > IB_MTU_4096) {
1965                                 pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
1966                                 rc = -EINVAL;
1967                                 goto err;
1968                         }
1969                         qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
1970                                       ib_mtu_enum_to_int(iboe_get_mtu
1971                                                          (dev->ndev->mtu)));
1972                 }
1973
1974                 if (!qp->mtu) {
1975                         qp->mtu =
1976                         ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
1977                         pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
1978                 }
1979
1980                 SET_FIELD(qp_params.modify_flags,
1981                           QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
1982
1983                 qp_params.traffic_class_tos = grh->traffic_class;
1984                 qp_params.flow_label = grh->flow_label;
1985                 qp_params.hop_limit_ttl = grh->hop_limit;
1986
1987                 qp->sgid_idx = grh->sgid_index;
1988
1989                 rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
1990                 if (rc) {
1991                         DP_ERR(dev,
1992                                "modify qp: problems with GID index %d (rc=%d)\n",
1993                                grh->sgid_index, rc);
1994                         return rc;
1995                 }
1996
1997                 rc = qedr_get_dmac(dev, &attr->ah_attr,
1998                                    qp_params.remote_mac_addr);
1999                 if (rc)
2000                         return rc;
2001
2002                 qp_params.use_local_mac = true;
2003                 ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
2004
2005                 DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
2006                          qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
2007                          qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
2008                 DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
2009                          qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
2010                          qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
2011                 DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
2012                          qp_params.remote_mac_addr);
2013
2014                 qp_params.mtu = qp->mtu;
2015                 qp_params.lb_indication = false;
2016         }
2017
2018         if (!qp_params.mtu) {
2019                 /* Stay with current MTU */
2020                 if (qp->mtu)
2021                         qp_params.mtu = qp->mtu;
2022                 else
2023                         qp_params.mtu =
2024                             ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2025         }
2026
2027         if (attr_mask & IB_QP_TIMEOUT) {
2028                 SET_FIELD(qp_params.modify_flags,
2029                           QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
2030
2031                 /* The received timeout value is an exponent used like this:
2032                  *    "12.7.34 LOCAL ACK TIMEOUT
2033                  *    Value representing the transport (ACK) timeout for use by
2034                  *    the remote, expressed as: 4.096 * 2^timeout [usec]"
2035                  * The FW expects timeout in msec so we need to divide the usec
2036                  * result by 1000. We'll approximate 1000~2^10, and 4.096 ~ 2^2,
2037                  * so we get: 2^2 * 2^timeout / 2^10 = 2^(timeout - 8).
2038                  * The value of zero means infinite so we use a 'max_t' to make
2039                  * sure that sub 1 msec values will be configured as 1 msec.
2040                  */
2041                 if (attr->timeout)
2042                         qp_params.ack_timeout =
2043                                         1 << max_t(int, attr->timeout - 8, 0);
2044                 else
2045                         qp_params.ack_timeout = 0;
2046         }
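
        /* Worked example of the timeout conversion above: attr->timeout = 14
         * means 4.096 usec * 2^14 ~= 67 msec per the spec, while the
         * approximation gives 2^(14 - 8) = 64 msec.
         */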
2047
2048         if (attr_mask & IB_QP_RETRY_CNT) {
2049                 SET_FIELD(qp_params.modify_flags,
2050                           QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
2051                 qp_params.retry_cnt = attr->retry_cnt;
2052         }
2053
2054         if (attr_mask & IB_QP_RNR_RETRY) {
2055                 SET_FIELD(qp_params.modify_flags,
2056                           QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
2057                 qp_params.rnr_retry_cnt = attr->rnr_retry;
2058         }
2059
2060         if (attr_mask & IB_QP_RQ_PSN) {
2061                 SET_FIELD(qp_params.modify_flags,
2062                           QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
2063                 qp_params.rq_psn = attr->rq_psn;
2064                 qp->rq_psn = attr->rq_psn;
2065         }
2066
2067         if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
2068                 if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
2069                         rc = -EINVAL;
2070                         DP_ERR(dev,
2071                                "unsupported max_rd_atomic=%d, supported=%d\n",
2072                                attr->max_rd_atomic,
2073                                dev->attr.max_qp_req_rd_atomic_resc);
2074                         goto err;
2075                 }
2076
2077                 SET_FIELD(qp_params.modify_flags,
2078                           QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
2079                 qp_params.max_rd_atomic_req = attr->max_rd_atomic;
2080         }
2081
2082         if (attr_mask & IB_QP_MIN_RNR_TIMER) {
2083                 SET_FIELD(qp_params.modify_flags,
2084                           QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
2085                 qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
2086         }
2087
2088         if (attr_mask & IB_QP_SQ_PSN) {
2089                 SET_FIELD(qp_params.modify_flags,
2090                           QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
2091                 qp_params.sq_psn = attr->sq_psn;
2092                 qp->sq_psn = attr->sq_psn;
2093         }
2094
2095         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
2096                 if (attr->max_dest_rd_atomic >
2097                     dev->attr.max_qp_resp_rd_atomic_resc) {
2098                         DP_ERR(dev,
2099                                "unsupported max_dest_rd_atomic=%d, supported=%d\n",
2100                                attr->max_dest_rd_atomic,
2101                                dev->attr.max_qp_resp_rd_atomic_resc);
2102
2103                         rc = -EINVAL;
2104                         goto err;
2105                 }
2106
2107                 SET_FIELD(qp_params.modify_flags,
2108                           QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
2109                 qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
2110         }
2111
2112         if (attr_mask & IB_QP_DEST_QPN) {
2113                 SET_FIELD(qp_params.modify_flags,
2114                           QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
2115
2116                 qp_params.dest_qp = attr->dest_qp_num;
2117                 qp->dest_qp_num = attr->dest_qp_num;
2118         }
2119
2120         cur_state = qp->state;
2121
2122         /* Update the QP state before the actual ramrod to prevent a race with
2123          * fast path. Modifying the QP state to error will cause the device to
2124          * flush the CQEs; polling the flushed CQEs would then be treated as
2125          * a potential issue if the QP were not already in the error state.
2126          */
2127         if ((attr_mask & IB_QP_STATE) && qp->qp_type != IB_QPT_GSI &&
2128             !udata && qp_params.new_state == QED_ROCE_QP_STATE_ERR)
2129                 qp->state = QED_ROCE_QP_STATE_ERR;
2130
2131         if (qp->qp_type != IB_QPT_GSI)
2132                 rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
2133                                               qp->qed_qp, &qp_params);
2134
2135         if (attr_mask & IB_QP_STATE) {
2136                 if ((qp->qp_type != IB_QPT_GSI) && (!udata))
2137                         rc = qedr_update_qp_state(dev, qp, cur_state,
2138                                                   qp_params.new_state);
2139                 qp->state = qp_params.new_state;
2140         }
2141
2142 err:
2143         return rc;
2144 }
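
/* Illustrative caller-side sketch, not part of this file: the classic
 * RESET -> INIT -> RTR -> RTS bring-up lands here via ib_modify_qp(),
 * e.g. for the first step:
 *
 *	struct ib_qp_attr attr = {
 *		.qp_state	 = IB_QPS_INIT,
 *		.pkey_index	 = 0,
 *		.port_num	 = 1,
 *		.qp_access_flags = IB_ACCESS_REMOTE_WRITE,
 *	};
 *	ret = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
 *			   IB_QP_PORT | IB_QP_ACCESS_FLAGS);
 *
 * The attribute values are assumptions; each later transition passes its
 * own mask (IB_QP_AV, IB_QP_PATH_MTU, IB_QP_RQ_PSN, ... for INIT->RTR).
 */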
2145
2146 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
2147 {
2148         int ib_qp_acc_flags = 0;
2149
2150         if (params->incoming_rdma_write_en)
2151                 ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
2152         if (params->incoming_rdma_read_en)
2153                 ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
2154         if (params->incoming_atomic_en)
2155                 ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
2156         ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
2157         return ib_qp_acc_flags;
2158 }
2159
2160 int qedr_query_qp(struct ib_qp *ibqp,
2161                   struct ib_qp_attr *qp_attr,
2162                   int attr_mask, struct ib_qp_init_attr *qp_init_attr)
2163 {
2164         struct qed_rdma_query_qp_out_params params;
2165         struct qedr_qp *qp = get_qedr_qp(ibqp);
2166         struct qedr_dev *dev = qp->dev;
2167         int rc = 0;
2168
2169         memset(&params, 0, sizeof(params));
2170
2171         rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
2172         if (rc)
2173                 goto err;
2174
2175         memset(qp_attr, 0, sizeof(*qp_attr));
2176         memset(qp_init_attr, 0, sizeof(*qp_init_attr));
2177
2178         qp_attr->qp_state = qedr_get_ibqp_state(params.state);
2179         qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
2180         qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
2181         qp_attr->path_mig_state = IB_MIG_MIGRATED;
2182         qp_attr->rq_psn = params.rq_psn;
2183         qp_attr->sq_psn = params.sq_psn;
2184         qp_attr->dest_qp_num = params.dest_qp;
2185
2186         qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
2187
2188         qp_attr->cap.max_send_wr = qp->sq.max_wr;
2189         qp_attr->cap.max_recv_wr = qp->rq.max_wr;
2190         qp_attr->cap.max_send_sge = qp->sq.max_sges;
2191         qp_attr->cap.max_recv_sge = qp->rq.max_sges;
2192         qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
2193         qp_init_attr->cap = qp_attr->cap;
2194
2195         qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
2196         rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
2197                         params.flow_label, qp->sgid_idx,
2198                         params.hop_limit_ttl, params.traffic_class_tos);
2199         rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
2200         rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
2201         rdma_ah_set_sl(&qp_attr->ah_attr, 0);
2202         qp_attr->timeout = params.timeout;
2203         qp_attr->rnr_retry = params.rnr_retry;
2204         qp_attr->retry_cnt = params.retry_cnt;
2205         qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
2206         qp_attr->pkey_index = params.pkey_index;
2207         qp_attr->port_num = 1;
2208         rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
2209         rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
2210         qp_attr->alt_pkey_index = 0;
2211         qp_attr->alt_port_num = 0;
2212         qp_attr->alt_timeout = 0;
2213         memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
2214
2215         qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
2216         qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
2217         qp_attr->max_rd_atomic = params.max_rd_atomic;
2218         qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
2219
2220         DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
2221                  qp_attr->cap.max_inline_data);
2222
2223 err:
2224         return rc;
2225 }
2226
2227 static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp)
2228 {
2229         int rc = 0;
2230
2231         if (qp->qp_type != IB_QPT_GSI) {
2232                 rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2233                 if (rc)
2234                         return rc;
2235         }
2236
2237         if (qp->ibqp.uobject && qp->ibqp.uobject->context)
2238                 qedr_cleanup_user(dev, qp);
2239         else
2240                 qedr_cleanup_kernel(dev, qp);
2241
2242         return 0;
2243 }
2244
2245 int qedr_destroy_qp(struct ib_qp *ibqp)
2246 {
2247         struct qedr_qp *qp = get_qedr_qp(ibqp);
2248         struct qedr_dev *dev = qp->dev;
2249         struct ib_qp_attr attr;
2250         int attr_mask = 0;
2251         int rc = 0;
2252
2253         DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
2254                  qp, qp->qp_type);
2255
2256         if (rdma_protocol_roce(&dev->ibdev, 1)) {
2257                 if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
2258                     (qp->state != QED_ROCE_QP_STATE_ERR) &&
2259                     (qp->state != QED_ROCE_QP_STATE_INIT)) {
2260
2261                         attr.qp_state = IB_QPS_ERR;
2262                         attr_mask |= IB_QP_STATE;
2263
2264                         /* Change the QP state to ERROR */
2265                         qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
2266                 }
2267         } else {
2268                 /* Wait for the connect/accept to complete */
2269                 if (qp->ep) {
2270                         int wait_count = 1;
2271
2272                         while (qp->ep->during_connect) {
2273                                 DP_DEBUG(dev, QEDR_MSG_QP,
2274                                          "Connect/accept still in progress\n");
2275
2276                                 msleep(100);
2277                                 if (wait_count++ > 200) {
2278                                         DP_NOTICE(dev,
2279                                                   "timed out waiting for connect/accept\n");
2280                                         break;
2281                                 }
2282                         }
2283                 }
2284         }
2285
2286         if (qp->qp_type == IB_QPT_GSI)
2287                 qedr_destroy_gsi_qp(dev);
2288
2289         qedr_free_qp_resources(dev, qp);
2290
2291         if (atomic_dec_and_test(&qp->refcnt)) {
2292                 qedr_idr_remove(dev, qp->qp_id);
2293                 kfree(qp);
2294         }
2295         return rc;
2296 }
2297
2298 struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
2299                              struct ib_udata *udata)
2300 {
2301         struct qedr_ah *ah;
2302
2303         ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
2304         if (!ah)
2305                 return ERR_PTR(-ENOMEM);
2306
2307         ah->attr = *attr;
2308
2309         return &ah->ibah;
2310 }
2311
2312 int qedr_destroy_ah(struct ib_ah *ibah)
2313 {
2314         struct qedr_ah *ah = get_qedr_ah(ibah);
2315
2316         kfree(ah);
2317         return 0;
2318 }
2319
2320 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
2321 {
2322         struct qedr_pbl *pbl, *tmp;
2323
2324         if (info->pbl_table)
2325                 list_add_tail(&info->pbl_table->list_entry,
2326                               &info->free_pbl_list);
2327
2328         if (!list_empty(&info->inuse_pbl_list))
2329                 list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
2330
2331         list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
2332                 list_del(&pbl->list_entry);
2333                 qedr_free_pbl(dev, &info->pbl_info, pbl);
2334         }
2335 }
2336
2337 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
2338                         size_t page_list_len, bool two_layered)
2339 {
2340         struct qedr_pbl *tmp;
2341         int rc;
2342
2343         INIT_LIST_HEAD(&info->free_pbl_list);
2344         INIT_LIST_HEAD(&info->inuse_pbl_list);
2345
2346         rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
2347                                   page_list_len, two_layered);
2348         if (rc)
2349                 goto done;
2350
2351         info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2352         if (IS_ERR(info->pbl_table)) {
2353                 rc = PTR_ERR(info->pbl_table);
2354                 goto done;
2355         }
2356
2357         DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
2358                  &info->pbl_table->pa);
2359
2360         /* In the usual case we use 2 PBLs, so we add one to the free
2361          * list and allocate another one
2362          */
2363         tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2364         if (IS_ERR(tmp)) {
2365                 DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
2366                 goto done;
2367         }
2368
2369         list_add_tail(&tmp->list_entry, &info->free_pbl_list);
2370
2371         DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
2372
2373 done:
2374         if (rc)
2375                 free_mr_info(dev, info);
2376
2377         return rc;
2378 }
2379
2380 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
2381                                u64 usr_addr, int acc, struct ib_udata *udata)
2382 {
2383         struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2384         struct qedr_mr *mr;
2385         struct qedr_pd *pd;
2386         int rc = -ENOMEM;
2387
2388         pd = get_qedr_pd(ibpd);
2389         DP_DEBUG(dev, QEDR_MSG_MR,
2390                  "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
2391                  pd->pd_id, start, len, usr_addr, acc);
2392
2393         if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
2394                 return ERR_PTR(-EINVAL);
2395
2396         mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2397         if (!mr)
2398                 return ERR_PTR(rc);
2399
2400         mr->type = QEDR_MR_USER;
2401
2402         mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
2403         if (IS_ERR(mr->umem)) {
2404                 rc = -EFAULT;
2405                 goto err0;
2406         }
2407
2408         rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
2409         if (rc)
2410                 goto err1;
2411
2412         qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
2413                            &mr->info.pbl_info, mr->umem->page_shift);
2414
2415         rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2416         if (rc) {
2417                 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2418                 goto err1;
2419         }
2420
2421         /* Index only, 18 bit long, lkey = itid << 8 | key */
2422         mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2423         mr->hw_mr.key = 0;
2424         mr->hw_mr.pd = pd->pd_id;
2425         mr->hw_mr.local_read = 1;
2426         mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2427         mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2428         mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2429         mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2430         mr->hw_mr.mw_bind = false;
2431         mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
2432         mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2433         mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2434         mr->hw_mr.page_size_log = mr->umem->page_shift;
2435         mr->hw_mr.fbo = ib_umem_offset(mr->umem);
2436         mr->hw_mr.length = len;
2437         mr->hw_mr.vaddr = usr_addr;
2438         mr->hw_mr.zbva = false;
2439         mr->hw_mr.phy_mr = false;
2440         mr->hw_mr.dma_mr = false;
2441
2442         rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2443         if (rc) {
2444                 DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2445                 goto err2;
2446         }
2447
2448         mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2449         if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2450             mr->hw_mr.remote_atomic)
2451                 mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2452
2453         DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
2454                  mr->ibmr.lkey);
2455         return &mr->ibmr;
2456
2457 err2:
2458         dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2459 err1:
2460         qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2461 err0:
2462         kfree(mr);
2463         return ERR_PTR(rc);
2464 }
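
/* Worked example of the key construction above: with hw_mr.itid = 0x25
 * and hw_mr.key = 0, lkey = 0x25 << 8 | 0 = 0x2500; the low byte is the
 * consumer key and the 18-bit itid sits above it.
 */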
2465
2466 int qedr_dereg_mr(struct ib_mr *ib_mr)
2467 {
2468         struct qedr_mr *mr = get_qedr_mr(ib_mr);
2469         struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
2470         int rc = 0;
2471
2472         rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
2473         if (rc)
2474                 return rc;
2475
2476         dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2477
2478         if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR))
2479                 qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2480
2481         /* It could be user-registered memory. */
2482         if (mr->umem)
2483                 ib_umem_release(mr->umem);
2484
2485         kfree(mr);
2486
2487         return rc;
2488 }
2489
2490 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
2491                                        int max_page_list_len)
2492 {
2493         struct qedr_pd *pd = get_qedr_pd(ibpd);
2494         struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2495         struct qedr_mr *mr;
2496         int rc = -ENOMEM;
2497
2498         DP_DEBUG(dev, QEDR_MSG_MR,
2499                  "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
2500                  max_page_list_len);
2501
2502         mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2503         if (!mr)
2504                 return ERR_PTR(rc);
2505
2506         mr->dev = dev;
2507         mr->type = QEDR_MR_FRMR;
2508
2509         rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
2510         if (rc)
2511                 goto err0;
2512
2513         rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2514         if (rc) {
2515                 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2516                 goto err0;
2517         }
2518
2519         /* Index only, 18 bit long, lkey = itid << 8 | key */
2520         mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
2521         mr->hw_mr.key = 0;
2522         mr->hw_mr.pd = pd->pd_id;
2523         mr->hw_mr.local_read = 1;
2524         mr->hw_mr.local_write = 0;
2525         mr->hw_mr.remote_read = 0;
2526         mr->hw_mr.remote_write = 0;
2527         mr->hw_mr.remote_atomic = 0;
2528         mr->hw_mr.mw_bind = false;
2529         mr->hw_mr.pbl_ptr = 0;
2530         mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2531         mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2532         mr->hw_mr.fbo = 0;
2533         mr->hw_mr.length = 0;
2534         mr->hw_mr.vaddr = 0;
2535         mr->hw_mr.zbva = false;
2536         mr->hw_mr.phy_mr = true;
2537         mr->hw_mr.dma_mr = false;
2538
2539         rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2540         if (rc) {
2541                 DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2542                 goto err1;
2543         }
2544
2545         mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2546         mr->ibmr.rkey = mr->ibmr.lkey;
2547
2548         DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
2549         return mr;
2550
2551 err1:
2552         dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2553 err0:
2554         kfree(mr);
2555         return ERR_PTR(rc);
2556 }
2557
2558 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd,
2559                             enum ib_mr_type mr_type, u32 max_num_sg)
2560 {
2561         struct qedr_mr *mr;
2562
2563         if (mr_type != IB_MR_TYPE_MEM_REG)
2564                 return ERR_PTR(-EINVAL);
2565
2566         mr = __qedr_alloc_mr(ibpd, max_num_sg);
2567
2568         if (IS_ERR(mr))
2569                 return ERR_PTR(-EINVAL);
2570
2571         return &mr->ibmr;
2572 }
2573
2574 static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
2575 {
2576         struct qedr_mr *mr = get_qedr_mr(ibmr);
2577         struct qedr_pbl *pbl_table;
2578         struct regpair *pbe;
2579         u32 pbes_in_page;
2580
2581         if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
2582                 DP_ERR(mr->dev, "qedr_set_page fails when %d\n", mr->npages);
2583                 return -ENOMEM;
2584         }
2585
2586         DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
2587                  mr->npages, addr);
2588
2589         pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
2590         pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
2591         pbe = (struct regpair *)pbl_table->va;
2592         pbe +=  mr->npages % pbes_in_page;
2593         pbe->lo = cpu_to_le32((u32)addr);
2594         pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
2595
2596         mr->npages++;
2597
2598         return 0;
2599 }
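
/* Worked example of the PBE indexing above: with pbl_info.pbl_size =
 * 4096 there are 4096 / sizeof(u64) = 512 PBEs per page, so
 * mr->npages = 700 selects pbl_table[700 / 512] = pbl_table[1],
 * entry 700 % 512 = 188. The pbl_size value is illustrative.
 */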
2600
2601 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
2602 {
2603         int work = info->completed - info->completed_handled - 1;
2604
2605         DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
2606         while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
2607                 struct qedr_pbl *pbl;
2608
2609                 /* Free all the page lists that can be freed (all the ones
2610                  * that were invalidated), under the assumption that if an
2611                  * FMR completed successfully, any invalidate operation
2612                  * posted before it has also completed.
2613                  */
2614                 pbl = list_first_entry(&info->inuse_pbl_list,
2615                                        struct qedr_pbl, list_entry);
2616                 list_move_tail(&pbl->list_entry, &info->free_pbl_list);
2617                 info->completed_handled++;
2618         }
2619 }
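
/* Worked example of the accounting above: info->completed = 5 and
 * info->completed_handled = 2 give work = 5 - 2 - 1 = 2, so two PBLs
 * move back to the free list; the "- 1" leaves the most recent
 * completion unhandled as a safety margin.
 */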
2620
2621 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
2622                    int sg_nents, unsigned int *sg_offset)
2623 {
2624         struct qedr_mr *mr = get_qedr_mr(ibmr);
2625
2626         mr->npages = 0;
2627
2628         handle_completed_mrs(mr->dev, &mr->info);
2629         return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
2630 }
2631
2632 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
2633 {
2634         struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2635         struct qedr_pd *pd = get_qedr_pd(ibpd);
2636         struct qedr_mr *mr;
2637         int rc;
2638
2639         mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2640         if (!mr)
2641                 return ERR_PTR(-ENOMEM);
2642
2643         mr->type = QEDR_MR_DMA;
2644
2645         rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2646         if (rc) {
2647                 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2648                 goto err1;
2649         }
2650
2651         /* index only, 18 bit long, lkey = itid << 8 | key */
2652         mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2653         mr->hw_mr.pd = pd->pd_id;
2654         mr->hw_mr.local_read = 1;
2655         mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2656         mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2657         mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2658         mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2659         mr->hw_mr.dma_mr = true;
2660
2661         rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2662         if (rc) {
2663                 DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2664                 goto err2;
2665         }
2666
2667         mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2668         if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2669             mr->hw_mr.remote_atomic)
2670                 mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2671
2672         DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
2673         return &mr->ibmr;
2674
2675 err2:
2676         dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2677 err1:
2678         kfree(mr);
2679         return ERR_PTR(rc);
2680 }
2681
2682 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
2683 {
2684         return (((wq->prod + 1) % wq->max_wr) == wq->cons);
2685 }
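
/* Worked example: with max_wr = 4, prod = 3 and cons = 0 we get
 * (3 + 1) % 4 == 0 == cons, i.e. full. The ring deliberately holds at
 * most max_wr - 1 outstanding WRs so that prod == cons always means
 * empty; see the sizing comment in qedr_create_kernel_qp() above.
 */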
2686
2687 static int sge_data_len(struct ib_sge *sg_list, int num_sge)
2688 {
2689         int i, len = 0;
2690
2691         for (i = 0; i < num_sge; i++)
2692                 len += sg_list[i].length;
2693
2694         return len;
2695 }
2696
2697 static void swap_wqe_data64(u64 *p)
2698 {
2699         int i;
2700
2701         for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
2702                 *p = cpu_to_be64(cpu_to_le64(*p));
2703 }
2704
2705 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
2706                                        struct qedr_qp *qp, u8 *wqe_size,
2707                                        struct ib_send_wr *wr,
2708                                        struct ib_send_wr **bad_wr, u8 *bits,
2709                                        u8 bit)
2710 {
2711         u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
2712         char *seg_prt, *wqe;
2713         int i, seg_siz;
2714
2715         if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
2716                 DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
2717                 *bad_wr = wr;
2718                 return 0;
2719         }
2720
2721         if (!data_size)
2722                 return data_size;
2723
2724         *bits |= bit;
2725
2726         seg_prt = NULL;
2727         wqe = NULL;
2728         seg_siz = 0;
2729
2730         /* Copy data inline */
2731         for (i = 0; i < wr->num_sge; i++) {
2732                 u32 len = wr->sg_list[i].length;
2733                 void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
2734
2735                 while (len > 0) {
2736                         u32 cur;
2737
2738                         /* New segment required */
2739                         if (!seg_siz) {
2740                                 wqe = (char *)qed_chain_produce(&qp->sq.pbl);
2741                                 seg_prt = wqe;
2742                                 seg_siz = sizeof(struct rdma_sq_common_wqe);
2743                                 (*wqe_size)++;
2744                         }
2745
2746                         /* Calculate currently allowed length */
2747                         cur = min_t(u32, len, seg_siz);
2748                         memcpy(seg_prt, src, cur);
2749
2750                         /* Update segment variables */
2751                         seg_prt += cur;
2752                         seg_siz -= cur;
2753
2754                         /* Update sge variables */
2755                         src += cur;
2756                         len -= cur;
2757
2758                         /* Swap fully-completed segments */
2759                         if (!seg_siz)
2760                                 swap_wqe_data64((u64 *)wqe);
2761                 }
2762         }
2763
2764         /* swap last not completed segment */
2765         if (seg_siz)
2766                 swap_wqe_data64((u64 *)wqe);
2767
2768         return data_size;
2769 }
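
/* Illustrative caller-side sketch, not part of this file: the inline
 * path above is taken when a ULP posts a small send with the inline
 * flag, roughly:
 *
 *	struct ib_sge sge = { .addr = (uintptr_t)buf, .length = 16 };
 *	struct ib_send_wr wr = {
 *		.opcode		= IB_WR_SEND,
 *		.sg_list	= &sge,
 *		.num_sge	= 1,
 *		.send_flags	= IB_SEND_INLINE,
 *	};
 *	ret = ib_post_send(qp, &wr, &bad_wr);
 *
 * buf and the length are assumptions; note that for inline data .addr
 * is a CPU pointer rather than a DMA address, which is why the copy
 * loop above can memcpy() from it directly.
 */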
2770
2771 #define RQ_SGE_SET(sge, vaddr, vlength, vflags)                 \
2772         do {                                                    \
2773                 DMA_REGPAIR_LE(sge->addr, vaddr);               \
2774                 (sge)->length = cpu_to_le32(vlength);           \
2775                 (sge)->flags = cpu_to_le32(vflags);             \
2776         } while (0)
2777
2778 #define SRQ_HDR_SET(hdr, vwr_id, num_sge)                       \
2779         do {                                                    \
2780                 DMA_REGPAIR_LE(hdr->wr_id, vwr_id);             \
2781                 (hdr)->num_sges = num_sge;                      \
2782         } while (0)
2783
2784 #define SRQ_SGE_SET(sge, vaddr, vlength, vlkey)                 \
2785         do {                                                    \
2786                 DMA_REGPAIR_LE(sge->addr, vaddr);               \
2787                 (sge)->length = cpu_to_le32(vlength);           \
2788                 (sge)->l_key = cpu_to_le32(vlkey);              \
2789         } while (0)
2790
2791 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
2792                                 struct ib_send_wr *wr)
2793 {
2794         u32 data_size = 0;
2795         int i;
2796
2797         for (i = 0; i < wr->num_sge; i++) {
2798                 struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);
2799
2800                 DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
2801                 sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
2802                 sge->length = cpu_to_le32(wr->sg_list[i].length);
2803                 data_size += wr->sg_list[i].length;
2804         }
2805
2806         if (wqe_size)
2807                 *wqe_size += wr->num_sge;
2808
2809         return data_size;
2810 }
2811
2812 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
2813                                      struct qedr_qp *qp,
2814                                      struct rdma_sq_rdma_wqe_1st *rwqe,
2815                                      struct rdma_sq_rdma_wqe_2nd *rwqe2,
2816                                      struct ib_send_wr *wr,
2817                                      struct ib_send_wr **bad_wr)
2818 {
2819         rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
2820         DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
2821
2822         if (wr->send_flags & IB_SEND_INLINE &&
2823             (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
2824              wr->opcode == IB_WR_RDMA_WRITE)) {
2825                 u8 flags = 0;
2826
2827                 SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
2828                 return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
2829                                                    bad_wr, &rwqe->flags, flags);
2830         }
2831
2832         return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
2833 }
2834
static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
				     struct qedr_qp *qp,
				     struct rdma_sq_send_wqe_1st *swqe,
				     struct rdma_sq_send_wqe_2st *swqe2,
				     struct ib_send_wr *wr,
				     struct ib_send_wr **bad_wr)
{
	memset(swqe2, 0, sizeof(*swqe2));
	if (wr->send_flags & IB_SEND_INLINE) {
		u8 flags = 0;

		SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
		return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
						   bad_wr, &swqe->flags, flags);
	}

	return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
}

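/* Translate an IB_WR_REG_MR work request into a fast-MR WQE pair: the
 * first segment carries the iova and key, the second the access flags,
 * the log2 page size (ilog2(page_size) - 12, i.e. expressed relative to
 * 4K), the length and the PBL address.  The MR is remembered against the
 * SQ producer index so completion processing can find it.
 */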
static int qedr_prepare_reg(struct qedr_qp *qp,
			    struct rdma_sq_fmr_wqe_1st *fwqe1,
			    struct ib_reg_wr *wr)
{
	struct qedr_mr *mr = get_qedr_mr(wr->mr);
	struct rdma_sq_fmr_wqe_2nd *fwqe2;

	fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
	fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
	fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
	fwqe1->l_key = wr->key;

	fwqe2->access_ctrl = 0;

	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
		   !!(wr->access & IB_ACCESS_REMOTE_READ));
	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
		   !!(wr->access & IB_ACCESS_REMOTE_WRITE));
	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
		   !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
	SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
		   !!(wr->access & IB_ACCESS_LOCAL_WRITE));
	fwqe2->fmr_ctrl = 0;

	SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
		   ilog2(mr->ibmr.page_size) - 12);

	fwqe2->length_hi = 0;
	fwqe2->length_lo = mr->ibmr.length;
	fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
	fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);

	qp->wqe_wr_id[qp->sq.prod].mr = mr;

	return 0;
}

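/* Map a send-side WR opcode to the WC opcode reported on completion.
 * Opcodes the driver does not handle default to IB_WC_SEND; such WRs are
 * then rejected by the opcode switch in __qedr_post_send().
 */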
static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
{
	switch (opcode) {
	case IB_WR_RDMA_WRITE:
	case IB_WR_RDMA_WRITE_WITH_IMM:
		return IB_WC_RDMA_WRITE;
	case IB_WR_SEND_WITH_IMM:
	case IB_WR_SEND:
	case IB_WR_SEND_WITH_INV:
		return IB_WC_SEND;
	case IB_WR_RDMA_READ:
	case IB_WR_RDMA_READ_WITH_INV:
		return IB_WC_RDMA_READ;
	case IB_WR_ATOMIC_CMP_AND_SWP:
		return IB_WC_COMP_SWAP;
	case IB_WR_ATOMIC_FETCH_AND_ADD:
		return IB_WC_FETCH_ADD;
	case IB_WR_REG_MR:
		return IB_WC_REG_MR;
	case IB_WR_LOCAL_INV:
		return IB_WC_LOCAL_INV;
	default:
		return IB_WC_SEND;
	}
}

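/* Validate that a WR can be accepted: the SQ must not be full, the WR's
 * SGE count must fit, and the PBL must have room for a maximal WQE.  Each
 * failure mode is logged only once per QP, tracked via qp->err_bitmap.
 */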
static inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr)
{
	int wq_is_full, err_wr, pbl_is_full;
	struct qedr_dev *dev = qp->dev;

	/* prevent SQ overflow and/or processing of a bad WR */
	err_wr = wr->num_sge > qp->sq.max_sges;
	wq_is_full = qedr_wq_is_full(&qp->sq);
	pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
		      QEDR_MAX_SQE_ELEMENTS_PER_SQE;
	if (wq_is_full || err_wr || pbl_is_full) {
		if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
			DP_ERR(dev,
			       "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
			       qp);
			qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
		}

		if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
			DP_ERR(dev,
			       "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
			       qp);
			qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
		}

		if (pbl_is_full &&
		    !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
			DP_ERR(dev,
			       "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
			       qp);
			qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
		}
		return false;
	}
	return true;
}

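/* Build and enqueue a single send WR on the SQ.  A common header (flags,
 * completion/fence bits, previous WQE size) is produced first, then the
 * opcode-specific segments.  If the WR is rejected, the chain producer
 * and prev_wqe_size are rolled back so the SQ is left exactly as it was.
 */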
static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
			    struct ib_send_wr **bad_wr)
{
	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
	struct qedr_qp *qp = get_qedr_qp(ibqp);
	struct rdma_sq_atomic_wqe_1st *awqe1;
	struct rdma_sq_atomic_wqe_2nd *awqe2;
	struct rdma_sq_atomic_wqe_3rd *awqe3;
	struct rdma_sq_send_wqe_2st *swqe2;
	struct rdma_sq_local_inv_wqe *iwqe;
	struct rdma_sq_rdma_wqe_2nd *rwqe2;
	struct rdma_sq_send_wqe_1st *swqe;
	struct rdma_sq_rdma_wqe_1st *rwqe;
	struct rdma_sq_fmr_wqe_1st *fwqe1;
	struct rdma_sq_common_wqe *wqe;
	u32 length;
	int rc = 0;
	bool comp;

	if (!qedr_can_post_send(qp, wr)) {
		*bad_wr = wr;
		return -ENOMEM;
	}

	wqe = qed_chain_produce(&qp->sq.pbl);
	qp->wqe_wr_id[qp->sq.prod].signaled =
		!!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;

	wqe->flags = 0;
	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
		   !!(wr->send_flags & IB_SEND_SOLICITED));
	comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled;
	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
	SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
		   !!(wr->send_flags & IB_SEND_FENCE));
	wqe->prev_wqe_size = qp->prev_wqe_size;

	qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);

	switch (wr->opcode) {
	case IB_WR_SEND_WITH_IMM:
		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
			rc = -EINVAL;
			*bad_wr = wr;
			break;
		}
		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
		swqe->wqe_size = 2;
		swqe2 = qed_chain_produce(&qp->sq.pbl);

		swqe->inv_key_or_imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
						   wr, bad_wr);
		swqe->length = cpu_to_le32(length);
		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
		qp->prev_wqe_size = swqe->wqe_size;
		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
		break;
	case IB_WR_SEND:
		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
		swqe = (struct rdma_sq_send_wqe_1st *)wqe;

		swqe->wqe_size = 2;
		swqe2 = qed_chain_produce(&qp->sq.pbl);
		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
						   wr, bad_wr);
		swqe->length = cpu_to_le32(length);
		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
		qp->prev_wqe_size = swqe->wqe_size;
		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
		break;
	case IB_WR_SEND_WITH_INV:
		wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
		swqe = (struct rdma_sq_send_wqe_1st *)wqe;
		swqe2 = qed_chain_produce(&qp->sq.pbl);
		swqe->wqe_size = 2;
		swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
		length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
						   wr, bad_wr);
		swqe->length = cpu_to_le32(length);
		qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
		qp->prev_wqe_size = swqe->wqe_size;
		qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
		break;

	case IB_WR_RDMA_WRITE_WITH_IMM:
		if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
			rc = -EINVAL;
			*bad_wr = wr;
			break;
		}
		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;

		rwqe->wqe_size = 2;
		rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data));
		rwqe2 = qed_chain_produce(&qp->sq.pbl);
		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
						   wr, bad_wr);
		rwqe->length = cpu_to_le32(length);
		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
		qp->prev_wqe_size = rwqe->wqe_size;
		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
		break;
	case IB_WR_RDMA_WRITE:
		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;

		rwqe->wqe_size = 2;
		rwqe2 = qed_chain_produce(&qp->sq.pbl);
		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
						   wr, bad_wr);
		rwqe->length = cpu_to_le32(length);
		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
		qp->prev_wqe_size = rwqe->wqe_size;
		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
		break;
	case IB_WR_RDMA_READ_WITH_INV:
		SET_FIELD2(wqe->flags, RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG, 1);
		/* fallthrough: handling is identical to RDMA READ */

	case IB_WR_RDMA_READ:
		wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
		rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;

		rwqe->wqe_size = 2;
		rwqe2 = qed_chain_produce(&qp->sq.pbl);
		length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
						   wr, bad_wr);
		rwqe->length = cpu_to_le32(length);
		qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
		qp->prev_wqe_size = rwqe->wqe_size;
		qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
		break;

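	/* Atomic WQEs occupy three chain elements: the header, the remote
	 * address + rkey, and the swap/compare-add operands.
	 */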
	case IB_WR_ATOMIC_CMP_AND_SWP:
	case IB_WR_ATOMIC_FETCH_AND_ADD:
		awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
		awqe1->wqe_size = 4;

		awqe2 = qed_chain_produce(&qp->sq.pbl);
		DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
		awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);

		awqe3 = qed_chain_produce(&qp->sq.pbl);

		if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
			DMA_REGPAIR_LE(awqe3->swap_data,
				       atomic_wr(wr)->compare_add);
		} else {
			wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
			DMA_REGPAIR_LE(awqe3->swap_data,
				       atomic_wr(wr)->swap);
			DMA_REGPAIR_LE(awqe3->cmp_data,
				       atomic_wr(wr)->compare_add);
		}

		qedr_prepare_sq_sges(qp, NULL, wr);

		qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
		qp->prev_wqe_size = awqe1->wqe_size;
		break;

	case IB_WR_LOCAL_INV:
		iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
		iwqe->wqe_size = 1;

		iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
		iwqe->inv_l_key = wr->ex.invalidate_rkey;
		qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
		qp->prev_wqe_size = iwqe->wqe_size;
		break;
	case IB_WR_REG_MR:
		DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
		wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
		fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
		fwqe1->wqe_size = 2;

		rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
		if (rc) {
			DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
			*bad_wr = wr;
			break;
		}

		qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
		qp->prev_wqe_size = fwqe1->wqe_size;
		break;
	default:
		DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
		rc = -EINVAL;
		*bad_wr = wr;
		break;
	}

	if (*bad_wr) {
		u16 value;

		/* Restore prod to its position before
		 * this WR was processed
		 */
		value = le16_to_cpu(qp->sq.db_data.data.value);
		qed_chain_set_prod(&qp->sq.pbl, value, wqe);

		/* Restore prev_wqe_size */
		qp->prev_wqe_size = wqe->prev_wqe_size;
		rc = -EINVAL;
		DP_ERR(dev, "POST SEND FAILED\n");
	}

	return rc;
}

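/* Post a chain of send work requests.  GSI QPs are diverted to the CM
 * path; for RoCE the QP must be in a state that allows posting (RTS, SQD
 * or ERR).  WRs are validated and built one at a time under q_lock.
 *
 * Illustrative caller through the core verbs API (a sketch only; "qp",
 * "dma_addr", "len" and "lkey" are hypothetical, set up elsewhere):
 *
 *	struct ib_sge sge = { .addr = dma_addr, .length = len, .lkey = lkey };
 *	struct ib_send_wr wr = { .opcode = IB_WR_SEND, .sg_list = &sge,
 *				 .num_sge = 1,
 *				 .send_flags = IB_SEND_SIGNALED };
 *	struct ib_send_wr *bad_wr;
 *	int rc = ib_post_send(qp, &wr, &bad_wr);
 */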
int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
		   struct ib_send_wr **bad_wr)
{
	struct qedr_dev *dev = get_qedr_dev(ibqp->device);
	struct qedr_qp *qp = get_qedr_qp(ibqp);
	unsigned long flags;
	int rc = 0;

	*bad_wr = NULL;

	if (qp->qp_type == IB_QPT_GSI)
		return qedr_gsi_post_send(ibqp, wr, bad_wr);

	spin_lock_irqsave(&qp->q_lock, flags);

	if (rdma_protocol_roce(&dev->ibdev, 1)) {
		if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
		    (qp->state != QED_ROCE_QP_STATE_ERR) &&
		    (qp->state != QED_ROCE_QP_STATE_SQD)) {
			spin_unlock_irqrestore(&qp->q_lock, flags);
			*bad_wr = wr;
			DP_DEBUG(dev, QEDR_MSG_CQ,
				 "QP in wrong state! QP icid=0x%x state %d\n",
				 qp->icid, qp->state);
			return -EINVAL;
		}
	}

	while (wr) {
		rc = __qedr_post_send(ibqp, wr, bad_wr);
		if (rc)
			break;

		qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;