drivers/infiniband/hw/qedr/verbs.c
/* QLogic qedr NIC Driver
 * Copyright (c) 2015-2016  QLogic Corporation
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/dma-mapping.h>
#include <linux/crc32.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/udp.h>
#include <linux/iommu.h>

#include <rdma/ib_verbs.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/iw_cm.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>

#include <linux/qed/common_hsi.h>
#include "qedr_hsi_rdma.h"
#include <linux/qed/qed_if.h>
#include "qedr.h"
#include "verbs.h"
#include <rdma/qedr-abi.h>
#include "qedr_roce_cm.h"

#define DB_ADDR_SHIFT(addr)             ((addr) << DB_PWM_ADDR_OFFSET_SHIFT)

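/* Copy at most udata->outlen bytes, so that a response structure that has
 * grown over kernel versions cannot overflow the smaller buffer of an
 * older user-space library.
 */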
static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src,
                                        size_t len)
{
        size_t min_len = min_t(size_t, len, udata->outlen);

        return ib_copy_to_udata(udata, src, min_len);
}

int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
{
        /* Valid indices are 0..QEDR_ROCE_PKEY_TABLE_LEN - 1, so reject
         * index == QEDR_ROCE_PKEY_TABLE_LEN as well (off-by-one otherwise).
         */
        if (index >= QEDR_ROCE_PKEY_TABLE_LEN)
                return -EINVAL;

        *pkey = QEDR_ROCE_PKEY_DEFAULT;
        return 0;
}

int qedr_iw_query_gid(struct ib_device *ibdev, u8 port,
                      int index, union ib_gid *sgid)
{
        struct qedr_dev *dev = get_qedr_dev(ibdev);

        memset(sgid->raw, 0, sizeof(sgid->raw));
        ether_addr_copy(sgid->raw, dev->ndev->dev_addr);

        DP_DEBUG(dev, QEDR_MSG_INIT, "QUERY sgid[%d]=%llx:%llx\n", index,
                 sgid->global.interface_id, sgid->global.subnet_prefix);

        return 0;
}

int qedr_query_device(struct ib_device *ibdev,
                      struct ib_device_attr *attr, struct ib_udata *udata)
{
        struct qedr_dev *dev = get_qedr_dev(ibdev);
        struct qedr_device_attr *qattr = &dev->attr;

        if (!dev->rdma_ctx) {
                DP_ERR(dev,
                       "qedr_query_device called with invalid params rdma_ctx=%p\n",
                       dev->rdma_ctx);
                return -EINVAL;
        }

        memset(attr, 0, sizeof(*attr));

        attr->fw_ver = qattr->fw_ver;
        attr->sys_image_guid = qattr->sys_image_guid;
        attr->max_mr_size = qattr->max_mr_size;
        attr->page_size_cap = qattr->page_size_caps;
        attr->vendor_id = qattr->vendor_id;
        attr->vendor_part_id = qattr->vendor_part_id;
        attr->hw_ver = qattr->hw_ver;
        attr->max_qp = qattr->max_qp;
        attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
        attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
            IB_DEVICE_RC_RNR_NAK_GEN |
            IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;

        attr->max_sge = qattr->max_sge;
        attr->max_sge_rd = qattr->max_sge;
        attr->max_cq = qattr->max_cq;
        attr->max_cqe = qattr->max_cqe;
        attr->max_mr = qattr->max_mr;
        attr->max_mw = qattr->max_mw;
        attr->max_pd = qattr->max_pd;
        attr->atomic_cap = dev->atomic_cap;
        attr->max_fmr = qattr->max_fmr;
        attr->max_map_per_fmr = 16;
        attr->max_qp_init_rd_atom =
            1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
        attr->max_qp_rd_atom =
            min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
                attr->max_qp_init_rd_atom);

        attr->max_srq = qattr->max_srq;
        attr->max_srq_sge = qattr->max_srq_sge;
        attr->max_srq_wr = qattr->max_srq_wr;

        attr->local_ca_ack_delay = qattr->dev_ack_delay;
        attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
        attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
        attr->max_ah = qattr->max_ah;

        return 0;
}

#define QEDR_SPEED_SDR          (1)
#define QEDR_SPEED_DDR          (2)
#define QEDR_SPEED_QDR          (4)
#define QEDR_SPEED_FDR10        (8)
#define QEDR_SPEED_FDR          (16)
#define QEDR_SPEED_EDR          (32)

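/* The QEDR_SPEED_* values above follow the IB active_speed encoding
 * (SDR=1, DDR=2, QDR=4, FDR10=8, FDR=16, EDR=32). Each Ethernet link
 * speed in Mb/s below is mapped to the closest IB speed/width product,
 * e.g. 100G is reported as EDR (25 Gb/s per lane) x4.
 */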
static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
                                            u8 *ib_width)
{
        switch (speed) {
        case 1000:
                *ib_speed = QEDR_SPEED_SDR;
                *ib_width = IB_WIDTH_1X;
                break;
        case 10000:
                *ib_speed = QEDR_SPEED_QDR;
                *ib_width = IB_WIDTH_1X;
                break;

        case 20000:
                *ib_speed = QEDR_SPEED_DDR;
                *ib_width = IB_WIDTH_4X;
                break;

        case 25000:
                *ib_speed = QEDR_SPEED_EDR;
                *ib_width = IB_WIDTH_1X;
                break;

        case 40000:
                *ib_speed = QEDR_SPEED_QDR;
                *ib_width = IB_WIDTH_4X;
                break;

        case 50000:
                *ib_speed = QEDR_SPEED_QDR;
                *ib_width = IB_WIDTH_4X;
                break;

        case 100000:
                *ib_speed = QEDR_SPEED_EDR;
                *ib_width = IB_WIDTH_4X;
                break;

        default:
                /* Unsupported */
                *ib_speed = QEDR_SPEED_SDR;
                *ib_width = IB_WIDTH_1X;
        }
}

int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
{
        struct qedr_dev *dev;
        struct qed_rdma_port *rdma_port;

        dev = get_qedr_dev(ibdev);
        if (port > 1) {
                DP_ERR(dev, "invalid_port=0x%x\n", port);
                return -EINVAL;
        }

        if (!dev->rdma_ctx) {
                DP_ERR(dev, "rdma_ctx is NULL\n");
                return -EINVAL;
        }

        rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);

        /* *attr is zeroed by the caller; avoid zeroing it here */
        if (rdma_port->port_state == QED_RDMA_PORT_UP) {
                attr->state = IB_PORT_ACTIVE;
                attr->phys_state = 5;
        } else {
                attr->state = IB_PORT_DOWN;
                attr->phys_state = 3;
        }
        attr->max_mtu = IB_MTU_4096;
        attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
        attr->lid = 0;
        attr->lmc = 0;
        attr->sm_lid = 0;
        attr->sm_sl = 0;
        attr->port_cap_flags = IB_PORT_IP_BASED_GIDS;
        if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
                attr->gid_tbl_len = 1;
                attr->pkey_tbl_len = 1;
        } else {
                attr->gid_tbl_len = QEDR_MAX_SGID;
                attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
        }
        attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
        attr->qkey_viol_cntr = 0;
        get_link_speed_and_width(rdma_port->link_speed,
                                 &attr->active_speed, &attr->active_width);
        attr->max_msg_sz = rdma_port->max_msg_size;
        attr->max_vl_num = 4;

        return 0;
}

int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask,
                     struct ib_port_modify *props)
{
        struct qedr_dev *dev;

        dev = get_qedr_dev(ibdev);
        if (port > 1) {
                DP_ERR(dev, "invalid_port=0x%x\n", port);
                return -EINVAL;
        }

        return 0;
}

static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
                         unsigned long len)
{
        struct qedr_mm *mm;

        mm = kzalloc(sizeof(*mm), GFP_KERNEL);
        if (!mm)
                return -ENOMEM;

        mm->key.phy_addr = phy_addr;
        /* This function might be called with a length which is not a multiple
         * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel
         * forces this granularity by increasing the requested size if needed.
         * When qedr_mmap is called, it will search the list with the updated
         * length as a key. To prevent search failures, the length is rounded up
         * in advance to PAGE_SIZE.
         */
        mm->key.len = roundup(len, PAGE_SIZE);
        INIT_LIST_HEAD(&mm->entry);

        mutex_lock(&uctx->mm_list_lock);
        list_add(&mm->entry, &uctx->mm_head);
        mutex_unlock(&uctx->mm_list_lock);

        DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
                 "added (addr=0x%llx,len=0x%lx) for ctx=%p\n",
                 (unsigned long long)mm->key.phy_addr,
                 (unsigned long)mm->key.len, uctx);

        return 0;
}

static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
                             unsigned long len)
{
        bool found = false;
        struct qedr_mm *mm;

        mutex_lock(&uctx->mm_list_lock);
        list_for_each_entry(mm, &uctx->mm_head, entry) {
                if (len != mm->key.len || phy_addr != mm->key.phy_addr)
                        continue;

                found = true;
                break;
        }
        mutex_unlock(&uctx->mm_list_lock);
        /* Log the search key, not mm->key: 'mm' is not a valid entry when
         * nothing was found.
         */
        DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
                 "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n",
                 phy_addr, len, uctx, found);

        return found;
}

struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev,
                                        struct ib_udata *udata)
{
        int rc;
        struct qedr_ucontext *ctx;
        struct qedr_alloc_ucontext_resp uresp;
        struct qedr_dev *dev = get_qedr_dev(ibdev);
        struct qed_rdma_add_user_out_params oparams;

        if (!udata)
                return ERR_PTR(-EFAULT);

        ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
        if (!ctx)
                return ERR_PTR(-ENOMEM);

        rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
        if (rc) {
                DP_ERR(dev,
                       "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this, consider increasing the number of DPIs, increasing the doorbell BAR size, or closing unnecessary RoCE applications. To increase the number of DPIs, consult the qedr readme\n",
                       rc);
                goto err;
        }

        ctx->dpi = oparams.dpi;
        ctx->dpi_addr = oparams.dpi_addr;
        ctx->dpi_phys_addr = oparams.dpi_phys_addr;
        ctx->dpi_size = oparams.dpi_size;
        INIT_LIST_HEAD(&ctx->mm_head);
        mutex_init(&ctx->mm_list_lock);

        memset(&uresp, 0, sizeof(uresp));

        uresp.dpm_enabled = dev->user_dpm_enabled;
        uresp.wids_enabled = 1;
        uresp.wid_count = oparams.wid_count;
        uresp.db_pa = ctx->dpi_phys_addr;
        uresp.db_size = ctx->dpi_size;
        uresp.max_send_wr = dev->attr.max_sqe;
        uresp.max_recv_wr = dev->attr.max_rqe;
        uresp.max_srq_wr = dev->attr.max_srq_wr;
        uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
        uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
        uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
        uresp.max_cqes = QEDR_MAX_CQES;

        rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
        if (rc)
                goto err;

        ctx->dev = dev;

        rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size);
        if (rc)
                goto err;

        DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
                 &ctx->ibucontext);
        return &ctx->ibucontext;

err:
        kfree(ctx);
        return ERR_PTR(rc);
}

int qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
{
        struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
        struct qedr_mm *mm, *tmp;
        int status = 0;

        DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
                 uctx);
        uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi);

        list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
                DP_DEBUG(uctx->dev, QEDR_MSG_MISC,
                         "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n",
                         mm->key.phy_addr, mm->key.len, uctx);
                list_del(&mm->entry);
                kfree(mm);
        }

        kfree(uctx);
        return status;
}

int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
        struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
        struct qedr_dev *dev = get_qedr_dev(context->device);
        unsigned long phys_addr = vma->vm_pgoff << PAGE_SHIFT;
        unsigned long len = (vma->vm_end - vma->vm_start);
        unsigned long dpi_start;

        dpi_start = dev->db_phys_addr + (ucontext->dpi * ucontext->dpi_size);

        DP_DEBUG(dev, QEDR_MSG_INIT,
                 "mmap invoked with vm_start=0x%pK, vm_end=0x%pK, vm_pgoff=0x%pK; dpi_start=0x%pK dpi_size=0x%x\n",
                 (void *)vma->vm_start, (void *)vma->vm_end,
                 (void *)vma->vm_pgoff, (void *)dpi_start, ucontext->dpi_size);

        if ((vma->vm_start & (PAGE_SIZE - 1)) || (len & (PAGE_SIZE - 1))) {
                DP_ERR(dev,
                       "failed mmap, addresses must be page aligned: start=0x%pK, end=0x%pK\n",
                       (void *)vma->vm_start, (void *)vma->vm_end);
                return -EINVAL;
        }

        if (!qedr_search_mmap(ucontext, phys_addr, len)) {
                DP_ERR(dev, "failed mmap, vm_pgoff=0x%lx is not authorized\n",
                       vma->vm_pgoff);
                return -EINVAL;
        }

        if (phys_addr < dpi_start ||
            ((phys_addr + len) > (dpi_start + ucontext->dpi_size))) {
                DP_ERR(dev,
                       "failed mmap, pages are outside of dpi; page address=0x%pK, dpi_start=0x%pK, dpi_size=0x%x\n",
                       (void *)phys_addr, (void *)dpi_start,
                       ucontext->dpi_size);
                return -EINVAL;
        }

        if (vma->vm_flags & VM_READ) {
                DP_ERR(dev, "failed mmap, cannot map doorbell bar for read\n");
                return -EINVAL;
        }

        vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
        return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, len,
                                  vma->vm_page_prot);
}

struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
                            struct ib_ucontext *context, struct ib_udata *udata)
{
        struct qedr_dev *dev = get_qedr_dev(ibdev);
        struct qedr_pd *pd;
        u16 pd_id;
        int rc;

        DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n",
                 (udata && context) ? "User Lib" : "Kernel");

        if (!dev->rdma_ctx) {
                DP_ERR(dev, "invalid RDMA context\n");
                return ERR_PTR(-EINVAL);
        }

        pd = kzalloc(sizeof(*pd), GFP_KERNEL);
        if (!pd)
                return ERR_PTR(-ENOMEM);

        rc = dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
        if (rc)
                goto err;

        pd->pd_id = pd_id;

        if (udata && context) {
                struct qedr_alloc_pd_uresp uresp = {
                        .pd_id = pd_id,
                };

                rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
                if (rc) {
                        DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
                        dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd_id);
                        goto err;
                }

                pd->uctx = get_qedr_ucontext(context);
                pd->uctx->pd = pd;
        }

        return &pd->ibpd;

err:
        kfree(pd);
        return ERR_PTR(rc);
}

int qedr_dealloc_pd(struct ib_pd *ibpd)
{
        struct qedr_dev *dev = get_qedr_dev(ibpd->device);
        struct qedr_pd *pd = get_qedr_pd(ibpd);

        if (!pd) {
                pr_err("Invalid PD received in dealloc_pd\n");
                return -EINVAL;
        }

        DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
        dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);

        kfree(pd);

        return 0;
}

static void qedr_free_pbl(struct qedr_dev *dev,
                          struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
{
        struct pci_dev *pdev = dev->pdev;
        int i;

        for (i = 0; i < pbl_info->num_pbls; i++) {
                if (!pbl[i].va)
                        continue;
                dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
                                  pbl[i].va, pbl[i].pa);
        }

        kfree(pbl);
}

#define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
#define MAX_FW_PBL_PAGE_SIZE (64 * 1024)

#define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64))
#define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
#define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)

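/* Sizing example: a 4 KiB PBL page holds 4096 / 8 = 512 64-bit PBEs and
 * the maximum 64 KiB page holds 8192, so a two-layer PBL covers at most
 * 8192 * 8192 = 67108864 mapped pages.
 */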
static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
                                           struct qedr_pbl_info *pbl_info,
                                           gfp_t flags)
{
        struct pci_dev *pdev = dev->pdev;
        struct qedr_pbl *pbl_table;
        dma_addr_t *pbl_main_tbl;
        dma_addr_t pa;
        void *va;
        int i;

        pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
        if (!pbl_table)
                return ERR_PTR(-ENOMEM);

        for (i = 0; i < pbl_info->num_pbls; i++) {
                va = dma_zalloc_coherent(&pdev->dev, pbl_info->pbl_size,
                                         &pa, flags);
                if (!va)
                        goto err;

                pbl_table[i].va = va;
                pbl_table[i].pa = pa;
        }

        /* Two-layer PBLs: if we have more than one pbl we need to initialize
         * the first one with physical pointers to all of the rest
         */
        pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
        for (i = 0; i < pbl_info->num_pbls - 1; i++)
                pbl_main_tbl[i] = pbl_table[i + 1].pa;

        return pbl_table;

err:
        /* qedr_free_pbl() skips entries that were never allocated (NULL va),
         * so it handles partial-allocation cleanup by itself; freeing the
         * populated entries here as well would double-free them.
         */
        qedr_free_pbl(dev, pbl_info, pbl_table);

        return ERR_PTR(-ENOMEM);
}

static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
                                struct qedr_pbl_info *pbl_info,
                                u32 num_pbes, int two_layer_capable)
{
        u32 pbl_capacity;
        u32 pbl_size;
        u32 num_pbls;

        if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
                if (num_pbes > MAX_PBES_TWO_LAYER) {
                        DP_ERR(dev, "prepare pbl table: too many pages %d\n",
                               num_pbes);
                        return -EINVAL;
                }

                /* calculate required pbl page size */
                pbl_size = MIN_FW_PBL_PAGE_SIZE;
                pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
                               NUM_PBES_ON_PAGE(pbl_size);

                while (pbl_capacity < num_pbes) {
                        pbl_size *= 2;
                        pbl_capacity = pbl_size / sizeof(u64);
                        pbl_capacity = pbl_capacity * pbl_capacity;
                }

                num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
                num_pbls++;     /* One for layer 0 (points to the other pbls) */
                pbl_info->two_layered = true;
        } else {
                /* One layered PBL */
                num_pbls = 1;
                pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
                                 roundup_pow_of_two((num_pbes * sizeof(u64))));
                pbl_info->two_layered = false;
        }

        pbl_info->num_pbls = num_pbls;
        pbl_info->pbl_size = pbl_size;
        pbl_info->num_pbes = num_pbes;

        DP_DEBUG(dev, QEDR_MSG_MR,
                 "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
                 pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);

        return 0;
}

static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
                               struct qedr_pbl *pbl,
                               struct qedr_pbl_info *pbl_info, u32 pg_shift)
{
        int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
        u32 fw_pg_cnt, fw_pg_per_umem_pg;
        struct qedr_pbl *pbl_tbl;
        struct scatterlist *sg;
        struct regpair *pbe;
        u64 pg_addr;
        int entry;

        if (!pbl_info->num_pbes)
                return;

        /* If we have a two layered pbl, the first pbl points to the rest
         * of the pbls and the first entry lies in the second pbl in the table
         */
        if (pbl_info->two_layered)
                pbl_tbl = &pbl[1];
        else
                pbl_tbl = pbl;

        pbe = (struct regpair *)pbl_tbl->va;
        if (!pbe) {
                DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n");
                return;
        }

        pbe_cnt = 0;

        shift = umem->page_shift;

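        /* One umem page may span several firmware pages: e.g. a 2 MiB huge
         * page (page_shift 21) split into 4 KiB firmware pages (pg_shift 12)
         * yields BIT(9) = 512 firmware PBEs per umem page.
         */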
        fw_pg_per_umem_pg = BIT(umem->page_shift - pg_shift);

        for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
                pages = sg_dma_len(sg) >> shift;
                pg_addr = sg_dma_address(sg);
                for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
                        for (fw_pg_cnt = 0; fw_pg_cnt < fw_pg_per_umem_pg;) {
                                pbe->lo = cpu_to_le32(pg_addr);
                                pbe->hi = cpu_to_le32(upper_32_bits(pg_addr));

                                pg_addr += BIT(pg_shift);
                                pbe_cnt++;
                                total_num_pbes++;
                                pbe++;

                                if (total_num_pbes == pbl_info->num_pbes)
                                        return;

                                /* If the given pbl is full storing the pbes,
                                 * move to next pbl.
                                 */
                                if (pbe_cnt ==
                                    (pbl_info->pbl_size / sizeof(u64))) {
                                        pbl_tbl++;
                                        pbe = (struct regpair *)pbl_tbl->va;
                                        pbe_cnt = 0;
                                }

                                fw_pg_cnt++;
                        }
                }
        }
}

static int qedr_copy_cq_uresp(struct qedr_dev *dev,
                              struct qedr_cq *cq, struct ib_udata *udata)
{
        struct qedr_create_cq_uresp uresp;
        int rc;

        memset(&uresp, 0, sizeof(uresp));

        uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
        uresp.icid = cq->icid;

        rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
        if (rc)
                DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);

        return rc;
}

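/* Advance the CQ consumer. The toggle bit tracks chain wrap parity: each
 * time the consumer passes the last element the expected toggle value is
 * flipped, which lets the poller tell freshly written CQEs from stale
 * ones.
 */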
static void consume_cqe(struct qedr_cq *cq)
{
        if (cq->latest_cqe == cq->toggle_cqe)
                cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;

        cq->latest_cqe = qed_chain_consume(&cq->pbl);
}

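/* Worked example, assuming 32-byte CQEs and 4 KiB pages: a request for
 * 100 entries needs (100 + 1) * 32 = 3232 bytes, which aligns up to 4096
 * bytes and therefore yields 128 usable entries.
 */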
static inline int qedr_align_cq_entries(int entries)
{
        u64 size, aligned_size;

        /* We allocate an extra entry that we don't report to the FW. */
        size = (entries + 1) * QEDR_CQE_SIZE;
        aligned_size = ALIGN(size, PAGE_SIZE);

        return aligned_size / QEDR_CQE_SIZE;
}

static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
                                       struct qedr_dev *dev,
                                       struct qedr_userq *q,
                                       u64 buf_addr, size_t buf_len,
                                       int access, int dmasync,
                                       int alloc_and_init)
{
        u32 fw_pages;
        int rc;

        q->buf_addr = buf_addr;
        q->buf_len = buf_len;
        q->umem = ib_umem_get(ib_ctx, q->buf_addr, q->buf_len, access, dmasync);
        if (IS_ERR(q->umem)) {
                DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
                       PTR_ERR(q->umem));
                return PTR_ERR(q->umem);
        }

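        /* The firmware expects queue pages in its own fixed granularity
         * (FW_PAGE_SHIFT), so convert the umem page count to firmware pages
         * when the system page size is larger.
         */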
        fw_pages = ib_umem_page_count(q->umem) <<
            (q->umem->page_shift - FW_PAGE_SHIFT);

        rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, fw_pages, 0);
        if (rc)
                goto err0;

        if (alloc_and_init) {
                q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
                if (IS_ERR(q->pbl_tbl)) {
                        rc = PTR_ERR(q->pbl_tbl);
                        goto err0;
                }
                qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info,
                                   FW_PAGE_SHIFT);
        } else {
                q->pbl_tbl = kzalloc(sizeof(*q->pbl_tbl), GFP_KERNEL);
                if (!q->pbl_tbl) {
                        rc = -ENOMEM;
                        goto err0;
                }
        }

        return 0;

err0:
        ib_umem_release(q->umem);
        q->umem = NULL;

        return rc;
}

static inline void qedr_init_cq_params(struct qedr_cq *cq,
                                       struct qedr_ucontext *ctx,
                                       struct qedr_dev *dev, int vector,
                                       int chain_entries, int page_cnt,
                                       u64 pbl_ptr,
                                       struct qed_rdma_create_cq_in_params
                                       *params)
{
        memset(params, 0, sizeof(*params));
        params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
        params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
        params->cnq_id = vector;
        params->cq_size = chain_entries - 1;
        params->dpi = (ctx) ? ctx->dpi : dev->dpi;
        params->pbl_num_pages = page_cnt;
        params->pbl_ptr = pbl_ptr;
        params->pbl_two_level = 0;
}

static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
{
        cq->db.data.agg_flags = flags;
        cq->db.data.value = cpu_to_le32(cons);
        writeq(cq->db.raw, cq->db_addr);

        /* Make sure write would stick */
        mmiowb();
}

int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
        struct qedr_cq *cq = get_qedr_cq(ibcq);
        unsigned long sflags;
        struct qedr_dev *dev;

        dev = get_qedr_dev(ibcq->device);

        if (cq->destroyed) {
                DP_ERR(dev,
                       "warning: arm was invoked after destroy for cq %p (icid=%d)\n",
                       cq, cq->icid);
                return -EINVAL;
        }

        if (cq->cq_type == QEDR_CQ_TYPE_GSI)
                return 0;

        spin_lock_irqsave(&cq->cq_lock, sflags);

        cq->arm_flags = 0;

        if (flags & IB_CQ_SOLICITED)
                cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;

        if (flags & IB_CQ_NEXT_COMP)
                cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;

        doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);

        spin_unlock_irqrestore(&cq->cq_lock, sflags);

        return 0;
}

struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
                             const struct ib_cq_init_attr *attr,
                             struct ib_ucontext *ib_ctx, struct ib_udata *udata)
{
        struct qedr_ucontext *ctx = get_qedr_ucontext(ib_ctx);
        struct qed_rdma_destroy_cq_out_params destroy_oparams;
        struct qed_rdma_destroy_cq_in_params destroy_iparams;
        struct qedr_dev *dev = get_qedr_dev(ibdev);
        struct qed_rdma_create_cq_in_params params;
        struct qedr_create_cq_ureq ureq;
        int vector = attr->comp_vector;
        int entries = attr->cqe;
        struct qedr_cq *cq;
        int chain_entries;
        int page_cnt;
        u64 pbl_ptr;
        u16 icid;
        int rc;

        DP_DEBUG(dev, QEDR_MSG_INIT,
                 "create_cq: called from %s. entries=%d, vector=%d\n",
                 udata ? "User Lib" : "Kernel", entries, vector);

        if (entries > QEDR_MAX_CQES) {
                DP_ERR(dev,
                       "create cq: the number of entries %d is too high. Must be less than or equal to %d.\n",
                       entries, QEDR_MAX_CQES);
                return ERR_PTR(-EINVAL);
        }

        chain_entries = qedr_align_cq_entries(entries);
        chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);

        cq = kzalloc(sizeof(*cq), GFP_KERNEL);
        if (!cq)
                return ERR_PTR(-ENOMEM);

        if (udata) {
                memset(&ureq, 0, sizeof(ureq));
                if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
                        DP_ERR(dev,
                               "create cq: problem copying data from user space\n");
                        goto err0;
                }

                if (!ureq.len) {
                        DP_ERR(dev,
                               "create cq: cannot create a cq with 0 entries\n");
                        goto err0;
                }

                cq->cq_type = QEDR_CQ_TYPE_USER;

                rc = qedr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr,
                                          ureq.len, IB_ACCESS_LOCAL_WRITE,
                                          1, 1);
                if (rc)
                        goto err0;

                pbl_ptr = cq->q.pbl_tbl->pa;
                page_cnt = cq->q.pbl_info.num_pbes;

                cq->ibcq.cqe = chain_entries;
        } else {
                cq->cq_type = QEDR_CQ_TYPE_KERNEL;

                rc = dev->ops->common->chain_alloc(dev->cdev,
                                                   QED_CHAIN_USE_TO_CONSUME,
                                                   QED_CHAIN_MODE_PBL,
                                                   QED_CHAIN_CNT_TYPE_U32,
                                                   chain_entries,
                                                   sizeof(union rdma_cqe),
                                                   &cq->pbl, NULL);
                if (rc)
                        goto err1;

                page_cnt = qed_chain_get_page_cnt(&cq->pbl);
                pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
                cq->ibcq.cqe = cq->pbl.capacity;
        }

        qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
                            pbl_ptr, &params);

        rc = dev->ops->rdma_create_cq(dev->rdma_ctx, &params, &icid);
        if (rc)
                goto err2;

        cq->icid = icid;
        cq->sig = QEDR_CQ_MAGIC_NUMBER;
        spin_lock_init(&cq->cq_lock);

        if (ib_ctx) {
                rc = qedr_copy_cq_uresp(dev, cq, udata);
                if (rc)
                        goto err3;
        } else {
                /* Generate doorbell address. */
                cq->db_addr = dev->db_addr +
                    DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
                cq->db.data.icid = cq->icid;
                cq->db.data.params = DB_AGG_CMD_SET <<
                    RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;

                /* point to the very last element; passing it we will toggle */
                cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
                cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
                cq->latest_cqe = NULL;
                consume_cqe(cq);
                cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
        }

        DP_DEBUG(dev, QEDR_MSG_CQ,
                 "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
                 cq->icid, cq, params.cq_size);

        return &cq->ibcq;

err3:
        destroy_iparams.icid = cq->icid;
        dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
                                  &destroy_oparams);
err2:
        if (udata)
                qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
        else
                dev->ops->common->chain_free(dev->cdev, &cq->pbl);
err1:
        if (udata)
                ib_umem_release(cq->q.umem);
err0:
        kfree(cq);
        return ERR_PTR(-EINVAL);
}

int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
{
        struct qedr_dev *dev = get_qedr_dev(ibcq->device);
        struct qedr_cq *cq = get_qedr_cq(ibcq);

        DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);

        return 0;
}

#define QEDR_DESTROY_CQ_MAX_ITERATIONS          (10)
#define QEDR_DESTROY_CQ_ITER_DURATION           (10)

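/* Worst-case wait below: 10 iterations of udelay(10) (~100 us of busy
 * waiting) followed by 10 iterations of msleep(10) (~100 ms of sleeping)
 * before the CQ is declared stuck.
 */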
int qedr_destroy_cq(struct ib_cq *ibcq)
{
        struct qedr_dev *dev = get_qedr_dev(ibcq->device);
        struct qed_rdma_destroy_cq_out_params oparams;
        struct qed_rdma_destroy_cq_in_params iparams;
        struct qedr_cq *cq = get_qedr_cq(ibcq);
        int iter;
        int rc;

        DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq %p (icid=%d)\n", cq, cq->icid);

        cq->destroyed = 1;

        /* GSI CQs are handled by the driver, so they don't exist in the FW */
        if (cq->cq_type == QEDR_CQ_TYPE_GSI)
                goto done;

        iparams.icid = cq->icid;
        rc = dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
        if (rc)
                return rc;

        dev->ops->common->chain_free(dev->cdev, &cq->pbl);

        if (ibcq->uobject && ibcq->uobject->context) {
                qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
                ib_umem_release(cq->q.umem);
        }

        /* We don't want the IRQ handler to handle a non-existing CQ so we
         * wait until all CNQ interrupts, if any, are received. This will always
         * happen and will always happen very fast. If not, then a serious error
         * has occurred. That is why we can use a long delay.
         * We spin for a short time so we don't lose time on context switching
         * in case all the completions are handled in that span. Otherwise
         * we sleep for a while and check again. Since the CNQ may be
         * associated with (only) the current CPU we use msleep to allow the
         * current CPU to be freed.
         * The CNQ notification is increased in qedr_irq_handler().
         */
        iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
        while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
                udelay(QEDR_DESTROY_CQ_ITER_DURATION);
                iter--;
        }

        iter = QEDR_DESTROY_CQ_MAX_ITERATIONS;
        while (oparams.num_cq_notif != READ_ONCE(cq->cnq_notif) && iter) {
                msleep(QEDR_DESTROY_CQ_ITER_DURATION);
                iter--;
        }

        if (oparams.num_cq_notif != cq->cnq_notif)
                goto err;

        /* Note that we don't need to have explicit code to wait for the
         * completion of the event handler because it is invoked from the EQ.
         * Since the destroy CQ ramrod has also been received on the EQ we can
         * be certain that there's no event handler in process.
         */
done:
        cq->sig = ~cq->sig;

        kfree(cq);

        return 0;

err:
        DP_ERR(dev,
               "CQ %p (icid=%d) not freed, expecting %d ints but got %d ints\n",
               cq, cq->icid, oparams.num_cq_notif, cq->cnq_notif);

        return -EINVAL;
}

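/* Resolve the source GID into the firmware's addressing mode: a RoCE v1
 * GID keeps the raw GID bytes, a RoCE v2 GID is translated into its
 * embedded IPv4 address or its IPv6 address, and the VLAN id is taken
 * from the GID's net device.
 */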
static inline int get_gid_info_from_table(struct ib_qp *ibqp,
                                          struct ib_qp_attr *attr,
                                          int attr_mask,
                                          struct qed_rdma_modify_qp_in_params
                                          *qp_params)
{
        enum rdma_network_type nw_type;
        struct ib_gid_attr gid_attr;
        const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
        union ib_gid gid;
        u32 ipv4_addr;
        int rc = 0;
        int i;

        rc = ib_get_cached_gid(ibqp->device,
                               rdma_ah_get_port_num(&attr->ah_attr),
                               grh->sgid_index, &gid, &gid_attr);
        if (rc)
                return rc;

        qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev);

        dev_put(gid_attr.ndev);
        nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid);
        switch (nw_type) {
        case RDMA_NETWORK_IPV6:
                memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
                       sizeof(qp_params->sgid));
                memcpy(&qp_params->dgid.bytes[0],
                       &grh->dgid,
                       sizeof(qp_params->dgid));
                qp_params->roce_mode = ROCE_V2_IPV6;
                SET_FIELD(qp_params->modify_flags,
                          QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
                break;
        case RDMA_NETWORK_IB:
                memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
                       sizeof(qp_params->sgid));
                memcpy(&qp_params->dgid.bytes[0],
                       &grh->dgid,
                       sizeof(qp_params->dgid));
                qp_params->roce_mode = ROCE_V1;
                break;
        case RDMA_NETWORK_IPV4:
                memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
                memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
                ipv4_addr = qedr_get_ipv4_from_gid(gid.raw);
                qp_params->sgid.ipv4_addr = ipv4_addr;
                ipv4_addr =
                    qedr_get_ipv4_from_gid(grh->dgid.raw);
                qp_params->dgid.ipv4_addr = ipv4_addr;
                SET_FIELD(qp_params->modify_flags,
                          QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
                qp_params->roce_mode = ROCE_V2_IPV4;
                break;
        }

        for (i = 0; i < 4; i++) {
                qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]);
                qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]);
        }

        if (qp_params->vlan_id >= VLAN_CFI_MASK)
                qp_params->vlan_id = 0;

        return 0;
}

static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
                               struct ib_qp_init_attr *attrs)
{
        struct qedr_device_attr *qattr = &dev->attr;

        /* QP0... attrs->qp_type == IB_QPT_GSI */
        if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) {
                DP_DEBUG(dev, QEDR_MSG_QP,
                         "create qp: unsupported qp type=0x%x requested\n",
                         attrs->qp_type);
                return -EINVAL;
        }

        if (attrs->cap.max_send_wr > qattr->max_sqe) {
                DP_ERR(dev,
                       "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n",
                       attrs->cap.max_send_wr, qattr->max_sqe);
                return -EINVAL;
        }

        if (attrs->cap.max_inline_data > qattr->max_inline) {
                DP_ERR(dev,
                       "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n",
                       attrs->cap.max_inline_data, qattr->max_inline);
                return -EINVAL;
        }

        if (attrs->cap.max_send_sge > qattr->max_sge) {
                DP_ERR(dev,
                       "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n",
                       attrs->cap.max_send_sge, qattr->max_sge);
                return -EINVAL;
        }

        if (attrs->cap.max_recv_sge > qattr->max_sge) {
                DP_ERR(dev,
                       "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n",
                       attrs->cap.max_recv_sge, qattr->max_sge);
                return -EINVAL;
        }

        /* Unprivileged user space cannot create special QP */
        if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
                DP_ERR(dev,
                       "create qp: userspace can't create special QPs of type=0x%x\n",
                       attrs->qp_type);
                return -EINVAL;
        }

        return 0;
}

static void qedr_copy_rq_uresp(struct qedr_dev *dev,
                               struct qedr_create_qp_uresp *uresp,
                               struct qedr_qp *qp)
{
        /* iWARP requires two doorbells per RQ. */
        if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
                uresp->rq_db_offset =
                    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
                uresp->rq_db2_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
        } else {
                uresp->rq_db_offset =
                    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
        }

        uresp->rq_icid = qp->icid;
}

static void qedr_copy_sq_uresp(struct qedr_dev *dev,
                               struct qedr_create_qp_uresp *uresp,
                               struct qedr_qp *qp)
{
        uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);

        /* iWARP uses the same cid for rq and sq */
        if (rdma_protocol_iwarp(&dev->ibdev, 1))
                uresp->sq_icid = qp->icid;
        else
                uresp->sq_icid = qp->icid + 1;
}

static int qedr_copy_qp_uresp(struct qedr_dev *dev,
                              struct qedr_qp *qp, struct ib_udata *udata)
{
        struct qedr_create_qp_uresp uresp;
        int rc;

        memset(&uresp, 0, sizeof(uresp));
        qedr_copy_sq_uresp(dev, &uresp, qp);
        qedr_copy_rq_uresp(dev, &uresp, qp);

        uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE;
        uresp.qp_id = qp->qp_id;

        rc = qedr_ib_copy_to_udata(udata, &uresp, sizeof(uresp));
        if (rc)
                DP_ERR(dev,
                       "create qp: failed a copy to user space with qp icid=0x%x.\n",
                       qp->icid);

        return rc;
}

static void qedr_set_common_qp_params(struct qedr_dev *dev,
                                      struct qedr_qp *qp,
                                      struct qedr_pd *pd,
                                      struct ib_qp_init_attr *attrs)
{
        spin_lock_init(&qp->q_lock);
        atomic_set(&qp->refcnt, 1);
        qp->pd = pd;
        qp->qp_type = attrs->qp_type;
        qp->max_inline_data = attrs->cap.max_inline_data;
        qp->sq.max_sges = attrs->cap.max_send_sge;
        qp->state = QED_ROCE_QP_STATE_RESET;
        qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
        qp->sq_cq = get_qedr_cq(attrs->send_cq);
        qp->rq_cq = get_qedr_cq(attrs->recv_cq);
        qp->dev = dev;
        qp->rq.max_sges = attrs->cap.max_recv_sge;

        DP_DEBUG(dev, QEDR_MSG_QP,
                 "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n",
                 qp->rq.max_sges, qp->rq_cq->icid);
        DP_DEBUG(dev, QEDR_MSG_QP,
                 "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n",
                 pd->pd_id, qp->qp_type, qp->max_inline_data,
                 qp->state, qp->signaled, (attrs->srq) ? 1 : 0);
        DP_DEBUG(dev, QEDR_MSG_QP,
                 "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n",
                 qp->sq.max_sges, qp->sq_cq->icid);
}

static void qedr_set_roce_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
{
        qp->sq.db = dev->db_addr +
                    DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
        qp->sq.db_data.data.icid = qp->icid + 1;
        qp->rq.db = dev->db_addr +
                    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD);
        qp->rq.db_data.data.icid = qp->icid;
}

static inline void
qedr_init_common_qp_in_params(struct qedr_dev *dev,
                              struct qedr_pd *pd,
                              struct qedr_qp *qp,
                              struct ib_qp_init_attr *attrs,
                              bool fmr_and_reserved_lkey,
                              struct qed_rdma_create_qp_in_params *params)
{
        /* QP handle to be written in an async event */
        params->qp_handle_async_lo = lower_32_bits((uintptr_t) qp);
        params->qp_handle_async_hi = upper_32_bits((uintptr_t) qp);

        params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR);
        params->fmr_and_reserved_lkey = fmr_and_reserved_lkey;
        params->pd = pd->pd_id;
        params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi;
        params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid;
        params->stats_queue = 0;
        params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid;
        params->srq_id = 0;
        params->use_srq = false;
}

static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp)
{
        DP_DEBUG(dev, QEDR_MSG_QP,
                 "create qp: successfully created user QP. qp=%p, sq_addr=0x%llx, sq_len=%zd, rq_addr=0x%llx, rq_len=%zd\n",
                 qp,
                 qp->usq.buf_addr,
                 qp->usq.buf_len, qp->urq.buf_addr, qp->urq.buf_len);
}

static int qedr_idr_add(struct qedr_dev *dev, void *ptr, u32 id)
{
        int rc;

        if (!rdma_protocol_iwarp(&dev->ibdev, 1))
                return 0;

        idr_preload(GFP_KERNEL);
        spin_lock_irq(&dev->idr_lock);

        rc = idr_alloc(&dev->qpidr, ptr, id, id + 1, GFP_ATOMIC);

        spin_unlock_irq(&dev->idr_lock);
        idr_preload_end();

        return rc < 0 ? rc : 0;
}

static void qedr_idr_remove(struct qedr_dev *dev, u32 id)
{
        if (!rdma_protocol_iwarp(&dev->ibdev, 1))
                return;

        spin_lock_irq(&dev->idr_lock);
        idr_remove(&dev->qpidr, id);
        spin_unlock_irq(&dev->idr_lock);
}

static inline void
qedr_iwarp_populate_user_qp(struct qedr_dev *dev,
                            struct qedr_qp *qp,
                            struct qed_rdma_create_qp_out_params *out_params)
{
        qp->usq.pbl_tbl->va = out_params->sq_pbl_virt;
        qp->usq.pbl_tbl->pa = out_params->sq_pbl_phys;

        qedr_populate_pbls(dev, qp->usq.umem, qp->usq.pbl_tbl,
                           &qp->usq.pbl_info, FW_PAGE_SHIFT);

        qp->urq.pbl_tbl->va = out_params->rq_pbl_virt;
        qp->urq.pbl_tbl->pa = out_params->rq_pbl_phys;

        qedr_populate_pbls(dev, qp->urq.umem, qp->urq.pbl_tbl,
                           &qp->urq.pbl_info, FW_PAGE_SHIFT);
}

static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp)
{
        if (qp->usq.umem)
                ib_umem_release(qp->usq.umem);
        qp->usq.umem = NULL;

        if (qp->urq.umem)
                ib_umem_release(qp->urq.umem);
        qp->urq.umem = NULL;
}

static int qedr_create_user_qp(struct qedr_dev *dev,
                               struct qedr_qp *qp,
                               struct ib_pd *ibpd,
                               struct ib_udata *udata,
                               struct ib_qp_init_attr *attrs)
{
        struct qed_rdma_create_qp_in_params in_params;
        struct qed_rdma_create_qp_out_params out_params;
        struct qedr_pd *pd = get_qedr_pd(ibpd);
        struct ib_ucontext *ib_ctx = NULL;
        struct qedr_create_qp_ureq ureq;
        int alloc_and_init = rdma_protocol_roce(&dev->ibdev, 1);
        int rc = -EINVAL;

        ib_ctx = ibpd->uobject->context;

        memset(&ureq, 0, sizeof(ureq));
        rc = ib_copy_from_udata(&ureq, udata, sizeof(ureq));
        if (rc) {
                DP_ERR(dev, "Problem copying data from user space\n");
                return rc;
        }

        /* SQ - read access only (0), dma sync not required (0) */
        rc = qedr_init_user_queue(ib_ctx, dev, &qp->usq, ureq.sq_addr,
                                  ureq.sq_len, 0, 0, alloc_and_init);
        if (rc)
                return rc;

        /* RQ - read access only (0), dma sync not required (0) */
        rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq.rq_addr,
                                  ureq.rq_len, 0, 0, alloc_and_init);
        if (rc)
                return rc;

        memset(&in_params, 0, sizeof(in_params));
        qedr_init_common_qp_in_params(dev, pd, qp, attrs, false, &in_params);
        in_params.qp_handle_lo = ureq.qp_handle_lo;
        in_params.qp_handle_hi = ureq.qp_handle_hi;
        in_params.sq_num_pages = qp->usq.pbl_info.num_pbes;
        in_params.sq_pbl_ptr = qp->usq.pbl_tbl->pa;
        in_params.rq_num_pages = qp->urq.pbl_info.num_pbes;
        in_params.rq_pbl_ptr = qp->urq.pbl_tbl->pa;

        qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
                                              &in_params, &out_params);

        if (!qp->qed_qp) {
                rc = -ENOMEM;
                goto err1;
        }

        if (rdma_protocol_iwarp(&dev->ibdev, 1))
                qedr_iwarp_populate_user_qp(dev, qp, &out_params);

        qp->qp_id = out_params.qp_id;
        qp->icid = out_params.icid;

        rc = qedr_copy_qp_uresp(dev, qp, udata);
        if (rc)
                goto err;

        qedr_qp_user_print(dev, qp);

        return 0;
err:
        /* Preserve the original error in rc: a successful QP destroy here
         * must not make the create path return success.
         */
        if (dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp))
                DP_ERR(dev, "create qp: fatal fault on qp destroy\n");

err1:
        qedr_cleanup_user(dev, qp);
        return rc;
}

static void qedr_set_iwarp_db_info(struct qedr_dev *dev, struct qedr_qp *qp)
{
        qp->sq.db = dev->db_addr +
            DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD);
        qp->sq.db_data.data.icid = qp->icid;

        qp->rq.db = dev->db_addr +
                    DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD);
        qp->rq.db_data.data.icid = qp->icid;
        qp->rq.iwarp_db2 = dev->db_addr +
                           DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_FLAGS);
        qp->rq.iwarp_db2_data.data.icid = qp->icid;
        qp->rq.iwarp_db2_data.data.value = DQ_TCM_IWARP_POST_RQ_CF_CMD;
}

static int
qedr_roce_create_kernel_qp(struct qedr_dev *dev,
                           struct qedr_qp *qp,
                           struct qed_rdma_create_qp_in_params *in_params,
                           u32 n_sq_elems, u32 n_rq_elems)
{
        struct qed_rdma_create_qp_out_params out_params;
        int rc;

        rc = dev->ops->common->chain_alloc(dev->cdev,
                                           QED_CHAIN_USE_TO_PRODUCE,
                                           QED_CHAIN_MODE_PBL,
                                           QED_CHAIN_CNT_TYPE_U32,
                                           n_sq_elems,
                                           QEDR_SQE_ELEMENT_SIZE,
                                           &qp->sq.pbl, NULL);

        if (rc)
                return rc;

        in_params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl);
        in_params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl);

        rc = dev->ops->common->chain_alloc(dev->cdev,
                                           QED_CHAIN_USE_TO_CONSUME_PRODUCE,
                                           QED_CHAIN_MODE_PBL,
                                           QED_CHAIN_CNT_TYPE_U32,
                                           n_rq_elems,
                                           QEDR_RQE_ELEMENT_SIZE,
                                           &qp->rq.pbl, NULL);
        if (rc)
                return rc;

        in_params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl);
        in_params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl);

        qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
                                              in_params, &out_params);

        if (!qp->qed_qp)
                return -EINVAL;

        qp->qp_id = out_params.qp_id;
        qp->icid = out_params.icid;

        qedr_set_roce_db_info(dev, qp);
        return rc;
}

static int
qedr_iwarp_create_kernel_qp(struct qedr_dev *dev,
                            struct qedr_qp *qp,
                            struct qed_rdma_create_qp_in_params *in_params,
                            u32 n_sq_elems, u32 n_rq_elems)
{
        struct qed_rdma_create_qp_out_params out_params;
        struct qed_chain_ext_pbl ext_pbl;
        int rc;

        in_params->sq_num_pages = QED_CHAIN_PAGE_CNT(n_sq_elems,
                                                     QEDR_SQE_ELEMENT_SIZE,
                                                     QED_CHAIN_MODE_PBL);
        in_params->rq_num_pages = QED_CHAIN_PAGE_CNT(n_rq_elems,
                                                     QEDR_RQE_ELEMENT_SIZE,
                                                     QED_CHAIN_MODE_PBL);

        qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx,
                                              in_params, &out_params);

        if (!qp->qed_qp)
                return -EINVAL;

        /* Now we allocate the chain */
1541         ext_pbl.p_pbl_virt = out_params.sq_pbl_virt;
1542         ext_pbl.p_pbl_phys = out_params.sq_pbl_phys;
1543
1544         rc = dev->ops->common->chain_alloc(dev->cdev,
1545                                            QED_CHAIN_USE_TO_PRODUCE,
1546                                            QED_CHAIN_MODE_PBL,
1547                                            QED_CHAIN_CNT_TYPE_U32,
1548                                            n_sq_elems,
1549                                            QEDR_SQE_ELEMENT_SIZE,
1550                                            &qp->sq.pbl, &ext_pbl);
1551
1552         if (rc)
1553                 goto err;
1554
1555         ext_pbl.p_pbl_virt = out_params.rq_pbl_virt;
1556         ext_pbl.p_pbl_phys = out_params.rq_pbl_phys;
1557
1558         rc = dev->ops->common->chain_alloc(dev->cdev,
1559                                            QED_CHAIN_USE_TO_CONSUME_PRODUCE,
1560                                            QED_CHAIN_MODE_PBL,
1561                                            QED_CHAIN_CNT_TYPE_U32,
1562                                            n_rq_elems,
1563                                            QEDR_RQE_ELEMENT_SIZE,
1564                                            &qp->rq.pbl, &ext_pbl);
1565
1566         if (rc)
1567                 goto err;
1568
1569         qp->qp_id = out_params.qp_id;
1570         qp->icid = out_params.icid;
1571
1572         qedr_set_iwarp_db_info(dev, qp);
1573         return rc;
1574
1575 err:
1576         dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
1577
1578         return rc;
1579 }
1580
1581 static void qedr_cleanup_kernel(struct qedr_dev *dev, struct qedr_qp *qp)
1582 {
1583         dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl);
1584         kfree(qp->wqe_wr_id);
1585
1586         dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl);
1587         kfree(qp->rqe_wr_id);
1588 }
1589
1590 static int qedr_create_kernel_qp(struct qedr_dev *dev,
1591                                  struct qedr_qp *qp,
1592                                  struct ib_pd *ibpd,
1593                                  struct ib_qp_init_attr *attrs)
1594 {
1595         struct qed_rdma_create_qp_in_params in_params;
1596         struct qedr_pd *pd = get_qedr_pd(ibpd);
1597         int rc = -EINVAL;
1598         u32 n_rq_elems;
1599         u32 n_sq_elems;
1600         u32 n_sq_entries;
1601
1602         memset(&in_params, 0, sizeof(in_params));
1603
1604         /* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in
1605          * the ring. The ring should allow at least a single WR, even if the
1606          * user requested none, due to allocation issues.
1607          * We should add an extra WR since the prod and cons indices of
1608          * wqe_wr_id are managed in such a way that the WQ is considered full
1609          * when (prod+1)%max_wr==cons. We currently don't do that because we
1610          * double the number of entries due to an iSER issue that pushes far
1611          * more WRs than indicated. If we declined its ib_post_send(), we
1612          * would get error prints in dmesg that we'd like to avoid.
1613          */
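              /* Illustrative sizing example (hypothetical numbers): with
               * max_send_wr = 100 and wq_multiplier = 2, max_wr becomes
               * min(200, dev->attr.max_sqe), and the ring is then deemed
               * full once (prod + 1) % max_wr == cons.
               */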
1614         qp->sq.max_wr = min_t(u32, attrs->cap.max_send_wr * dev->wq_multiplier,
1615                               dev->attr.max_sqe);
1616
1617         qp->wqe_wr_id = kcalloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id),
1618                                 GFP_KERNEL);
1619         if (!qp->wqe_wr_id) {
1620                 DP_ERR(dev, "create qp: failed SQ shadow memory allocation\n");
1621                 return -ENOMEM;
1622         }
1623
1624         /* QP handle to be written in CQE */
1625         in_params.qp_handle_lo = lower_32_bits((uintptr_t) qp);
1626         in_params.qp_handle_hi = upper_32_bits((uintptr_t) qp);
1627
1628         /* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in
1629          * the ring. The ring should allow at least a single WR, even if the
1630          * user requested none, due to allocation issues.
1631          */
1632         qp->rq.max_wr = (u16) max_t(u32, attrs->cap.max_recv_wr, 1);
1633
1634         /* Allocate driver internal RQ array */
1635         qp->rqe_wr_id = kcalloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id),
1636                                 GFP_KERNEL);
1637         if (!qp->rqe_wr_id) {
1638                 DP_ERR(dev,
1639                        "create qp: failed RQ shadow memory allocation\n");
1640                 kfree(qp->wqe_wr_id);
1641                 return -ENOMEM;
1642         }
1643
1644         qedr_init_common_qp_in_params(dev, pd, qp, attrs, true, &in_params);
1645
1646         n_sq_entries = attrs->cap.max_send_wr;
1647         n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe);
1648         n_sq_entries = max_t(u32, n_sq_entries, 1);
1649         n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE;
1650
1651         n_rq_elems = qp->rq.max_wr * QEDR_MAX_RQE_ELEMENTS_PER_RQE;
1652
1653         if (rdma_protocol_iwarp(&dev->ibdev, 1))
1654                 rc = qedr_iwarp_create_kernel_qp(dev, qp, &in_params,
1655                                                  n_sq_elems, n_rq_elems);
1656         else
1657                 rc = qedr_roce_create_kernel_qp(dev, qp, &in_params,
1658                                                 n_sq_elems, n_rq_elems);
1659         if (rc)
1660                 qedr_cleanup_kernel(dev, qp);
1661
1662         return rc;
1663 }
1664
1665 struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
1666                              struct ib_qp_init_attr *attrs,
1667                              struct ib_udata *udata)
1668 {
1669         struct qedr_dev *dev = get_qedr_dev(ibpd->device);
1670         struct qedr_pd *pd = get_qedr_pd(ibpd);
1671         struct qedr_qp *qp;
1672         struct ib_qp *ibqp;
1673         int rc = 0;
1674
1675         DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
1676                  udata ? "user library" : "kernel", pd);
1677
1678         rc = qedr_check_qp_attrs(ibpd, dev, attrs);
1679         if (rc)
1680                 return ERR_PTR(rc);
1681
1682         if (attrs->srq)
1683                 return ERR_PTR(-EINVAL);
1684
1685         DP_DEBUG(dev, QEDR_MSG_QP,
1686                  "create qp: called from %s, event_handler=%p, pd=%p, sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
1687                  udata ? "user library" : "kernel", attrs->event_handler, pd,
1688                  get_qedr_cq(attrs->send_cq),
1689                  get_qedr_cq(attrs->send_cq)->icid,
1690                  get_qedr_cq(attrs->recv_cq),
1691                  get_qedr_cq(attrs->recv_cq)->icid);
1692
1693         qp = kzalloc(sizeof(*qp), GFP_KERNEL);
1694         if (!qp) {
1695                 DP_ERR(dev, "create qp: failed allocating memory\n");
1696                 return ERR_PTR(-ENOMEM);
1697         }
1698
1699         qedr_set_common_qp_params(dev, qp, pd, attrs);
1700
1701         if (attrs->qp_type == IB_QPT_GSI) {
1702                 ibqp = qedr_create_gsi_qp(dev, attrs, qp);
1703                 if (IS_ERR(ibqp))
1704                         kfree(qp);
1705                 return ibqp;
1706         }
1707
1708         if (udata)
1709                 rc = qedr_create_user_qp(dev, qp, ibpd, udata, attrs);
1710         else
1711                 rc = qedr_create_kernel_qp(dev, qp, ibpd, attrs);
1712
1713         if (rc)
1714                 goto err;
1715
1716         qp->ibqp.qp_num = qp->qp_id;
1717
1718         rc = qedr_idr_add(dev, qp, qp->qp_id);
1719         if (rc)
1720                 goto err;
1721
1722         return &qp->ibqp;
1723
1724 err:
1725         kfree(qp);
1726
1727         return ERR_PTR(rc);
1728 }
1729
1730 static enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state)
1731 {
1732         switch (qp_state) {
1733         case QED_ROCE_QP_STATE_RESET:
1734                 return IB_QPS_RESET;
1735         case QED_ROCE_QP_STATE_INIT:
1736                 return IB_QPS_INIT;
1737         case QED_ROCE_QP_STATE_RTR:
1738                 return IB_QPS_RTR;
1739         case QED_ROCE_QP_STATE_RTS:
1740                 return IB_QPS_RTS;
1741         case QED_ROCE_QP_STATE_SQD:
1742                 return IB_QPS_SQD;
1743         case QED_ROCE_QP_STATE_ERR:
1744                 return IB_QPS_ERR;
1745         case QED_ROCE_QP_STATE_SQE:
1746                 return IB_QPS_SQE;
1747         }
1748         return IB_QPS_ERR;
1749 }
1750
1751 static enum qed_roce_qp_state qedr_get_state_from_ibqp(
1752                                         enum ib_qp_state qp_state)
1753 {
1754         switch (qp_state) {
1755         case IB_QPS_RESET:
1756                 return QED_ROCE_QP_STATE_RESET;
1757         case IB_QPS_INIT:
1758                 return QED_ROCE_QP_STATE_INIT;
1759         case IB_QPS_RTR:
1760                 return QED_ROCE_QP_STATE_RTR;
1761         case IB_QPS_RTS:
1762                 return QED_ROCE_QP_STATE_RTS;
1763         case IB_QPS_SQD:
1764                 return QED_ROCE_QP_STATE_SQD;
1765         case IB_QPS_ERR:
1766                 return QED_ROCE_QP_STATE_ERR;
1767         default:
1768                 return QED_ROCE_QP_STATE_ERR;
1769         }
1770 }
1771
1772 static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph)
1773 {
1774         qed_chain_reset(&qph->pbl);
1775         qph->prod = 0;
1776         qph->cons = 0;
1777         qph->wqe_cons = 0;
1778         qph->db_data.data.value = cpu_to_le16(0);
1779 }
1780
1781 static int qedr_update_qp_state(struct qedr_dev *dev,
1782                                 struct qedr_qp *qp,
1783                                 enum qed_roce_qp_state cur_state,
1784                                 enum qed_roce_qp_state new_state)
1785 {
1786         int status = 0;
1787
1788         if (new_state == cur_state)
1789                 return 0;
1790
1791         switch (cur_state) {
1792         case QED_ROCE_QP_STATE_RESET:
1793                 switch (new_state) {
1794                 case QED_ROCE_QP_STATE_INIT:
1795                         qp->prev_wqe_size = 0;
1796                         qedr_reset_qp_hwq_info(&qp->sq);
1797                         qedr_reset_qp_hwq_info(&qp->rq);
1798                         break;
1799                 default:
1800                         status = -EINVAL;
1801                         break;
1802                 }
1803                 break;
1804         case QED_ROCE_QP_STATE_INIT:
1805                 switch (new_state) {
1806                 case QED_ROCE_QP_STATE_RTR:
1807                         /* Update doorbell (in case post_recv was
1808                          * done before the move to RTR)
1809                          */
1810
1811                         if (rdma_protocol_roce(&dev->ibdev, 1)) {
1812                                 writel(qp->rq.db_data.raw, qp->rq.db);
1813                                 /* Make sure write takes effect */
1814                                 mmiowb();
1815                         }
1816                         break;
1817                 case QED_ROCE_QP_STATE_ERR:
1818                         break;
1819                 default:
1820                         /* Invalid state change. */
1821                         status = -EINVAL;
1822                         break;
1823                 }
1824                 break;
1825         case QED_ROCE_QP_STATE_RTR:
1826                 /* RTR->XXX */
1827                 switch (new_state) {
1828                 case QED_ROCE_QP_STATE_RTS:
1829                         break;
1830                 case QED_ROCE_QP_STATE_ERR:
1831                         break;
1832                 default:
1833                         /* Invalid state change. */
1834                         status = -EINVAL;
1835                         break;
1836                 }
1837                 break;
1838         case QED_ROCE_QP_STATE_RTS:
1839                 /* RTS->XXX */
1840                 switch (new_state) {
1841                 case QED_ROCE_QP_STATE_SQD:
1842                         break;
1843                 case QED_ROCE_QP_STATE_ERR:
1844                         break;
1845                 default:
1846                         /* Invalid state change. */
1847                         status = -EINVAL;
1848                         break;
1849                 }
1850                 break;
1851         case QED_ROCE_QP_STATE_SQD:
1852                 /* SQD->XXX */
1853                 switch (new_state) {
1854                 case QED_ROCE_QP_STATE_RTS:
1855                 case QED_ROCE_QP_STATE_ERR:
1856                         break;
1857                 default:
1858                         /* Invalid state change. */
1859                         status = -EINVAL;
1860                         break;
1861                 }
1862                 break;
1863         case QED_ROCE_QP_STATE_ERR:
1864                 /* ERR->XXX */
1865                 switch (new_state) {
1866                 case QED_ROCE_QP_STATE_RESET:
1867                         if ((qp->rq.prod != qp->rq.cons) ||
1868                             (qp->sq.prod != qp->sq.cons)) {
1869                                 DP_NOTICE(dev,
1870                                           "Error->Reset with rq/sq not empty rq.prod=%x rq.cons=%x sq.prod=%x sq.cons=%x\n",
1871                                           qp->rq.prod, qp->rq.cons, qp->sq.prod,
1872                                           qp->sq.cons);
1873                                 status = -EINVAL;
1874                         }
1875                         break;
1876                 default:
1877                         status = -EINVAL;
1878                         break;
1879                 }
1880                 break;
1881         default:
1882                 status = -EINVAL;
1883                 break;
1884         }
1885
1886         return status;
1887 }
1888
1889 int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1890                    int attr_mask, struct ib_udata *udata)
1891 {
1892         struct qedr_qp *qp = get_qedr_qp(ibqp);
1893         struct qed_rdma_modify_qp_in_params qp_params = { 0 };
1894         struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev);
1895         const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
1896         enum ib_qp_state old_qp_state, new_qp_state;
1897         enum qed_roce_qp_state cur_state;
1898         int rc = 0;
1899
1900         DP_DEBUG(dev, QEDR_MSG_QP,
1901                  "modify qp: qp %p attr_mask=0x%x, state=%d\n", qp, attr_mask,
1902                  attr->qp_state);
1903
1904         old_qp_state = qedr_get_ibqp_state(qp->state);
1905         if (attr_mask & IB_QP_STATE)
1906                 new_qp_state = attr->qp_state;
1907         else
1908                 new_qp_state = old_qp_state;
1909
1910         if (rdma_protocol_roce(&dev->ibdev, 1)) {
1911                 if (!ib_modify_qp_is_ok(old_qp_state, new_qp_state,
1912                                         ibqp->qp_type, attr_mask,
1913                                         IB_LINK_LAYER_ETHERNET)) {
1914                         DP_ERR(dev,
1915                                "modify qp: invalid attribute mask=0x%x specified for\n"
1916                                "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n",
1917                                attr_mask, qp->qp_id, ibqp->qp_type,
1918                                old_qp_state, new_qp_state);
1919                         rc = -EINVAL;
1920                         goto err;
1921                 }
1922         }
1923
1924         /* Translate the masks... */
1925         if (attr_mask & IB_QP_STATE) {
1926                 SET_FIELD(qp_params.modify_flags,
1927                           QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1);
1928                 qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state);
1929         }
1930
1931         if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY)
1932                 qp_params.sqd_async = true;
1933
1934         if (attr_mask & IB_QP_PKEY_INDEX) {
1935                 SET_FIELD(qp_params.modify_flags,
1936                           QED_ROCE_MODIFY_QP_VALID_PKEY, 1);
1937                 if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) {
1938                         rc = -EINVAL;
1939                         goto err;
1940                 }
1941
1942                 qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT;
1943         }
1944
1945         if (attr_mask & IB_QP_QKEY)
1946                 qp->qkey = attr->qkey;
1947
1948         if (attr_mask & IB_QP_ACCESS_FLAGS) {
1949                 SET_FIELD(qp_params.modify_flags,
1950                           QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1);
1951                 qp_params.incoming_rdma_read_en = attr->qp_access_flags &
1952                                                   IB_ACCESS_REMOTE_READ;
1953                 qp_params.incoming_rdma_write_en = attr->qp_access_flags &
1954                                                    IB_ACCESS_REMOTE_WRITE;
1955                 qp_params.incoming_atomic_en = attr->qp_access_flags &
1956                                                IB_ACCESS_REMOTE_ATOMIC;
1957         }
1958
1959         if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
1960                 if (rdma_protocol_iwarp(&dev->ibdev, 1))
1961                         return -EINVAL;
1962
1963                 if (attr_mask & IB_QP_PATH_MTU) {
1964                         if (attr->path_mtu < IB_MTU_256 ||
1965                             attr->path_mtu > IB_MTU_4096) {
1966                                 pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n");
1967                                 rc = -EINVAL;
1968                                 goto err;
1969                         }
1970                         qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu),
1971                                       ib_mtu_enum_to_int(iboe_get_mtu
1972                                                          (dev->ndev->mtu)));
1973                 }
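                      /* Illustrative example: a requested path_mtu of
                       * IB_MTU_2048 over a 1500-byte netdev is clamped to
                       * 1024, since iboe_get_mtu() returns the largest RoCE
                       * MTU that fits within the netdev MTU.
                       */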
1974
1975                 if (!qp->mtu) {
1976                         qp->mtu =
1977                         ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
1978                         pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu);
1979                 }
1980
1981                 SET_FIELD(qp_params.modify_flags,
1982                           QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1);
1983
1984                 qp_params.traffic_class_tos = grh->traffic_class;
1985                 qp_params.flow_label = grh->flow_label;
1986                 qp_params.hop_limit_ttl = grh->hop_limit;
1987
1988                 qp->sgid_idx = grh->sgid_index;
1989
1990                 rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params);
1991                 if (rc) {
1992                         DP_ERR(dev,
1993                                "modify qp: problems with GID index %d (rc=%d)\n",
1994                                grh->sgid_index, rc);
1995                         return rc;
1996                 }
1997
1998                 rc = qedr_get_dmac(dev, &attr->ah_attr,
1999                                    qp_params.remote_mac_addr);
2000                 if (rc)
2001                         return rc;
2002
2003                 qp_params.use_local_mac = true;
2004                 ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr);
2005
2006                 DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n",
2007                          qp_params.dgid.dwords[0], qp_params.dgid.dwords[1],
2008                          qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]);
2009                 DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n",
2010                          qp_params.sgid.dwords[0], qp_params.sgid.dwords[1],
2011                          qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]);
2012                 DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n",
2013                          qp_params.remote_mac_addr);
2014
2015                 qp_params.mtu = qp->mtu;
2016                 qp_params.lb_indication = false;
2017         }
2018
2019         if (!qp_params.mtu) {
2020                 /* Stay with current MTU */
2021                 if (qp->mtu)
2022                         qp_params.mtu = qp->mtu;
2023                 else
2024                         qp_params.mtu =
2025                             ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu));
2026         }
2027
2028         if (attr_mask & IB_QP_TIMEOUT) {
2029                 SET_FIELD(qp_params.modify_flags,
2030                           QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1);
2031
2032                 /* The received timeout value is an exponent used like this:
2033                  *    "12.7.34 LOCAL ACK TIMEOUT
2034                  *    Value representing the transport (ACK) timeout for use by
2035                  *    the remote, expressed as: 4.096 * 2^timeout [usec]"
2036                  * The FW expects timeout in msec so we need to divide the usec
2037                  * result by 1000. We'll approximate 1000~2^10, and 4.096 ~ 2^2,
2038                  * so we get: 2^2 * 2^timeout / 2^10 = 2^(timeout - 8).
2039                  * The value of zero means infinite so we use a 'max_t' to make
2040                  * sure that sub 1 msec values will be configured as 1 msec.
2041                  */
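                      /* Illustrative check of the approximation: timeout = 14
                       * means 4.096 usec * 2^14 ~= 67 msec, while the formula
                       * gives 2^(14 - 8) = 64 msec.
                       */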
2042                 if (attr->timeout)
2043                         qp_params.ack_timeout =
2044                                         1 << max_t(int, attr->timeout - 8, 0);
2045                 else
2046                         qp_params.ack_timeout = 0;
2047         }
2048
2049         if (attr_mask & IB_QP_RETRY_CNT) {
2050                 SET_FIELD(qp_params.modify_flags,
2051                           QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1);
2052                 qp_params.retry_cnt = attr->retry_cnt;
2053         }
2054
2055         if (attr_mask & IB_QP_RNR_RETRY) {
2056                 SET_FIELD(qp_params.modify_flags,
2057                           QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1);
2058                 qp_params.rnr_retry_cnt = attr->rnr_retry;
2059         }
2060
2061         if (attr_mask & IB_QP_RQ_PSN) {
2062                 SET_FIELD(qp_params.modify_flags,
2063                           QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1);
2064                 qp_params.rq_psn = attr->rq_psn;
2065                 qp->rq_psn = attr->rq_psn;
2066         }
2067
2068         if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
2069                 if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) {
2070                         rc = -EINVAL;
2071                         DP_ERR(dev,
2072                                "unsupported max_rd_atomic=%d, supported=%d\n",
2073                                attr->max_rd_atomic,
2074                                dev->attr.max_qp_req_rd_atomic_resc);
2075                         goto err;
2076                 }
2077
2078                 SET_FIELD(qp_params.modify_flags,
2079                           QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1);
2080                 qp_params.max_rd_atomic_req = attr->max_rd_atomic;
2081         }
2082
2083         if (attr_mask & IB_QP_MIN_RNR_TIMER) {
2084                 SET_FIELD(qp_params.modify_flags,
2085                           QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1);
2086                 qp_params.min_rnr_nak_timer = attr->min_rnr_timer;
2087         }
2088
2089         if (attr_mask & IB_QP_SQ_PSN) {
2090                 SET_FIELD(qp_params.modify_flags,
2091                           QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1);
2092                 qp_params.sq_psn = attr->sq_psn;
2093                 qp->sq_psn = attr->sq_psn;
2094         }
2095
2096         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
2097                 if (attr->max_dest_rd_atomic >
2098                     dev->attr.max_qp_resp_rd_atomic_resc) {
2099                         DP_ERR(dev,
2100                                "unsupported max_dest_rd_atomic=%d, supported=%d\n",
2101                                attr->max_dest_rd_atomic,
2102                                dev->attr.max_qp_resp_rd_atomic_resc);
2103
2104                         rc = -EINVAL;
2105                         goto err;
2106                 }
2107
2108                 SET_FIELD(qp_params.modify_flags,
2109                           QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1);
2110                 qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic;
2111         }
2112
2113         if (attr_mask & IB_QP_DEST_QPN) {
2114                 SET_FIELD(qp_params.modify_flags,
2115                           QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1);
2116
2117                 qp_params.dest_qp = attr->dest_qp_num;
2118                 qp->dest_qp_num = attr->dest_qp_num;
2119         }
2120
2121         cur_state = qp->state;
2122
2123         /* Update the QP state before the actual ramrod to prevent a race with
2124          * the fast path. Modifying the QP state to error causes the device to
2125          * flush the CQEs; polling those flushed CQEs would be flagged as a
2126          * potential issue if the QP were not already in the error state.
2127          */
2128         if ((attr_mask & IB_QP_STATE) && qp->qp_type != IB_QPT_GSI &&
2129             !udata && qp_params.new_state == QED_ROCE_QP_STATE_ERR)
2130                 qp->state = QED_ROCE_QP_STATE_ERR;
2131
2132         if (qp->qp_type != IB_QPT_GSI)
2133                 rc = dev->ops->rdma_modify_qp(dev->rdma_ctx,
2134                                               qp->qed_qp, &qp_params);
2135
2136         if (attr_mask & IB_QP_STATE) {
2137                 if ((qp->qp_type != IB_QPT_GSI) && (!udata))
2138                         rc = qedr_update_qp_state(dev, qp, cur_state,
2139                                                   qp_params.new_state);
2140                 qp->state = qp_params.new_state;
2141         }
2142
2143 err:
2144         return rc;
2145 }
2146
2147 static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params)
2148 {
2149         int ib_qp_acc_flags = 0;
2150
2151         if (params->incoming_rdma_write_en)
2152                 ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
2153         if (params->incoming_rdma_read_en)
2154                 ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ;
2155         if (params->incoming_atomic_en)
2156                 ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC;
2157         ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
2158         return ib_qp_acc_flags;
2159 }
2160
2161 int qedr_query_qp(struct ib_qp *ibqp,
2162                   struct ib_qp_attr *qp_attr,
2163                   int attr_mask, struct ib_qp_init_attr *qp_init_attr)
2164 {
2165         struct qed_rdma_query_qp_out_params params;
2166         struct qedr_qp *qp = get_qedr_qp(ibqp);
2167         struct qedr_dev *dev = qp->dev;
2168         int rc = 0;
2169
2170         memset(&params, 0, sizeof(params));
2171
2172         rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, &params);
2173         if (rc)
2174                 goto err;
2175
2176         memset(qp_attr, 0, sizeof(*qp_attr));
2177         memset(qp_init_attr, 0, sizeof(*qp_init_attr));
2178
2179         qp_attr->qp_state = qedr_get_ibqp_state(params.state);
2180         qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state);
2181         qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu);
2182         qp_attr->path_mig_state = IB_MIG_MIGRATED;
2183         qp_attr->rq_psn = params.rq_psn;
2184         qp_attr->sq_psn = params.sq_psn;
2185         qp_attr->dest_qp_num = params.dest_qp;
2186
2187         qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(&params);
2188
2189         qp_attr->cap.max_send_wr = qp->sq.max_wr;
2190         qp_attr->cap.max_recv_wr = qp->rq.max_wr;
2191         qp_attr->cap.max_send_sge = qp->sq.max_sges;
2192         qp_attr->cap.max_recv_sge = qp->rq.max_sges;
2193         qp_attr->cap.max_inline_data = ROCE_REQ_MAX_INLINE_DATA_SIZE;
2194         qp_init_attr->cap = qp_attr->cap;
2195
2196         qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
2197         rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
2198                         params.flow_label, qp->sgid_idx,
2199                         params.hop_limit_ttl, params.traffic_class_tos);
2200         rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid.bytes[0]);
2201         rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
2202         rdma_ah_set_sl(&qp_attr->ah_attr, 0);
2203         qp_attr->timeout = params.timeout;
2204         qp_attr->rnr_retry = params.rnr_retry;
2205         qp_attr->retry_cnt = params.retry_cnt;
2206         qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
2207         qp_attr->pkey_index = params.pkey_index;
2208         qp_attr->port_num = 1;
2209         rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
2210         rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
2211         qp_attr->alt_pkey_index = 0;
2212         qp_attr->alt_port_num = 0;
2213         qp_attr->alt_timeout = 0;
2214         memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
2215
2216         qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0;
2217         qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic;
2218         qp_attr->max_rd_atomic = params.max_rd_atomic;
2219         qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0;
2220
2221         DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n",
2222                  qp_attr->cap.max_inline_data);
2223
2224 err:
2225         return rc;
2226 }
2227
2228 static int qedr_free_qp_resources(struct qedr_dev *dev, struct qedr_qp *qp)
2229 {
2230         int rc = 0;
2231
2232         if (qp->qp_type != IB_QPT_GSI) {
2233                 rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp);
2234                 if (rc)
2235                         return rc;
2236         }
2237
2238         if (qp->ibqp.uobject && qp->ibqp.uobject->context)
2239                 qedr_cleanup_user(dev, qp);
2240         else
2241                 qedr_cleanup_kernel(dev, qp);
2242
2243         return 0;
2244 }
2245
2246 int qedr_destroy_qp(struct ib_qp *ibqp)
2247 {
2248         struct qedr_qp *qp = get_qedr_qp(ibqp);
2249         struct qedr_dev *dev = qp->dev;
2250         struct ib_qp_attr attr;
2251         int attr_mask = 0;
2252         int rc = 0;
2253
2254         DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n",
2255                  qp, qp->qp_type);
2256
2257         if (rdma_protocol_roce(&dev->ibdev, 1)) {
2258                 if ((qp->state != QED_ROCE_QP_STATE_RESET) &&
2259                     (qp->state != QED_ROCE_QP_STATE_ERR) &&
2260                     (qp->state != QED_ROCE_QP_STATE_INIT)) {
2261
2262                         attr.qp_state = IB_QPS_ERR;
2263                         attr_mask |= IB_QP_STATE;
2264
2265                         /* Change the QP state to ERROR */
2266                         qedr_modify_qp(ibqp, &attr, attr_mask, NULL);
2267                 }
2268         } else {
2269                 /* Wait for the connect/accept to complete */
2270                 if (qp->ep) {
2271                         int wait_count = 1;
2272
2273                         while (qp->ep->during_connect) {
2274                                 DP_DEBUG(dev, QEDR_MSG_QP,
2275                                          "Still in connect/accept\n");
2276
2277                                 msleep(100);
2278                                 if (wait_count++ > 200) {
2279                                         DP_NOTICE(dev,
2280                                                   "during connect timeout\n");
2281                                         break;
2282                                 }
2283                         }
2284                 }
2285         }
2286
2287         if (qp->qp_type == IB_QPT_GSI)
2288                 qedr_destroy_gsi_qp(dev);
2289
2290         qedr_free_qp_resources(dev, qp);
2291
2292         if (atomic_dec_and_test(&qp->refcnt)) {
2293                 qedr_idr_remove(dev, qp->qp_id);
2294                 kfree(qp);
2295         }
2296         return rc;
2297 }
2298
2299 struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
2300                              struct ib_udata *udata)
2301 {
2302         struct qedr_ah *ah;
2303
2304         ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
2305         if (!ah)
2306                 return ERR_PTR(-ENOMEM);
2307
2308         ah->attr = *attr;
2309
2310         return &ah->ibah;
2311 }
2312
2313 int qedr_destroy_ah(struct ib_ah *ibah)
2314 {
2315         struct qedr_ah *ah = get_qedr_ah(ibah);
2316
2317         kfree(ah);
2318         return 0;
2319 }
2320
2321 static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
2322 {
2323         struct qedr_pbl *pbl, *tmp;
2324
2325         if (info->pbl_table)
2326                 list_add_tail(&info->pbl_table->list_entry,
2327                               &info->free_pbl_list);
2328
2329         if (!list_empty(&info->inuse_pbl_list))
2330                 list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
2331
2332         list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
2333                 list_del(&pbl->list_entry);
2334                 qedr_free_pbl(dev, &info->pbl_info, pbl);
2335         }
2336 }
2337
2338 static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
2339                         size_t page_list_len, bool two_layered)
2340 {
2341         struct qedr_pbl *tmp;
2342         int rc;
2343
2344         INIT_LIST_HEAD(&info->free_pbl_list);
2345         INIT_LIST_HEAD(&info->inuse_pbl_list);
2346
2347         rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
2348                                   page_list_len, two_layered);
2349         if (rc)
2350                 goto done;
2351
2352         info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2353         if (IS_ERR(info->pbl_table)) {
2354                 rc = PTR_ERR(info->pbl_table);
2355                 goto done;
2356         }
2357
2358         DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
2359                  &info->pbl_table->pa);
2360
2361         /* In the usual case we use two PBLs, so we add one to the free
2362          * list and allocate another one
2363          */
2364         tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
2365         if (IS_ERR(tmp)) {
2366                 DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
2367                 goto done;
2368         }
2369
2370         list_add_tail(&tmp->list_entry, &info->free_pbl_list);
2371
2372         DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
2373
2374 done:
2375         if (rc)
2376                 free_mr_info(dev, info);
2377
2378         return rc;
2379 }
2380
2381 struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
2382                                u64 usr_addr, int acc, struct ib_udata *udata)
2383 {
2384         struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2385         struct qedr_mr *mr;
2386         struct qedr_pd *pd;
2387         int rc = -ENOMEM;
2388
2389         pd = get_qedr_pd(ibpd);
2390         DP_DEBUG(dev, QEDR_MSG_MR,
2391                  "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
2392                  pd->pd_id, start, len, usr_addr, acc);
2393
2394         if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
2395                 return ERR_PTR(-EINVAL);
2396
2397         mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2398         if (!mr)
2399                 return ERR_PTR(rc);
2400
2401         mr->type = QEDR_MR_USER;
2402
2403         mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
2404         if (IS_ERR(mr->umem)) {
2405                 rc = -EFAULT;
2406                 goto err0;
2407         }
2408
2409         rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
2410         if (rc)
2411                 goto err1;
2412
2413         qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
2414                            &mr->info.pbl_info, mr->umem->page_shift);
2415
2416         rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2417         if (rc) {
2418                 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2419                 goto err1;
2420         }
2421
2422         /* Index only, 18 bit long, lkey = itid << 8 | key */
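              /* For example (hypothetical values): itid 0x1234 with key 0x56
               * would yield lkey 0x123456.
               */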
2423         mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2424         mr->hw_mr.key = 0;
2425         mr->hw_mr.pd = pd->pd_id;
2426         mr->hw_mr.local_read = 1;
2427         mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2428         mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2429         mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2430         mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2431         mr->hw_mr.mw_bind = false;
2432         mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
2433         mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2434         mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2435         mr->hw_mr.page_size_log = mr->umem->page_shift;
2436         mr->hw_mr.fbo = ib_umem_offset(mr->umem);
2437         mr->hw_mr.length = len;
2438         mr->hw_mr.vaddr = usr_addr;
2439         mr->hw_mr.zbva = false;
2440         mr->hw_mr.phy_mr = false;
2441         mr->hw_mr.dma_mr = false;
2442
2443         rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2444         if (rc) {
2445                 DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2446                 goto err2;
2447         }
2448
2449         mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2450         if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2451             mr->hw_mr.remote_atomic)
2452                 mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2453
2454         DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
2455                  mr->ibmr.lkey);
2456         return &mr->ibmr;
2457
2458 err2:
2459         dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2460 err1:
2461         qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2462 err0:
2463         kfree(mr);
2464         return ERR_PTR(rc);
2465 }
2466
2467 int qedr_dereg_mr(struct ib_mr *ib_mr)
2468 {
2469         struct qedr_mr *mr = get_qedr_mr(ib_mr);
2470         struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
2471         int rc = 0;
2472
2473         rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
2474         if (rc)
2475                 return rc;
2476
2477         dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2478
2479         if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR))
2480                 qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
2481
2482         /* It could be user-registered memory. */
2483         if (mr->umem)
2484                 ib_umem_release(mr->umem);
2485
2486         kfree(mr);
2487
2488         return rc;
2489 }
2490
2491 static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd,
2492                                        int max_page_list_len)
2493 {
2494         struct qedr_pd *pd = get_qedr_pd(ibpd);
2495         struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2496         struct qedr_mr *mr;
2497         int rc = -ENOMEM;
2498
2499         DP_DEBUG(dev, QEDR_MSG_MR,
2500                  "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
2501                  max_page_list_len);
2502
2503         mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2504         if (!mr)
2505                 return ERR_PTR(rc);
2506
2507         mr->dev = dev;
2508         mr->type = QEDR_MR_FRMR;
2509
2510         rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
2511         if (rc)
2512                 goto err0;
2513
2514         rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2515         if (rc) {
2516                 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2517                 goto err0;
2518         }
2519
2520         /* Index only, 18 bit long, lkey = itid << 8 | key */
2521         mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
2522         mr->hw_mr.key = 0;
2523         mr->hw_mr.pd = pd->pd_id;
2524         mr->hw_mr.local_read = 1;
2525         mr->hw_mr.local_write = 0;
2526         mr->hw_mr.remote_read = 0;
2527         mr->hw_mr.remote_write = 0;
2528         mr->hw_mr.remote_atomic = 0;
2529         mr->hw_mr.mw_bind = false;
2530         mr->hw_mr.pbl_ptr = 0;
2531         mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
2532         mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
2533         mr->hw_mr.fbo = 0;
2534         mr->hw_mr.length = 0;
2535         mr->hw_mr.vaddr = 0;
2536         mr->hw_mr.zbva = false;
2537         mr->hw_mr.phy_mr = true;
2538         mr->hw_mr.dma_mr = false;
2539
2540         rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2541         if (rc) {
2542                 DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2543                 goto err1;
2544         }
2545
2546         mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2547         mr->ibmr.rkey = mr->ibmr.lkey;
2548
2549         DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
2550         return mr;
2551
2552 err1:
2553         dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2554 err0:
2555         kfree(mr);
2556         return ERR_PTR(rc);
2557 }
2558
2559 struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd,
2560                             enum ib_mr_type mr_type, u32 max_num_sg)
2561 {
2562         struct qedr_mr *mr;
2563
2564         if (mr_type != IB_MR_TYPE_MEM_REG)
2565                 return ERR_PTR(-EINVAL);
2566
2567         mr = __qedr_alloc_mr(ibpd, max_num_sg);
2568
2569         if (IS_ERR(mr))
2570                 return ERR_PTR(-EINVAL);
2571
2572         return &mr->ibmr;
2573 }
2574
2575 static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
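     /* ib_sg_to_pages() callback: store one page address into the two-level
      * PBL. pbes_in_page entries fit in each PBL page, so mr->npages splits
      * into a table index (npages / pbes_in_page) and an entry index within
      * that page (npages % pbes_in_page).
      */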
2576 {
2577         struct qedr_mr *mr = get_qedr_mr(ibmr);
2578         struct qedr_pbl *pbl_table;
2579         struct regpair *pbe;
2580         u32 pbes_in_page;
2581
2582         if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
2583                 DP_ERR(mr->dev, "qedr_set_page failed: PBL is full (npages=%d)\n", mr->npages);
2584                 return -ENOMEM;
2585         }
2586
2587         DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
2588                  mr->npages, addr);
2589
2590         pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
2591         pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
2592         pbe = (struct regpair *)pbl_table->va;
2593         pbe +=  mr->npages % pbes_in_page;
2594         pbe->lo = cpu_to_le32((u32)addr);
2595         pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
2596
2597         mr->npages++;
2598
2599         return 0;
2600 }
2601
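     /* Recycle PBLs whose registration work requests have completed. One
      * completion is held back (the "- 1" below), so the most recently
      * completed registration keeps its page list until the next call.
      */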
2602 static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
2603 {
2604         int work = info->completed - info->completed_handled - 1;
2605
2606         DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
2607         while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
2608                 struct qedr_pbl *pbl;
2609
2610                 /* Free all the page lists that can be freed (all those
2611                  * that were invalidated), under the assumption that if an
2612                  * FMR completed successfully, any invalidate operation
2613                  * posted before it has completed as well.
2614                  */
2615                 pbl = list_first_entry(&info->inuse_pbl_list,
2616                                        struct qedr_pbl, list_entry);
2617                 list_move_tail(&pbl->list_entry, &info->free_pbl_list);
2618                 info->completed_handled++;
2619         }
2620 }
2621
2622 int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
2623                    int sg_nents, unsigned int *sg_offset)
2624 {
2625         struct qedr_mr *mr = get_qedr_mr(ibmr);
2626
2627         mr->npages = 0;
2628
2629         handle_completed_mrs(mr->dev, &mr->info);
2630         return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
2631 }
2632
2633 struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
2634 {
2635         struct qedr_dev *dev = get_qedr_dev(ibpd->device);
2636         struct qedr_pd *pd = get_qedr_pd(ibpd);
2637         struct qedr_mr *mr;
2638         int rc;
2639
2640         mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2641         if (!mr)
2642                 return ERR_PTR(-ENOMEM);
2643
2644         mr->type = QEDR_MR_DMA;
2645
2646         rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
2647         if (rc) {
2648                 DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
2649                 goto err1;
2650         }
2651
2652         /* index only, 18 bit long, lkey = itid << 8 | key */
2653         mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
2654         mr->hw_mr.pd = pd->pd_id;
2655         mr->hw_mr.local_read = 1;
2656         mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
2657         mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
2658         mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
2659         mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
2660         mr->hw_mr.dma_mr = true;
2661
2662         rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
2663         if (rc) {
2664                 DP_ERR(dev, "roce register tid returned an error %d\n", rc);
2665                 goto err2;
2666         }
2667
2668         mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2669         if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
2670             mr->hw_mr.remote_atomic)
2671                 mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
2672
2673         DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
2674         return &mr->ibmr;
2675
2676 err2:
2677         dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
2678 err1:
2679         kfree(mr);
2680         return ERR_PTR(rc);
2681 }
2682
2683 static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq)
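     /* Shadow-queue fullness test: one slot is always left empty so that
      * prod == cons unambiguously means "empty". E.g. with max_wr = 4,
      * prod = 2 and cons = 3 report full.
      */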
2684 {
2685         return (((wq->prod + 1) % wq->max_wr) == wq->cons);
2686 }
2687
2688 static int sge_data_len(struct ib_sge *sg_list, int num_sge)
2689 {
2690         int i, len = 0;
2691
2692         for (i = 0; i < num_sge; i++)
2693                 len += sg_list[i].length;
2694
2695         return len;
2696 }
2697
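     /* Byte-swap each 64-bit word of a WQE segment; the chained
      * cpu_to_be64(cpu_to_le64()) below reduces to an unconditional swab64
      * of every word, regardless of host endianness.
      */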
2698 static void swap_wqe_data64(u64 *p)
2699 {
2700         int i;
2701
2702         for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++)
2703                 *p = cpu_to_be64(cpu_to_le64(*p));
2704 }
2705
2706 static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev,
2707                                        struct qedr_qp *qp, u8 *wqe_size,
2708                                        struct ib_send_wr *wr,
2709                                        struct ib_send_wr **bad_wr, u8 *bits,
2710                                        u8 bit)
2711 {
2712         u32 data_size = sge_data_len(wr->sg_list, wr->num_sge);
2713         char *seg_prt, *wqe;
2714         int i, seg_siz;
2715
2716         if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
2717                 DP_ERR(dev, "Too much inline data in WR: %d\n", data_size);
2718                 *bad_wr = wr;
2719                 return 0;
2720         }
2721
2722         if (!data_size)
2723                 return data_size;
2724
2725         *bits |= bit;
2726
2727         seg_prt = NULL;
2728         wqe = NULL;
2729         seg_siz = 0;
2730
2731         /* Copy data inline */
2732         for (i = 0; i < wr->num_sge; i++) {
2733                 u32 len = wr->sg_list[i].length;
2734                 void *src = (void *)(uintptr_t)wr->sg_list[i].addr;
2735
2736                 while (len > 0) {
2737                         u32 cur;
2738
2739                         /* New segment required */
2740                         if (!seg_siz) {
2741                                 wqe = (char *)qed_chain_produce(&qp->sq.pbl);
2742                                 seg_prt = wqe;
2743                                 seg_siz = sizeof(struct rdma_sq_common_wqe);
2744                                 (*wqe_size)++;
2745                         }
2746
2747                         /* Calculate currently allowed length */
2748                         cur = min_t(u32, len, seg_siz);
2749                         memcpy(seg_prt, src, cur);
2750
2751                         /* Update segment variables */
2752                         seg_prt += cur;
2753                         seg_siz -= cur;
2754
2755                         /* Update sge variables */
2756                         src += cur;
2757                         len -= cur;
2758
2759                         /* Swap fully-completed segments */
2760                         if (!seg_siz)
2761                                 swap_wqe_data64((u64 *)wqe);
2762                 }
2763         }
2764
2765         /* swap last not completed segment */
2766         if (seg_siz)
2767                 swap_wqe_data64((u64 *)wqe);
2768
2769         return data_size;
2770 }
2771
2772 #define RQ_SGE_SET(sge, vaddr, vlength, vflags)                 \
2773         do {                                                    \
2774                 DMA_REGPAIR_LE(sge->addr, vaddr);               \
2775                 (sge)->length = cpu_to_le32(vlength);           \
2776                 (sge)->flags = cpu_to_le32(vflags);             \
2777         } while (0)
2778
2779 #define SRQ_HDR_SET(hdr, vwr_id, num_sge)                       \
2780         do {                                                    \
2781                 DMA_REGPAIR_LE(hdr->wr_id, vwr_id);             \
2782                 (hdr)->num_sges = num_sge;                      \
2783         } while (0)
2784
2785 #define SRQ_SGE_SET(sge, vaddr, vlength, vlkey)                 \
2786         do {                                                    \
2787                 DMA_REGPAIR_LE(sge->addr, vaddr);               \
2788                 (sge)->length = cpu_to_le32(vlength);           \
2789                 (sge)->l_key = cpu_to_le32(vlkey);              \
2790         } while (0)
2791
2792 static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size,
2793                                 struct ib_send_wr *wr)
2794 {
2795         u32 data_size = 0;
2796         int i;
2797
2798         for (i = 0; i < wr->num_sge; i++) {
2799                 struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl);
2800
2801                 DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr);
2802                 sge->l_key = cpu_to_le32(wr->sg_list[i].lkey);
2803                 sge->length = cpu_to_le32(wr->sg_list[i].length);
2804                 data_size += wr->sg_list[i].length;
2805         }
2806
2807         if (wqe_size)
2808                 *wqe_size += wr->num_sge;
2809
2810         return data_size;
2811 }
2812
2813 static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev,
2814                                      struct qedr_qp *qp,
2815                                      struct rdma_sq_rdma_wqe_1st *rwqe,
2816                                      struct rdma_sq_rdma_wqe_2nd *rwqe2,
2817                                      struct ib_send_wr *wr,
2818                                      struct ib_send_wr **bad_wr)
2819 {
2820         rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey);
2821         DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr);
2822
2823         if (wr->send_flags & IB_SEND_INLINE &&
2824             (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
2825              wr->opcode == IB_WR_RDMA_WRITE)) {
2826                 u8 flags = 0;
2827
2828                 SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
2829                 return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr,
2830                                                    bad_wr, &rwqe->flags, flags);
2831         }
2832
2833         return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
2834 }
2835
2836 static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev,
2837                                      struct qedr_qp *qp,
2838                                      struct rdma_sq_send_wqe_1st *swqe,
2839                                      struct rdma_sq_send_wqe_2st *swqe2,
2840                                      struct ib_send_wr *wr,
2841                                      struct ib_send_wr **bad_wr)
2842 {
2843         memset(swqe2, 0, sizeof(*swqe2));
2844         if (wr->send_flags & IB_SEND_INLINE) {
2845                 u8 flags = 0;
2846
2847                 SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
2848                 return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr,
2849                                                    bad_wr, &swqe->flags, flags);
2850         }
2851
2852         return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
2853 }
2854
2855 static int qedr_prepare_reg(struct qedr_qp *qp,
2856                             struct rdma_sq_fmr_wqe_1st *fwqe1,
2857                             struct ib_reg_wr *wr)
2858 {
2859         struct qedr_mr *mr = get_qedr_mr(wr->mr);
2860         struct rdma_sq_fmr_wqe_2nd *fwqe2;
2861
2862         fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl);
2863         fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova);
2864         fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova);
2865         fwqe1->l_key = wr->key;
2866
2867         fwqe2->access_ctrl = 0;
2868
2869         SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ,
2870                    !!(wr->access & IB_ACCESS_REMOTE_READ));
2871         SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE,
2872                    !!(wr->access & IB_ACCESS_REMOTE_WRITE));
2873         SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC,
2874                    !!(wr->access & IB_ACCESS_REMOTE_ATOMIC));
2875         SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1);
2876         SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE,
2877                    !!(wr->access & IB_ACCESS_LOCAL_WRITE));
2878         fwqe2->fmr_ctrl = 0;
2879
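        /* The HW encodes the page size as log2 relative to the 4K
         * (2^12) base page, hence the "- 12" below.
         */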
2880         SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG,
2881                    ilog2(mr->ibmr.page_size) - 12);
2882
2883         fwqe2->length_hi = 0;
2884         fwqe2->length_lo = mr->ibmr.length;
2885         fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa);
2886         fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa);
2887
2888         qp->wqe_wr_id[qp->sq.prod].mr = mr;
2889
2890         return 0;
2891 }
2892
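/*
 * Map an IB work-request opcode to the work-completion opcode that is
 * recorded in wqe_wr_id[] at post time and reported later by poll_cq.
 */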
2893 static enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode)
2894 {
2895         switch (opcode) {
2896         case IB_WR_RDMA_WRITE:
2897         case IB_WR_RDMA_WRITE_WITH_IMM:
2898                 return IB_WC_RDMA_WRITE;
2899         case IB_WR_SEND_WITH_IMM:
2900         case IB_WR_SEND:
2901         case IB_WR_SEND_WITH_INV:
2902                 return IB_WC_SEND;
2903         case IB_WR_RDMA_READ:
2904         case IB_WR_RDMA_READ_WITH_INV:
2905                 return IB_WC_RDMA_READ;
2906         case IB_WR_ATOMIC_CMP_AND_SWP:
2907                 return IB_WC_COMP_SWAP;
2908         case IB_WR_ATOMIC_FETCH_AND_ADD:
2909                 return IB_WC_FETCH_ADD;
2910         case IB_WR_REG_MR:
2911                 return IB_WC_REG_MR;
2912         case IB_WR_LOCAL_INV:
2913                 return IB_WC_LOCAL_INV;
2914         default:
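                /* Fallback only; __qedr_post_send() rejects opcodes
                 * that are not handled explicitly above, so this value
                 * never reaches a CQE.
                 */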
2915                 return IB_WC_SEND;
2916         }
2917 }
2918
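/*
 * Gatekeeper for __qedr_post_send(): rejects a WR when it carries too
 * many SGEs, when the SQ is full, or when the PBL cannot hold a
 * maximum-sized WQE.  Each condition is logged only once per QP,
 * tracked via err_bitmap, to avoid flooding the log.
 */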
2919 static inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr)
2920 {
2921         int wq_is_full, err_wr, pbl_is_full;
2922         struct qedr_dev *dev = qp->dev;
2923
2924         /* prevent SQ overflow and/or processing of a bad WR */
2925         err_wr = wr->num_sge > qp->sq.max_sges;
2926         wq_is_full = qedr_wq_is_full(&qp->sq);
2927         pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) <
2928                       QEDR_MAX_SQE_ELEMENTS_PER_SQE;
2929         if (wq_is_full || err_wr || pbl_is_full) {
2930                 if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) {
2931                         DP_ERR(dev,
2932                                "error: WQ is full. Post send on QP %p failed (this error appears only once)\n",
2933                                qp);
2934                         qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL;
2935                 }
2936
2937                 if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) {
2938                         DP_ERR(dev,
2939                                "error: WR is bad. Post send on QP %p failed (this error appears only once)\n",
2940                                qp);
2941                         qp->err_bitmap |= QEDR_QP_ERR_BAD_SR;
2942                 }
2943
2944                 if (pbl_is_full &&
2945                     !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) {
2946                         DP_ERR(dev,
2947                                "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n",
2948                                qp);
2949                         qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL;
2950                 }
2951                 return false;
2952         }
2953         return true;
2954 }
2955
2956 static int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2957                             struct ib_send_wr **bad_wr)
2958 {
2959         struct qedr_dev *dev = get_qedr_dev(ibqp->device);
2960         struct qedr_qp *qp = get_qedr_qp(ibqp);
2961         struct rdma_sq_atomic_wqe_1st *awqe1;
2962         struct rdma_sq_atomic_wqe_2nd *awqe2;
2963         struct rdma_sq_atomic_wqe_3rd *awqe3;
2964         struct rdma_sq_send_wqe_2st *swqe2;
2965         struct rdma_sq_local_inv_wqe *iwqe;
2966         struct rdma_sq_rdma_wqe_2nd *rwqe2;
2967         struct rdma_sq_send_wqe_1st *swqe;
2968         struct rdma_sq_rdma_wqe_1st *rwqe;
2969         struct rdma_sq_fmr_wqe_1st *fwqe1;
2970         struct rdma_sq_common_wqe *wqe;
2971         u32 length;
2972         int rc = 0;
2973         bool comp;
2974
2975         if (!qedr_can_post_send(qp, wr)) {
2976                 *bad_wr = wr;
2977                 return -ENOMEM;
2978         }
2979
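        /* The first chain element carries the common WQE header; the
         * per-opcode handlers below produce any additional elements.
         */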
2980         wqe = qed_chain_produce(&qp->sq.pbl);
2981         comp = !!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled;
2982         qp->wqe_wr_id[qp->sq.prod].signaled = comp;
2983
2984         wqe->flags = 0;
2985         SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG,
2986                    !!(wr->send_flags & IB_SEND_SOLICITED));
2987         /* request a CQE per-WR, or unconditionally on an all-signaled QP */
2988         SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp);
2989         SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG,
2990                    !!(wr->send_flags & IB_SEND_FENCE));
2991         wqe->prev_wqe_size = qp->prev_wqe_size;
2992
2993         qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode);
2994
2995         switch (wr->opcode) {
2996         case IB_WR_SEND_WITH_IMM:
2997                 if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
2998                         rc = -EINVAL;
2999                         *bad_wr = wr;
3000                         break;
3001                 }
3002                 wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
3003                 swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3004                 swqe->wqe_size = 2;
3005                 swqe2 = qed_chain_produce(&qp->sq.pbl);
3006
3007                 swqe->inv_key_or_imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
3008                 length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3009                                                    wr, bad_wr);
3010                 swqe->length = cpu_to_le32(length);
3011                 qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3012                 qp->prev_wqe_size = swqe->wqe_size;
3013                 qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3014                 break;
3015         case IB_WR_SEND:
3016                 wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
3017                 swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3018
3019                 swqe->wqe_size = 2;
3020                 swqe2 = qed_chain_produce(&qp->sq.pbl);
3021                 length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3022                                                    wr, bad_wr);
3023                 swqe->length = cpu_to_le32(length);
3024                 qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3025                 qp->prev_wqe_size = swqe->wqe_size;
3026                 qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3027                 break;
3028         case IB_WR_SEND_WITH_INV:
3029                 wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE;
3030                 swqe = (struct rdma_sq_send_wqe_1st *)wqe;
3031                 swqe2 = qed_chain_produce(&qp->sq.pbl);
3032                 swqe->wqe_size = 2;
3033                 swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey);
3034                 length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2,
3035                                                    wr, bad_wr);
3036                 swqe->length = cpu_to_le32(length);
3037                 qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
3038                 qp->prev_wqe_size = swqe->wqe_size;
3039                 qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
3040                 break;
3041
3042         case IB_WR_RDMA_WRITE_WITH_IMM:
3043                 if (unlikely(rdma_protocol_iwarp(&dev->ibdev, 1))) {
3044                         rc = -EINVAL;
3045                         *bad_wr = wr;
3046                         break;
3047                 }
3048                 wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
3049                 rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3050
3051                 rwqe->wqe_size = 2;
3052                 rwqe->imm_data = cpu_to_le32(be32_to_cpu(wr->ex.imm_data));
3053                 rwqe2 = qed_chain_produce(&qp->sq.pbl);
3054                 length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3055                                                    wr, bad_wr);
3056                 rwqe->length = cpu_to_le32(length);
3057                 qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3058                 qp->prev_wqe_size = rwqe->wqe_size;
3059                 qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3060                 break;
3061         case IB_WR_RDMA_WRITE:
3062                 wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
3063                 rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3064
3065                 rwqe->wqe_size = 2;
3066                 rwqe2 = qed_chain_produce(&qp->sq.pbl);
3067                 length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3068                                                    wr, bad_wr);
3069                 rwqe->length = cpu_to_le32(length);
3070                 qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3071                 qp->prev_wqe_size = rwqe->wqe_size;
3072                 qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3073                 break;
3074         case IB_WR_RDMA_READ_WITH_INV:
3075                 SET_FIELD2(wqe->flags, RDMA_SQ_RDMA_WQE_1ST_READ_INV_FLG, 1);
3076                 /* fallthrough: otherwise handled identically to RDMA READ */
3077
3078         case IB_WR_RDMA_READ:
3079                 wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
3080                 rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
3081
3082                 rwqe->wqe_size = 2;
3083                 rwqe2 = qed_chain_produce(&qp->sq.pbl);
3084                 length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2,
3085                                                    wr, bad_wr);
3086                 rwqe->length = cpu_to_le32(length);
3087                 qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
3088                 qp->prev_wqe_size = rwqe->wqe_size;
3089                 qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
3090                 break;
3091
3092         case IB_WR_ATOMIC_CMP_AND_SWP:
3093         case IB_WR_ATOMIC_FETCH_AND_ADD:
3094                 awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
3095                 awqe1->wqe_size = 4;
3096
3097                 awqe2 = qed_chain_produce(&qp->sq.pbl);
3098                 DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr);
3099                 awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey);
3100
3101                 awqe3 = qed_chain_produce(&qp->sq.pbl);
3102
3103                 if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
3104                         wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
3105                         DMA_REGPAIR_LE(awqe3->swap_data,
3106                                        atomic_wr(wr)->compare_add);
3107                 } else {
3108                         wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
3109                         DMA_REGPAIR_LE(awqe3->swap_data,
3110                                        atomic_wr(wr)->swap);
3111                         DMA_REGPAIR_LE(awqe3->cmp_data,
3112                                        atomic_wr(wr)->compare_add);
3113                 }
3114
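                /* Produce the SGE describing the local buffer that
                 * receives the atomic result; wqe_size is fixed at 4,
                 * so no accumulation pointer is passed (NULL).
                 */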
3115                 qedr_prepare_sq_sges(qp, NULL, wr);
3116
3117                 qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
3118                 qp->prev_wqe_size = awqe1->wqe_size;
3119                 break;
3120
3121         case IB_WR_LOCAL_INV:
3122                 iwqe = (struct rdma_sq_local_inv_wqe *)wqe;
3123                 iwqe->wqe_size = 1;
3124
3125                 iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE;
3126                 iwqe->inv_l_key = wr->ex.invalidate_rkey;
3127                 qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size;
3128                 qp->prev_wqe_size = iwqe->wqe_size;
3129                 break;
3130         case IB_WR_REG_MR:
3131                 DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n");
3132                 wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR;
3133                 fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe;
3134                 fwqe1->wqe_size = 2;
3135
3136                 rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr));
3137                 if (rc) {
3138                         DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc);
3139                         *bad_wr = wr;
3140                         break;
3141                 }
3142
3143                 qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size;
3144                 qp->prev_wqe_size = fwqe1->wqe_size;
3145                 break;
3146         default:
3147                 DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode);
3148                 rc = -EINVAL;
3149                 *bad_wr = wr;
3150                 break;
3151         }
3152
3153         if (*bad_wr) {
3154                 u16 value;
3155
3156                 /* Rewind prod to its pre-WR position; db_data caches
3157                  * the producer value last published via the doorbell.
3158                  */
3159                 value = le16_to_cpu(qp->sq.db_data.data.value);
3160                 qed_chain_set_prod(&qp->sq.pbl, value, wqe);
3161
3162                 /* Restore prev_wqe_size */
3163                 qp->prev_wqe_size = wqe->prev_wqe_size;
3164                 rc = -EINVAL;
3165                 DP_ERR(dev, "POST SEND FAILED\n");
3166         }
3167
3168         return rc;
3169 }
3170
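/*
 * Verbs entry point for posting send WRs.  GSI QPs are diverted to the
 * dedicated CM path; for RoCE, posting is only permitted in the RTS,
 * SQD and ERR states.  Each WR in the list is built by
 * __qedr_post_send() under q_lock.
 */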
3171 int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
3172                    struct ib_send_wr **bad_wr)
3173 {
3174         struct qedr_dev *dev = get_qedr_dev(ibqp->device);
3175         struct qedr_qp *qp = get_qedr_qp(ibqp);
3176         unsigned long flags;
3177         int rc = 0;
3178
3179         *bad_wr = NULL;
3180
3181         if (qp->qp_type == IB_QPT_GSI)
3182                 return qedr_gsi_post_send(ibqp, wr, bad_wr);
3183
3184         spin_lock_irqsave(&qp->q_lock, flags);
3185
3186         if (rdma_protocol_roce(&dev->ibdev, 1)) {
3187                 if ((qp->state != QED_ROCE_QP_STATE_RTS) &&
3188                     (qp->state != QED_ROCE_QP_STATE_ERR) &&
3189                     (qp->state != QED_ROCE_QP_STATE_SQD)) {
3190                         spin_unlock_irqrestore(&qp->q_lock, flags);
3191                         *bad_wr = wr;
3192                         DP_DEBUG(dev, QEDR_MSG_CQ,
3193                                  "QP in wrong state! QP icid=0x%x state %d\n",
3194                                  qp->icid, qp->state);
3195                         return -EINVAL;
3196                 }
3197         }
3198
3199         while (wr) {
3200                 rc = __qedr_post_send(ibqp, wr, bad_wr);
3201                 if (rc)
3202                         break;
3203
3204                 qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;