Merge tag 'for-linus-unmerged' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma...
authorLinus Torvalds <torvalds@linux-foundation.org>
Sat, 7 Apr 2018 00:35:43 +0000 (17:35 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sat, 7 Apr 2018 00:35:43 +0000 (17:35 -0700)
Pull rdma updates from Jason Gunthorpe:
 "Doug and I are at a conference next week so if another PR is sent I
  expect it to only be bug fixes. Parav noted yesterday that there are
  some fringe case behavior changes in his work that he would like to
  fix, and I see that Intel has a number of rc looking patches for HFI1
  they posted yesterday.

  Parav is again the biggest contributor by patch count with his ongoing
  work to enable container support in the RDMA stack, followed by Leon
  doing syzkaller inspired cleanups, though most of the actual fixing
  went to RC.

  There is one uncomfortable series here fixing the user ABI to actually
  work as intended in 32 bit mode. There are lots of notes in the commit
  messages, but the basic summary is we don't think there is an actual
  32 bit kernel user of drivers/infiniband for several good reasons.

  However we are seeing people want to use a 32 bit user space with 64
  bit kernel, which didn't completely work today. So in fixing it we
  required a 32 bit rxe user to upgrade their userspace. rxe users are
  still quite rare and we think a 32 bit one is non-existent.

   - Fix RDMA uapi headers to actually compile in userspace and be more
     complete

   - Three shared with netdev pull requests from Mellanox:

      * 7 patches, mostly to net with 1 IB related one at the back.
        This series addresses an IRQ performance issue (patch 1),
        cleanups related to the fix for the IRQ performance problem
        (patches 2-6), and then extends the fragmented completion queue
        support that already exists in the net side of the driver to the
        ib side of the driver (patch 7).

      * Mostly IB, with 5 patches to net that are needed to support the
        remaining 10 patches to the IB subsystem. This series extends
        the current 'representor' framework when the mlx5 driver is in
        switchdev mode from being a netdev only construct to being a
        netdev/IB dev construct. The IB dev is limited to raw Eth queue
        pairs only, but by having an IB dev of this type attached to the
        representor for a switchdev port, it enables DPDK to work on the
        switchdev device.

      * All net related, but needed as infrastructure for the rdma
        driver

   - Updates for the hns, i40iw, bnxt_re, cxgb3 and cxgb4 drivers

   - SRP performance updates

   - IB uverbs write path cleanup patch series from Leon

   - Add RDMA_CM support to ib_srpt. This is disabled by default. Users
     need to set the port for ib_srpt to listen on in configfs in order
     for it to be enabled
     (/sys/kernel/config/target/srpt/discovery_auth/rdma_cm_port)

   - TSO and Scatter FCS support in mlx4

   - Refactor of modify_qp routine to resolve problems seen while
     working on new code that is forthcoming

   - More refactoring and updates of RDMA CM for containers support from
     Parav

   - mlx5 'fine grained packet pacing', 'ipsec offload' and 'device
     memory' user API features

   - Infrastructure updates for the new IOCTL interface, based on
     increased usage

   - ABI compatibility bug fixes to fully support 32 bit userspace on 64
     bit kernel as was originally intended. See the commit messages for
     extensive details

   - Syzkaller bugs and code cleanups motivated by them"

* tag 'for-linus-unmerged' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (199 commits)
  IB/rxe: Fix for oops in rxe_register_device on ppc64le arch
  IB/mlx5: Device memory mr registration support
  net/mlx5: Mkey creation command adjustments
  IB/mlx5: Device memory support in mlx5_ib
  net/mlx5: Query device memory capabilities
  IB/uverbs: Add device memory registration ioctl support
  IB/uverbs: Add alloc/free dm uverbs ioctl support
  IB/uverbs: Add device memory capabilities reporting
  IB/uverbs: Expose device memory capabilities to user
  RDMA/qedr: Fix wmb usage in qedr
  IB/rxe: Removed GID add/del dummy routines
  RDMA/qedr: Zero stack memory before copying to user space
  IB/mlx5: Add ability to hash by IPSEC_SPI when creating a TIR
  IB/mlx5: Add information for querying IPsec capabilities
  IB/mlx5: Add IPsec support for egress and ingress
  {net,IB}/mlx5: Add ipsec helper
  IB/mlx5: Add modify_flow_action_esp verb
  IB/mlx5: Add implementation for create and destroy action_xfrm
  IB/uverbs: Introduce ESP steering match filter
  IB/uverbs: Add modify ESP flow_action
  ...

176 files changed:
.mailmap
MAINTAINERS
drivers/infiniband/Kconfig
drivers/infiniband/core/Makefile
drivers/infiniband/core/addr.c
drivers/infiniband/core/cache.c
drivers/infiniband/core/cm.c
drivers/infiniband/core/cma.c
drivers/infiniband/core/cma_priv.h [new file with mode: 0644]
drivers/infiniband/core/core_priv.h
drivers/infiniband/core/device.c
drivers/infiniband/core/iwpm_util.c
drivers/infiniband/core/multicast.c
drivers/infiniband/core/nldev.c
drivers/infiniband/core/rdma_core.c
drivers/infiniband/core/restrack.c
drivers/infiniband/core/sa_query.c
drivers/infiniband/core/sysfs.c
drivers/infiniband/core/ucm.c
drivers/infiniband/core/ucma.c
drivers/infiniband/core/uverbs.h
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/core/uverbs_ioctl.c
drivers/infiniband/core/uverbs_ioctl_merge.c
drivers/infiniband/core/uverbs_main.c
drivers/infiniband/core/uverbs_std_types.c
drivers/infiniband/core/uverbs_std_types_cq.c [new file with mode: 0644]
drivers/infiniband/core/uverbs_std_types_dm.c [new file with mode: 0644]
drivers/infiniband/core/uverbs_std_types_flow_action.c [new file with mode: 0644]
drivers/infiniband/core/uverbs_std_types_mr.c [new file with mode: 0644]
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/bnxt_re/ib_verbs.c
drivers/infiniband/hw/bnxt_re/ib_verbs.h
drivers/infiniband/hw/bnxt_re/main.c
drivers/infiniband/hw/bnxt_re/qplib_sp.c
drivers/infiniband/hw/cxgb3/Kconfig
drivers/infiniband/hw/cxgb3/Makefile
drivers/infiniband/hw/cxgb3/cxio_dbg.c [deleted file]
drivers/infiniband/hw/cxgb3/cxio_hal.h
drivers/infiniband/hw/cxgb3/iwch_cq.c
drivers/infiniband/hw/cxgb3/iwch_provider.c
drivers/infiniband/hw/cxgb4/device.c
drivers/infiniband/hw/cxgb4/mem.c
drivers/infiniband/hw/cxgb4/provider.c
drivers/infiniband/hw/hfi1/driver.c
drivers/infiniband/hw/hfi1/hfi.h
drivers/infiniband/hw/hfi1/qp.c
drivers/infiniband/hw/hfi1/user_exp_rcv.c
drivers/infiniband/hw/hfi1/verbs.c
drivers/infiniband/hw/hns/Makefile
drivers/infiniband/hw/hns/hns_roce_ah.c
drivers/infiniband/hw/hns/hns_roce_cq.c
drivers/infiniband/hw/hns/hns_roce_db.c [new file with mode: 0644]
drivers/infiniband/hw/hns/hns_roce_device.h
drivers/infiniband/hw/hns/hns_roce_hw_v1.c
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
drivers/infiniband/hw/hns/hns_roce_hw_v2.h
drivers/infiniband/hw/hns/hns_roce_main.c
drivers/infiniband/hw/hns/hns_roce_mr.c
drivers/infiniband/hw/hns/hns_roce_pd.c
drivers/infiniband/hw/hns/hns_roce_qp.c
drivers/infiniband/hw/i40iw/i40iw.h
drivers/infiniband/hw/i40iw/i40iw_cm.c
drivers/infiniband/hw/i40iw/i40iw_cm.h
drivers/infiniband/hw/i40iw/i40iw_ctrl.c
drivers/infiniband/hw/i40iw/i40iw_d.h
drivers/infiniband/hw/i40iw/i40iw_hw.c
drivers/infiniband/hw/i40iw/i40iw_main.c
drivers/infiniband/hw/i40iw/i40iw_puda.c
drivers/infiniband/hw/i40iw/i40iw_type.h
drivers/infiniband/hw/i40iw/i40iw_ucontext.h [deleted file]
drivers/infiniband/hw/i40iw/i40iw_utils.c
drivers/infiniband/hw/i40iw/i40iw_verbs.c
drivers/infiniband/hw/mlx4/ah.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx4/mlx4_ib.h
drivers/infiniband/hw/mlx4/mr.c
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/hw/mlx5/ah.c
drivers/infiniband/hw/mlx5/cmd.c
drivers/infiniband/hw/mlx5/cmd.h
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/mthca/mthca_provider.c
drivers/infiniband/hw/nes/nes_verbs.c
drivers/infiniband/hw/ocrdma/ocrdma_ah.c
drivers/infiniband/hw/ocrdma/ocrdma_hw.c
drivers/infiniband/hw/ocrdma/ocrdma_main.c
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
drivers/infiniband/hw/qedr/main.c
drivers/infiniband/hw/qedr/qedr_roce_cm.c
drivers/infiniband/hw/qedr/verbs.c
drivers/infiniband/hw/qedr/verbs.h
drivers/infiniband/hw/qib/qib.h
drivers/infiniband/hw/qib/qib_diag.c
drivers/infiniband/hw/qib/qib_file_ops.c
drivers/infiniband/hw/qib/qib_iba7322.c
drivers/infiniband/hw/qib/qib_init.c
drivers/infiniband/hw/qib/qib_sdma.c
drivers/infiniband/hw/qib/qib_verbs.c
drivers/infiniband/hw/usnic/usnic_ib_main.c
drivers/infiniband/hw/usnic/usnic_transport.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
drivers/infiniband/sw/rdmavt/vt.c
drivers/infiniband/sw/rdmavt/vt.h
drivers/infiniband/sw/rxe/rxe.c
drivers/infiniband/sw/rxe/rxe.h
drivers/infiniband/sw/rxe/rxe_av.c
drivers/infiniband/sw/rxe/rxe_cq.c
drivers/infiniband/sw/rxe/rxe_loc.h
drivers/infiniband/sw/rxe/rxe_net.c
drivers/infiniband/sw/rxe/rxe_qp.c
drivers/infiniband/sw/rxe/rxe_queue.c
drivers/infiniband/sw/rxe/rxe_queue.h
drivers/infiniband/sw/rxe/rxe_recv.c
drivers/infiniband/sw/rxe/rxe_resp.c
drivers/infiniband/sw/rxe/rxe_srq.c
drivers/infiniband/sw/rxe/rxe_verbs.c
drivers/infiniband/sw/rxe/rxe_verbs.h
drivers/infiniband/ulp/ipoib/ipoib.h
drivers/infiniband/ulp/ipoib/ipoib_ib.c
drivers/infiniband/ulp/srp/ib_srp.c
drivers/infiniband/ulp/srpt/ib_srpt.c
drivers/infiniband/ulp/srpt/ib_srpt.h
drivers/net/ethernet/mellanox/mlx4/fw.c
drivers/net/ethernet/mellanox/mlx4/main.c
drivers/net/ethernet/mellanox/mlx5/core/en_common.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
drivers/net/ethernet/mellanox/mlx5/core/fw.c
drivers/net/ethernet/mellanox/mlx5/core/rl.c
include/linux/mlx4/device.h
include/linux/mlx5/device.h
include/linux/mlx5/driver.h
include/linux/mlx5/fs_helpers.h
include/linux/mlx5/mlx5_ifc.h
include/rdma/ib_addr.h
include/rdma/ib_cache.h
include/rdma/ib_sa.h
include/rdma/ib_verbs.h
include/rdma/rdma_cm.h
include/rdma/rdma_vt.h
include/rdma/restrack.h
include/rdma/uverbs_ioctl.h
include/rdma/uverbs_named_ioctl.h [new file with mode: 0644]
include/rdma/uverbs_std_types.h
include/uapi/rdma/bnxt_re-abi.h
include/uapi/rdma/cxgb3-abi.h
include/uapi/rdma/cxgb4-abi.h
include/uapi/rdma/hfi/hfi1_ioctl.h
include/uapi/rdma/hfi/hfi1_user.h
include/uapi/rdma/hns-abi.h
include/uapi/rdma/i40iw-abi.h [new file with mode: 0644]
include/uapi/rdma/ib_user_cm.h
include/uapi/rdma/ib_user_ioctl_cmds.h [new file with mode: 0644]
include/uapi/rdma/ib_user_ioctl_verbs.h
include/uapi/rdma/ib_user_mad.h
include/uapi/rdma/ib_user_verbs.h
include/uapi/rdma/mlx4-abi.h
include/uapi/rdma/mlx5-abi.h
include/uapi/rdma/mlx5_user_ioctl_cmds.h [new file with mode: 0644]
include/uapi/rdma/mlx5_user_ioctl_verbs.h [new file with mode: 0644]
include/uapi/rdma/mthca-abi.h
include/uapi/rdma/nes-abi.h
include/uapi/rdma/ocrdma-abi.h
include/uapi/rdma/qedr-abi.h
include/uapi/rdma/rdma_netlink.h
include/uapi/rdma/rdma_user_cm.h
include/uapi/rdma/rdma_user_ioctl.h
include/uapi/rdma/rdma_user_ioctl_cmds.h [new file with mode: 0644]
include/uapi/rdma/rdma_user_rxe.h
include/uapi/rdma/vmw_pvrdma-abi.h

index 9795e6b..7fa9d41 100644 (file)
--- a/.mailmap
+++ b/.mailmap
@@ -102,6 +102,8 @@ Koushik <raghavendra.koushik@neterion.com>
 Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski@samsung.com>
 Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski.k@gmail.com>
 Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
+Leon Romanovsky <leon@kernel.org> <leon@leon.nu>
+Leon Romanovsky <leon@kernel.org> <leonro@mellanox.com>
 Leonid I Ananiev <leonid.i.ananiev@intel.com>
 Linas Vepstas <linas@austin.ibm.com>
Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@web.de>
index f1be26e..0407846 100644 (file)
@@ -7214,6 +7214,7 @@ M:        Shiraz Saleem <shiraz.saleem@intel.com>
 L:     linux-rdma@vger.kernel.org
 S:     Supported
 F:     drivers/infiniband/hw/i40iw/
+F:     include/uapi/rdma/i40iw-abi.h
 
 INTEL SHA MULTIBUFFER DRIVER
 M:     Megha Dey <megha.dey@linux.intel.com>
index 8517d6e..ee270e0 100644 (file)
@@ -35,14 +35,13 @@ config INFINIBAND_USER_ACCESS
          libibverbs, libibcm and a hardware driver library from
          rdma-core <https://github.com/linux-rdma/rdma-core>.
 
-config INFINIBAND_EXP_USER_ACCESS
-       bool "Enable the full uverbs ioctl interface (EXPERIMENTAL)"
+config INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI
+       bool "Allow experimental legacy verbs in new ioctl uAPI  (EXPERIMENTAL)"
        depends on INFINIBAND_USER_ACCESS
        ---help---
-         IOCTL based ABI support for Infiniband. This allows userspace
-         to invoke the experimental IOCTL based ABI.
-         These commands are parsed via per-device parsing tree and
-         enables per-device features.
+         IOCTL based uAPI support for Infiniband is enabled by default for
+         new verbs only. This allows userspace to invoke the IOCTL based uAPI
+         for current legacy verbs too.
 
 config INFINIBAND_USER_MEM
        bool
index f69833d..dda9e85 100644 (file)
@@ -34,4 +34,6 @@ ib_ucm-y :=                   ucm.o
 
 ib_uverbs-y :=                 uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
                                rdma_core.o uverbs_std_types.o uverbs_ioctl.o \
-                               uverbs_ioctl_merge.o
+                               uverbs_ioctl_merge.o uverbs_std_types_cq.o \
+                               uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
+                               uverbs_std_types_mr.o
index cb1d2ab..88a7542 100644 (file)
@@ -329,7 +329,8 @@ static void queue_req(struct addr_req *req)
        mutex_unlock(&lock);
 }
 
-static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
+static int ib_nl_fetch_ha(const struct dst_entry *dst,
+                         struct rdma_dev_addr *dev_addr,
                          const void *daddr, u32 seq, u16 family)
 {
        if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS))
@@ -340,7 +341,8 @@ static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
        return ib_nl_ip_send_msg(dev_addr, daddr, seq, family);
 }
 
-static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
+static int dst_fetch_ha(const struct dst_entry *dst,
+                       struct rdma_dev_addr *dev_addr,
                        const void *daddr)
 {
        struct neighbour *n;
@@ -364,7 +366,7 @@ static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
        return ret;
 }
 
-static bool has_gateway(struct dst_entry *dst, sa_family_t family)
+static bool has_gateway(const struct dst_entry *dst, sa_family_t family)
 {
        struct rtable *rt;
        struct rt6_info *rt6;
@@ -378,7 +380,7 @@ static bool has_gateway(struct dst_entry *dst, sa_family_t family)
        return rt6->rt6i_flags & RTF_GATEWAY;
 }
 
-static int fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
+static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
                    const struct sockaddr *dst_in, u32 seq)
 {
        const struct sockaddr_in *dst_in4 =
@@ -482,7 +484,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
 }
 #endif
 
-static int addr_resolve_neigh(struct dst_entry *dst,
+static int addr_resolve_neigh(const struct dst_entry *dst,
                              const struct sockaddr *dst_in,
                              struct rdma_dev_addr *addr,
                              u32 seq)
@@ -736,7 +738,6 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr,
 
        return addr_resolve(src_in, dst_addr, addr, false, 0);
 }
-EXPORT_SYMBOL(rdma_resolve_ip_route);
 
 void rdma_addr_cancel(struct rdma_dev_addr *addr)
 {
index e9a409d..e337b08 100644 (file)
@@ -59,8 +59,6 @@ struct ib_update_work {
 union ib_gid zgid;
 EXPORT_SYMBOL(zgid);
 
-static const struct ib_gid_attr zattr;
-
 enum gid_attr_find_mask {
        GID_ATTR_FIND_MASK_GID          = 1UL << 0,
        GID_ATTR_FIND_MASK_NETDEV       = 1UL << 1,
@@ -73,15 +71,6 @@ enum gid_table_entry_props {
        GID_TABLE_ENTRY_DEFAULT         = 1UL << 1,
 };
 
-enum gid_table_write_action {
-       GID_TABLE_WRITE_ACTION_ADD,
-       GID_TABLE_WRITE_ACTION_DEL,
-       /* MODIFY only updates the GID table. Currently only used by
-        * ib_cache_update.
-        */
-       GID_TABLE_WRITE_ACTION_MODIFY
-};
-
 struct ib_gid_table_entry {
        unsigned long       props;
        union ib_gid        gid;
@@ -100,31 +89,26 @@ struct ib_gid_table {
         * (a) Find the GID
         * (b) Delete it.
         *
-        * Add/delete should be carried out atomically.
-        * This is done by locking this mutex from multiple
-        * writers. We don't need this lock for IB, as the MAD
-        * layer replaces all entries. All data_vec entries
-        * are locked by this lock.
         **/
-       struct mutex         lock;
-       /* This lock protects the table entries from being
-        * read and written simultaneously.
+       /* Any writer to data_vec must hold this lock and the write side of
+        * rwlock. readers must hold only rwlock. All writers must be in a
+        * sleepable context.
         */
+       struct mutex         lock;
+       /* rwlock protects data_vec[ix]->props. */
        rwlock_t             rwlock;
        struct ib_gid_table_entry *data_vec;
 };
 
 static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
 {
-       if (rdma_cap_roce_gid_table(ib_dev, port)) {
-               struct ib_event event;
+       struct ib_event event;
 
-               event.device            = ib_dev;
-               event.element.port_num  = port;
-               event.event             = IB_EVENT_GID_CHANGE;
+       event.device            = ib_dev;
+       event.element.port_num  = port;
+       event.event             = IB_EVENT_GID_CHANGE;
 
-               ib_dispatch_event(&event);
-       }
+       ib_dispatch_event(&event);
 }
 
 static const char * const gid_type_str[] = {
@@ -165,94 +149,127 @@ int ib_cache_gid_parse_type_str(const char *buf)
 }
 EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
 
-/* This function expects that rwlock will be write locked in all
- * scenarios and that lock will be locked in sleep-able (RoCE)
- * scenarios.
- */
-static int write_gid(struct ib_device *ib_dev, u8 port,
-                    struct ib_gid_table *table, int ix,
-                    const union ib_gid *gid,
-                    const struct ib_gid_attr *attr,
-                    enum gid_table_write_action action,
-                    bool  default_gid)
-       __releases(&table->rwlock) __acquires(&table->rwlock)
+static void del_roce_gid(struct ib_device *device, u8 port_num,
+                        struct ib_gid_table *table, int ix)
 {
-       int ret = 0;
-       struct net_device *old_net_dev;
-       enum ib_gid_type old_gid_type;
+       pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
+                device->name, port_num, ix,
+                table->data_vec[ix].gid.raw);
+
+       if (rdma_cap_roce_gid_table(device, port_num))
+               device->del_gid(&table->data_vec[ix].attr,
+                               &table->data_vec[ix].context);
+       dev_put(table->data_vec[ix].attr.ndev);
+}
 
-       /* in rdma_cap_roce_gid_table, this funciton should be protected by a
-        * sleep-able lock.
-        */
+static int add_roce_gid(struct ib_gid_table *table,
+                       const union ib_gid *gid,
+                       const struct ib_gid_attr *attr)
+{
+       struct ib_gid_table_entry *entry;
+       int ix = attr->index;
+       int ret = 0;
 
-       if (rdma_cap_roce_gid_table(ib_dev, port)) {
-               table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
-               write_unlock_irq(&table->rwlock);
-               /* GID_TABLE_WRITE_ACTION_MODIFY currently isn't supported by
-                * RoCE providers and thus only updates the cache.
-                */
-               if (action == GID_TABLE_WRITE_ACTION_ADD)
-                       ret = ib_dev->add_gid(ib_dev, port, ix, gid, attr,
-                                             &table->data_vec[ix].context);
-               else if (action == GID_TABLE_WRITE_ACTION_DEL)
-                       ret = ib_dev->del_gid(ib_dev, port, ix,
-                                             &table->data_vec[ix].context);
-               write_lock_irq(&table->rwlock);
+       if (!attr->ndev) {
+               pr_err("%s NULL netdev device=%s port=%d index=%d\n",
+                      __func__, attr->device->name, attr->port_num,
+                      attr->index);
+               return -EINVAL;
        }
 
-       old_net_dev = table->data_vec[ix].attr.ndev;
-       old_gid_type = table->data_vec[ix].attr.gid_type;
-       if (old_net_dev && old_net_dev != attr->ndev)
-               dev_put(old_net_dev);
-       /* if modify_gid failed, just delete the old gid */
-       if (ret || action == GID_TABLE_WRITE_ACTION_DEL) {
-               gid = &zgid;
-               attr = &zattr;
-               table->data_vec[ix].context = NULL;
+       entry = &table->data_vec[ix];
+       if ((entry->props & GID_TABLE_ENTRY_INVALID) == 0) {
+               WARN(1, "GID table corruption device=%s port=%d index=%d\n",
+                    attr->device->name, attr->port_num,
+                    attr->index);
+               return -EINVAL;
        }
 
-       memcpy(&table->data_vec[ix].gid, gid, sizeof(*gid));
-       memcpy(&table->data_vec[ix].attr, attr, sizeof(*attr));
-       if (default_gid) {
-               table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT;
-               if (action == GID_TABLE_WRITE_ACTION_DEL)
-                       table->data_vec[ix].attr.gid_type = old_gid_type;
+       if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
+               ret = attr->device->add_gid(gid, attr, &entry->context);
+               if (ret) {
+                       pr_err("%s GID add failed device=%s port=%d index=%d\n",
+                              __func__, attr->device->name, attr->port_num,
+                              attr->index);
+                       goto add_err;
+               }
        }
-       if (table->data_vec[ix].attr.ndev &&
-           table->data_vec[ix].attr.ndev != old_net_dev)
-               dev_hold(table->data_vec[ix].attr.ndev);
-
-       table->data_vec[ix].props &= ~GID_TABLE_ENTRY_INVALID;
+       dev_hold(attr->ndev);
 
+add_err:
+       if (!ret)
+               pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
+                        attr->device->name, attr->port_num, ix, gid->raw);
        return ret;
 }
 
-static int add_gid(struct ib_device *ib_dev, u8 port,
-                  struct ib_gid_table *table, int ix,
-                  const union ib_gid *gid,
-                  const struct ib_gid_attr *attr,
-                  bool  default_gid) {
-       return write_gid(ib_dev, port, table, ix, gid, attr,
-                        GID_TABLE_WRITE_ACTION_ADD, default_gid);
-}
+/**
+ * add_modify_gid - Add or modify GID table entry
+ *
+ * @table:     GID table in which GID to be added or modified
+ * @gid:       GID content
+ * @attr:      Attributes of the GID
+ *
+ * Returns 0 on success or appropriate error code. It accepts zero
+ * GID addition for non RoCE ports for HCA's who report them as valid
+ * GID. However such zero GIDs are not added to the cache.
+ */
+static int add_modify_gid(struct ib_gid_table *table,
+                         const union ib_gid *gid,
+                         const struct ib_gid_attr *attr)
+{
+       int ret;
+
+       if (rdma_protocol_roce(attr->device, attr->port_num)) {
+               ret = add_roce_gid(table, gid, attr);
+               if (ret)
+                       return ret;
+       } else {
+               /*
+                * Some HCA's report multiple GID entries with only one
+                * valid GID, but remaining as zero GID.
+                * So ignore such behavior for IB link layer and don't
+                * fail the call, but don't add such entry to GID cache.
+                */
+               if (!memcmp(gid, &zgid, sizeof(*gid)))
+                       return 0;
+       }
+
+       lockdep_assert_held(&table->lock);
+       memcpy(&table->data_vec[attr->index].gid, gid, sizeof(*gid));
+       memcpy(&table->data_vec[attr->index].attr, attr, sizeof(*attr));
 
-static int modify_gid(struct ib_device *ib_dev, u8 port,
-                     struct ib_gid_table *table, int ix,
-                     const union ib_gid *gid,
-                     const struct ib_gid_attr *attr,
-                     bool  default_gid) {
-       return write_gid(ib_dev, port, table, ix, gid, attr,
-                        GID_TABLE_WRITE_ACTION_MODIFY, default_gid);
+       write_lock_irq(&table->rwlock);
+       table->data_vec[attr->index].props &= ~GID_TABLE_ENTRY_INVALID;
+       write_unlock_irq(&table->rwlock);
+       return 0;
 }
 
-static int del_gid(struct ib_device *ib_dev, u8 port,
-                  struct ib_gid_table *table, int ix,
-                  bool  default_gid) {
-       return write_gid(ib_dev, port, table, ix, &zgid, &zattr,
-                        GID_TABLE_WRITE_ACTION_DEL, default_gid);
+/**
+ * del_gid - Delete GID table entry
+ *
+ * @ib_dev:    IB device whose GID entry to be deleted
+ * @port:      Port number of the IB device
+ * @table:     GID table of the IB device for a port
+ * @ix:                GID entry index to delete
+ *
+ */
+static void del_gid(struct ib_device *ib_dev, u8 port,
+                   struct ib_gid_table *table, int ix)
+{
+       lockdep_assert_held(&table->lock);
+       write_lock_irq(&table->rwlock);
+       table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
+       write_unlock_irq(&table->rwlock);
+
+       if (rdma_protocol_roce(ib_dev, port))
+               del_roce_gid(ib_dev, port, table, ix);
+       memcpy(&table->data_vec[ix].gid, &zgid, sizeof(zgid));
+       memset(&table->data_vec[ix].attr, 0, sizeof(table->data_vec[ix].attr));
+       table->data_vec[ix].context = NULL;
 }
 
-/* rwlock should be read locked */
+/* rwlock should be read locked, or lock should be held */
 static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
                    const struct ib_gid_attr *val, bool default_gid,
                    unsigned long mask, int *pempty)
@@ -268,15 +285,32 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
 
                i++;
 
+               /* find_gid() is used during GID addition where it is expected
+                * to return a free entry slot which is not duplicate.
+                * Free entry slot is requested and returned if pempty is set,
+                * so lookup free slot only if requested.
+                */
+               if (pempty && empty < 0) {
+                       if (data->props & GID_TABLE_ENTRY_INVALID) {
+                               /* Found an invalid (free) entry; allocate it */
+                               if (data->props & GID_TABLE_ENTRY_DEFAULT) {
+                                       if (default_gid)
+                                               empty = curr_index;
+                               } else {
+                                       empty = curr_index;
+                               }
+                       }
+               }
+
+               /*
+                * Additionally find_gid() is used to find valid entry during
+                * lookup operation, where validity needs to be checked. So
+                * find the empty entry first to continue to search for a free
+                * slot and ignore its INVALID flag.
+                */
                if (data->props & GID_TABLE_ENTRY_INVALID)
                        continue;
 
-               if (empty < 0)
-                       if (!memcmp(&data->gid, &zgid, sizeof(*gid)) &&
-                           !memcmp(attr, &zattr, sizeof(*attr)) &&
-                           !data->props)
-                               empty = curr_index;
-
                if (found >= 0)
                        continue;
 
@@ -312,20 +346,56 @@ static void make_default_gid(struct  net_device *dev, union ib_gid *gid)
        addrconf_ifid_eui48(&gid->raw[8], dev);
 }
 
-int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
-                    union ib_gid *gid, struct ib_gid_attr *attr)
+static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
+                             union ib_gid *gid, struct ib_gid_attr *attr,
+                             unsigned long mask, bool default_gid)
 {
        struct ib_gid_table *table;
-       int ix;
        int ret = 0;
-       struct net_device *idev;
        int empty;
+       int ix;
 
-       table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
-
+       /* Do not allow adding zero GID in support of
+        * IB spec version 1.3 section 4.1.1 point (6) and
+        * section 12.7.10 and section 12.7.20
+        */
        if (!memcmp(gid, &zgid, sizeof(*gid)))
                return -EINVAL;
 
+       table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
+
+       mutex_lock(&table->lock);
+
+       ix = find_gid(table, gid, attr, default_gid, mask, &empty);
+       if (ix >= 0)
+               goto out_unlock;
+
+       if (empty < 0) {
+               ret = -ENOSPC;
+               goto out_unlock;
+       }
+       attr->device = ib_dev;
+       attr->index = empty;
+       attr->port_num = port;
+       ret = add_modify_gid(table, gid, attr);
+       if (!ret)
+               dispatch_gid_change_event(ib_dev, port);
+
+out_unlock:
+       mutex_unlock(&table->lock);
+       if (ret)
+               pr_warn("%s: unable to add gid %pI6 error=%d\n",
+                       __func__, gid->raw, ret);
+       return ret;
+}
+
+int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
+                    union ib_gid *gid, struct ib_gid_attr *attr)
+{
+       struct net_device *idev;
+       unsigned long mask;
+       int ret;
+
        if (ib_dev->get_netdev) {
                idev = ib_dev->get_netdev(ib_dev, port);
                if (idev && attr->ndev != idev) {
@@ -342,27 +412,11 @@ int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
                        dev_put(idev);
        }
 
-       mutex_lock(&table->lock);
-       write_lock_irq(&table->rwlock);
-
-       ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID |
-                     GID_ATTR_FIND_MASK_GID_TYPE |
-                     GID_ATTR_FIND_MASK_NETDEV, &empty);
-       if (ix >= 0)
-               goto out_unlock;
-
-       if (empty < 0) {
-               ret = -ENOSPC;
-               goto out_unlock;
-       }
-
-       ret = add_gid(ib_dev, port, table, empty, gid, attr, false);
-       if (!ret)
-               dispatch_gid_change_event(ib_dev, port);
+       mask = GID_ATTR_FIND_MASK_GID |
+              GID_ATTR_FIND_MASK_GID_TYPE |
+              GID_ATTR_FIND_MASK_NETDEV;
 
-out_unlock:
-       write_unlock_irq(&table->rwlock);
-       mutex_unlock(&table->lock);
+       ret = __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false);
        return ret;
 }
 
@@ -370,29 +424,32 @@ int ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
                     union ib_gid *gid, struct ib_gid_attr *attr)
 {
        struct ib_gid_table *table;
+       int ret = 0;
        int ix;
 
        table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
 
        mutex_lock(&table->lock);
-       write_lock_irq(&table->rwlock);
 
        ix = find_gid(table, gid, attr, false,
                      GID_ATTR_FIND_MASK_GID      |
                      GID_ATTR_FIND_MASK_GID_TYPE |
-                     GID_ATTR_FIND_MASK_NETDEV   |
-                     GID_ATTR_FIND_MASK_DEFAULT,
+                     GID_ATTR_FIND_MASK_NETDEV,
                      NULL);
-       if (ix < 0)
+       if (ix < 0) {
+               ret = -EINVAL;
                goto out_unlock;
+       }
 
-       if (!del_gid(ib_dev, port, table, ix, false))
-               dispatch_gid_change_event(ib_dev, port);
+       del_gid(ib_dev, port, table, ix);
+       dispatch_gid_change_event(ib_dev, port);
 
 out_unlock:
-       write_unlock_irq(&table->rwlock);
        mutex_unlock(&table->lock);
-       return 0;
+       if (ret)
+               pr_debug("%s: can't delete gid %pI6 error=%d\n",
+                        __func__, gid->raw, ret);
+       return ret;
 }
 
 int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
@@ -405,16 +462,14 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
        table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
 
        mutex_lock(&table->lock);
-       write_lock_irq(&table->rwlock);
 
-       for (ix = 0; ix < table->sz; ix++)
-               if (table->data_vec[ix].attr.ndev == ndev)
-                       if (!del_gid(ib_dev, port, table, ix,
-                                    !!(table->data_vec[ix].props &
-                                       GID_TABLE_ENTRY_DEFAULT)))
-                               deleted = true;
+       for (ix = 0; ix < table->sz; ix++) {
+               if (table->data_vec[ix].attr.ndev == ndev) {
+                       del_gid(ib_dev, port, table, ix);
+                       deleted = true;
+               }
+       }
 
-       write_unlock_irq(&table->rwlock);
        mutex_unlock(&table->lock);
 
        if (deleted)
@@ -492,6 +547,19 @@ static int ib_cache_gid_find(struct ib_device *ib_dev,
                                        mask, port, index);
 }
 
+/**
+ * ib_find_cached_gid_by_port - Returns the GID table index where a specified
+ * GID value occurs. It searches for the specified GID value in the local
+ * software cache.
+ * @device: The device to query.
+ * @gid: The GID value to search for.
+ * @gid_type: The GID type to search for.
+ * @port_num: The port number of the device where the GID value should be
+ *   searched.
+ * @ndev: In RoCE, the net device of the device. NULL means ignore.
+ * @index: The index into the cached GID table where the GID was found. This
+ *   parameter may be NULL.
+ */
 int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
                               const union ib_gid *gid,
                               enum ib_gid_type gid_type,
@@ -528,7 +596,7 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
 EXPORT_SYMBOL(ib_find_cached_gid_by_port);
 
 /**
- * ib_find_gid_by_filter - Returns the GID table index where a specified
+ * ib_cache_gid_find_by_filter - Returns the GID table index where a specified
  * GID value occurs
  * @device: The device to query.
  * @gid: The GID value to search for.
@@ -539,7 +607,7 @@ EXPORT_SYMBOL(ib_find_cached_gid_by_port);
  *   otherwise, we continue searching the GID table. It's guaranteed that
  *   while filter is executed, ndev field is valid and the structure won't
  *   change. filter is executed in an atomic context. filter must not be NULL.
- * @index: The index into the cached GID table where the GID was found.  This
+ * @index: The index into the cached GID table where the GID was found. This
  *   parameter may be NULL.
  *
  * ib_cache_gid_find_by_filter() searches for the specified GID value
@@ -598,6 +666,7 @@ static struct ib_gid_table *alloc_gid_table(int sz)
 {
        struct ib_gid_table *table =
                kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);
+       int i;
 
        if (!table)
                return NULL;
@@ -611,6 +680,11 @@ static struct ib_gid_table *alloc_gid_table(int sz)
        table->sz = sz;
        rwlock_init(&table->rwlock);
 
+       /* Mark all entries as invalid so that the allocator can allocate
+        * one of the invalid (free) entries.
+        */
+       for (i = 0; i < sz; i++)
+               table->data_vec[i].props |= GID_TABLE_ENTRY_INVALID;
        return table;
 
 err_free_table:
@@ -635,16 +709,15 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
        if (!table)
                return;
 
-       write_lock_irq(&table->rwlock);
+       mutex_lock(&table->lock);
        for (i = 0; i < table->sz; ++i) {
                if (memcmp(&table->data_vec[i].gid, &zgid,
-                          sizeof(table->data_vec[i].gid)))
-                       if (!del_gid(ib_dev, port, table, i,
-                                    table->data_vec[i].props &
-                                    GID_ATTR_FIND_MASK_DEFAULT))
-                               deleted = true;
+                          sizeof(table->data_vec[i].gid))) {
+                       del_gid(ib_dev, port, table, i);
+                       deleted = true;
+               }
        }
-       write_unlock_irq(&table->rwlock);
+       mutex_unlock(&table->lock);
 
        if (deleted)
                dispatch_gid_change_event(ib_dev, port);
@@ -657,9 +730,9 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
 {
        union ib_gid gid;
        struct ib_gid_attr gid_attr;
-       struct ib_gid_attr zattr_type = zattr;
        struct ib_gid_table *table;
        unsigned int gid_type;
+       unsigned long mask;
 
        table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
 
@@ -668,60 +741,19 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
        gid_attr.ndev = ndev;
 
        for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) {
-               int ix;
-               union ib_gid current_gid;
-               struct ib_gid_attr current_gid_attr = {};
-
                if (1UL << gid_type & ~gid_type_mask)
                        continue;
 
                gid_attr.gid_type = gid_type;
 
-               mutex_lock(&table->lock);
-               write_lock_irq(&table->rwlock);
-               ix = find_gid(table, NULL, &gid_attr, true,
-                             GID_ATTR_FIND_MASK_GID_TYPE |
-                             GID_ATTR_FIND_MASK_DEFAULT,
-                             NULL);
-
-               /* Coudn't find default GID location */
-               if (WARN_ON(ix < 0))
-                       goto release;
-
-               zattr_type.gid_type = gid_type;
-
-               if (!__ib_cache_gid_get(ib_dev, port, ix,
-                                       &current_gid, &current_gid_attr) &&
-                   mode == IB_CACHE_GID_DEFAULT_MODE_SET &&
-                   !memcmp(&gid, &current_gid, sizeof(gid)) &&
-                   !memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr)))
-                       goto release;
-
-               if (memcmp(&current_gid, &zgid, sizeof(current_gid)) ||
-                   memcmp(&current_gid_attr, &zattr_type,
-                          sizeof(current_gid_attr))) {
-                       if (del_gid(ib_dev, port, table, ix, true)) {
-                               pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n",
-                                       ix, gid.raw);
-                               goto release;
-                       } else {
-                               dispatch_gid_change_event(ib_dev, port);
-                       }
-               }
-
                if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) {
-                       if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true))
-                               pr_warn("ib_cache_gid: unable to add default gid %pI6\n",
-                                       gid.raw);
-                       else
-                               dispatch_gid_change_event(ib_dev, port);
+                       mask = GID_ATTR_FIND_MASK_GID_TYPE |
+                              GID_ATTR_FIND_MASK_DEFAULT;
+                       __ib_cache_gid_add(ib_dev, port, &gid,
+                                          &gid_attr, mask, true);
+               } else if (mode == IB_CACHE_GID_DEFAULT_MODE_DELETE) {
+                       ib_cache_gid_del(ib_dev, port, &gid, &gid_attr);
                }
-
-release:
-               if (current_gid_attr.ndev)
-                       dev_put(current_gid_attr.ndev);
-               write_unlock_irq(&table->rwlock);
-               mutex_unlock(&table->lock);
        }
 }
 
@@ -848,6 +880,20 @@ int ib_get_cached_gid(struct ib_device *device,
 }
 EXPORT_SYMBOL(ib_get_cached_gid);
 
+/**
+ * ib_find_cached_gid - Returns the port number and GID table index where
+ *   a specified GID value occurs.
+ * @device: The device to query.
+ * @gid: The GID value to search for.
+ * @gid_type: The GID type to search for.
+ * @ndev: In RoCE, the net device of the device. NULL means ignore.
+ * @port_num: The port number of the device where the GID value was found.
+ * @index: The index into the cached GID table where the GID was found.  This
+ *   parameter may be NULL.
+ *
+ * ib_find_cached_gid() searches for the specified GID value in
+ * the local software cache.
+ */
 int ib_find_cached_gid(struct ib_device *device,
                       const union ib_gid *gid,
                       enum ib_gid_type gid_type,
@@ -868,7 +914,7 @@ int ib_find_gid_by_filter(struct ib_device *device,
                          void *context, u16 *index)
 {
        /* Only RoCE GID table supports filter function */
-       if (!rdma_cap_roce_gid_table(device, port_num) && filter)
+       if (!rdma_protocol_roce(device, port_num) && filter)
                return -EPROTONOSUPPORT;
 
        return ib_cache_gid_find_by_filter(device, gid,
@@ -910,8 +956,7 @@ int ib_get_cached_subnet_prefix(struct ib_device *device,
        unsigned long flags;
        int p;
 
-       if (port_num < rdma_start_port(device) ||
-           port_num > rdma_end_port(device))
+       if (!rdma_is_port_valid(device, port_num))
                return -EINVAL;
 
        p = port_num - rdma_start_port(device);
@@ -1021,7 +1066,7 @@ int ib_get_cached_port_state(struct ib_device   *device,
        unsigned long flags;
        int ret = 0;
 
-       if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
+       if (!rdma_is_port_valid(device, port_num))
                return -EINVAL;
 
        read_lock_irqsave(&device->cache.lock, flags);
@@ -1033,21 +1078,46 @@ int ib_get_cached_port_state(struct ib_device   *device,
 }
 EXPORT_SYMBOL(ib_get_cached_port_state);
 
+static int config_non_roce_gid_cache(struct ib_device *device,
+                                    u8 port, int gid_tbl_len)
+{
+       struct ib_gid_attr gid_attr = {};
+       struct ib_gid_table *table;
+       union ib_gid gid;
+       int ret = 0;
+       int i;
+
+       gid_attr.device = device;
+       gid_attr.port_num = port;
+       table = device->cache.ports[port - rdma_start_port(device)].gid;
+
+       mutex_lock(&table->lock);
+       for (i = 0; i < gid_tbl_len; ++i) {
+               if (!device->query_gid)
+                       continue;
+               ret = device->query_gid(device, port, i, &gid);
+               if (ret) {
+                       pr_warn("query_gid failed (%d) for %s (index %d)\n",
+                               ret, device->name, i);
+                       goto err;
+               }
+               gid_attr.index = i;
+               add_modify_gid(table, &gid, &gid_attr);
+       }
+err:
+       mutex_unlock(&table->lock);
+       return ret;
+}
+
 static void ib_cache_update(struct ib_device *device,
                            u8                port,
                            bool              enforce_security)
 {
        struct ib_port_attr       *tprops = NULL;
        struct ib_pkey_cache      *pkey_cache = NULL, *old_pkey_cache;
-       struct ib_gid_cache {
-               int             table_len;
-               union ib_gid    table[0];
-       }                         *gid_cache = NULL;
        int                        i;
        int                        ret;
        struct ib_gid_table       *table;
-       bool                       use_roce_gid_table =
-                                       rdma_cap_roce_gid_table(device, port);
 
        if (!rdma_is_port_valid(device, port))
                return;
@@ -1065,6 +1135,13 @@ static void ib_cache_update(struct ib_device *device,
                goto err;
        }
 
+       if (!rdma_protocol_roce(device, port)) {
+               ret = config_non_roce_gid_cache(device, port,
+                                               tprops->gid_tbl_len);
+               if (ret)
+                       goto err;
+       }
+
        pkey_cache = kmalloc(sizeof *pkey_cache + tprops->pkey_tbl_len *
                             sizeof *pkey_cache->table, GFP_KERNEL);
        if (!pkey_cache)
@@ -1072,15 +1149,6 @@ static void ib_cache_update(struct ib_device *device,
 
        pkey_cache->table_len = tprops->pkey_tbl_len;
 
-       if (!use_roce_gid_table) {
-               gid_cache = kmalloc(sizeof(*gid_cache) + tprops->gid_tbl_len *
-                           sizeof(*gid_cache->table), GFP_KERNEL);
-               if (!gid_cache)
-                       goto err;
-
-               gid_cache->table_len = tprops->gid_tbl_len;
-       }
-
        for (i = 0; i < pkey_cache->table_len; ++i) {
                ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
                if (ret) {
@@ -1090,33 +1158,12 @@ static void ib_cache_update(struct ib_device *device,
                }
        }
 
-       if (!use_roce_gid_table) {
-               for (i = 0;  i < gid_cache->table_len; ++i) {
-                       ret = ib_query_gid(device, port, i,
-                                          gid_cache->table + i, NULL);
-                       if (ret) {
-                               pr_warn("ib_query_gid failed (%d) for %s (index %d)\n",
-                                       ret, device->name, i);
-                               goto err;
-                       }
-               }
-       }
-
        write_lock_irq(&device->cache.lock);
 
        old_pkey_cache = device->cache.ports[port -
                rdma_start_port(device)].pkey;
 
        device->cache.ports[port - rdma_start_port(device)].pkey = pkey_cache;
-       if (!use_roce_gid_table) {
-               write_lock(&table->rwlock);
-               for (i = 0; i < gid_cache->table_len; i++) {
-                       modify_gid(device, port, table, i, gid_cache->table + i,
-                                  &zattr, false);
-               }
-               write_unlock(&table->rwlock);
-       }
-
        device->cache.ports[port - rdma_start_port(device)].lmc = tprops->lmc;
        device->cache.ports[port - rdma_start_port(device)].port_state =
                tprops->state;
@@ -1130,14 +1177,12 @@ static void ib_cache_update(struct ib_device *device,
                                         port,
                                         tprops->subnet_prefix);
 
-       kfree(gid_cache);
        kfree(old_pkey_cache);
        kfree(tprops);
        return;
 
 err:
        kfree(pkey_cache);
-       kfree(gid_cache);
        kfree(tprops);
 }
 
index e674915..a92e1a5 100644 (file)
@@ -462,13 +462,31 @@ static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
                                       grh, &av->ah_attr);
 }
 
-static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
-                             struct cm_id_private *cm_id_priv)
+static int add_cm_id_to_port_list(struct cm_id_private *cm_id_priv,
+                                 struct cm_av *av,
+                                 struct cm_port *port)
+{
+       unsigned long flags;
+       int ret = 0;
+
+       spin_lock_irqsave(&cm.lock, flags);
+
+       if (&cm_id_priv->av == av)
+               list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
+       else if (&cm_id_priv->alt_av == av)
+               list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
+       else
+               ret = -EINVAL;
+
+       spin_unlock_irqrestore(&cm.lock, flags);
+       return ret;
+}
+
+static struct cm_port *get_cm_port_from_path(struct sa_path_rec *path)
 {
        struct cm_device *cm_dev;
        struct cm_port *port = NULL;
        unsigned long flags;
-       int ret;
        u8 p;
        struct net_device *ndev = ib_get_ndev_from_path(path);
 
@@ -477,7 +495,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
                if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
                                        sa_conv_pathrec_to_gid_type(path),
                                        ndev, &p, NULL)) {
-                       port = cm_dev->port[p-1];
+                       port = cm_dev->port[p - 1];
                        break;
                }
        }
@@ -485,9 +503,20 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
 
        if (ndev)
                dev_put(ndev);
+       return port;
+}
 
+static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
+                             struct cm_id_private *cm_id_priv)
+{
+       struct cm_device *cm_dev;
+       struct cm_port *port;
+       int ret;
+
+       port = get_cm_port_from_path(path);
        if (!port)
                return -EINVAL;
+       cm_dev = port->cm_dev;
 
        ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num,
                                  be16_to_cpu(path->pkey), &av->pkey_index);
@@ -502,16 +531,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
 
        av->timeout = path->packet_life_time + 1;
 
-       spin_lock_irqsave(&cm.lock, flags);
-       if (&cm_id_priv->av == av)
-               list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
-       else if (&cm_id_priv->alt_av == av)
-               list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
-       else
-               ret = -EINVAL;
-
-       spin_unlock_irqrestore(&cm.lock, flags);
-
+       ret = add_cm_id_to_port_list(cm_id_priv, av, port);
        return ret;
 }
 
@@ -1523,6 +1543,8 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
                cm_req_get_primary_local_ack_timeout(req_msg);
        primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
        primary_path->service_id = req_msg->service_id;
+       if (sa_path_is_roce(primary_path))
+               primary_path->roce.route_resolved = false;
 
        if (cm_req_has_alt_path(req_msg)) {
                alt_path->dgid = req_msg->alt_local_gid;
@@ -1542,6 +1564,9 @@ static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
                        cm_req_get_alt_local_ack_timeout(req_msg);
                alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
                alt_path->service_id = req_msg->service_id;
+
+               if (sa_path_is_roce(alt_path))
+                       alt_path->roce.route_resolved = false;
        }
        cm_format_path_lid_from_req(req_msg, primary_path, alt_path);
 }
@@ -3150,6 +3175,13 @@ static int cm_lap_handler(struct cm_work *work)
        struct ib_mad_send_buf *msg = NULL;
        int ret;
 
+       /* Currently Alternate path messages are not supported for
+        * RoCE link layer.
+        */
+       if (rdma_protocol_roce(work->port->cm_dev->ib_device,
+                              work->port->port_num))
+               return -EINVAL;
+
        /* todo: verify LAP request and send reject APR if invalid. */
        lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
        cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id,
@@ -3299,6 +3331,13 @@ static int cm_apr_handler(struct cm_work *work)
        struct cm_apr_msg *apr_msg;
        int ret;
 
+       /* Currently Alternate path messages are not supported for
+        * RoCE link layer.
+        */
+       if (rdma_protocol_roce(work->port->cm_dev->ib_device,
+                              work->port->port_num))
+               return -EINVAL;
+
        apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
        cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id,
                                   apr_msg->local_comm_id);
index 6ab1059..51a6410 100644 (file)
@@ -62,6 +62,7 @@
 #include <rdma/iw_cm.h>
 
 #include "core_priv.h"
+#include "cma_priv.h"
 
 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("Generic RDMA CM Agent");
@@ -174,7 +175,7 @@ static struct cma_pernet *cma_pernet(struct net *net)
        return net_generic(net, cma_pernet_id);
 }
 
-static struct idr *cma_pernet_idr(struct net *net, enum rdma_port_space ps)
+static struct idr *cma_pernet_idr(struct net *net, enum rdma_ucm_port_space ps)
 {
        struct cma_pernet *pernet = cma_pernet(net);
 
@@ -203,7 +204,7 @@ struct cma_device {
 };
 
 struct rdma_bind_list {
-       enum rdma_port_space    ps;
+       enum rdma_ucm_port_space ps;
        struct hlist_head       owners;
        unsigned short          port;
 };
@@ -216,7 +217,7 @@ struct class_port_info_context {
        u8                              port_num;
 };
 
-static int cma_ps_alloc(struct net *net, enum rdma_port_space ps,
+static int cma_ps_alloc(struct net *net, enum rdma_ucm_port_space ps,
                        struct rdma_bind_list *bind_list, int snum)
 {
        struct idr *idr = cma_pernet_idr(net, ps);
@@ -225,14 +226,15 @@ static int cma_ps_alloc(struct net *net, enum rdma_port_space ps,
 }
 
 static struct rdma_bind_list *cma_ps_find(struct net *net,
-                                         enum rdma_port_space ps, int snum)
+                                         enum rdma_ucm_port_space ps, int snum)
 {
        struct idr *idr = cma_pernet_idr(net, ps);
 
        return idr_find(idr, snum);
 }
 
-static void cma_ps_remove(struct net *net, enum rdma_port_space ps, int snum)
+static void cma_ps_remove(struct net *net, enum rdma_ucm_port_space ps,
+                         int snum)
 {
        struct idr *idr = cma_pernet_idr(net, ps);
 
@@ -327,46 +329,6 @@ struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
  * We do this by disabling removal notification while a callback is in process,
  * and reporting it after the callback completes.
  */
-struct rdma_id_private {
-       struct rdma_cm_id       id;
-
-       struct rdma_bind_list   *bind_list;
-       struct hlist_node       node;
-       struct list_head        list; /* listen_any_list or cma_device.list */
-       struct list_head        listen_list; /* per device listens */
-       struct cma_device       *cma_dev;
-       struct list_head        mc_list;
-
-       int                     internal_id;
-       enum rdma_cm_state      state;
-       spinlock_t              lock;
-       struct mutex            qp_mutex;
-
-       struct completion       comp;
-       atomic_t                refcount;
-       struct mutex            handler_mutex;
-
-       int                     backlog;
-       int                     timeout_ms;
-       struct ib_sa_query      *query;
-       int                     query_id;
-       union {
-               struct ib_cm_id *ib;
-               struct iw_cm_id *iw;
-       } cm_id;
-
-       u32                     seq_num;
-       u32                     qkey;
-       u32                     qp_num;
-       pid_t                   owner;
-       u32                     options;
-       u8                      srq;
-       u8                      tos;
-       bool                    tos_set;
-       u8                      reuseaddr;
-       u8                      afonly;
-       enum ib_gid_type        gid_type;
-};
 
 struct cma_multicast {
        struct rdma_id_private *id_priv;
@@ -505,6 +467,8 @@ static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
        id_priv->id.route.addr.dev_addr.transport =
                rdma_node_get_transport(cma_dev->device->node_type);
        list_add_tail(&id_priv->list, &cma_dev->id_list);
+       id_priv->res.type = RDMA_RESTRACK_CM_ID;
+       rdma_restrack_add(&id_priv->res);
 }
 
 static void cma_attach_to_dev(struct rdma_id_private *id_priv,
@@ -777,10 +741,10 @@ static void cma_deref_id(struct rdma_id_private *id_priv)
                complete(&id_priv->comp);
 }
 
-struct rdma_cm_id *rdma_create_id(struct net *net,
-                                 rdma_cm_event_handler event_handler,
-                                 void *context, enum rdma_port_space ps,
-                                 enum ib_qp_type qp_type)
+struct rdma_cm_id *__rdma_create_id(struct net *net,
+                                   rdma_cm_event_handler event_handler,
+                                   void *context, enum rdma_ucm_port_space ps,
+                                   enum ib_qp_type qp_type, const char *caller)
 {
        struct rdma_id_private *id_priv;
 
@@ -788,7 +752,10 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
        if (!id_priv)
                return ERR_PTR(-ENOMEM);
 
-       id_priv->owner = task_pid_nr(current);
+       if (caller)
+               id_priv->res.kern_name = caller;
+       else
+               rdma_restrack_set_task(&id_priv->res, current);
        id_priv->state = RDMA_CM_IDLE;
        id_priv->id.context = context;
        id_priv->id.event_handler = event_handler;
@@ -808,7 +775,7 @@ struct rdma_cm_id *rdma_create_id(struct net *net,
 
        return &id_priv->id;
 }
-EXPORT_SYMBOL(rdma_create_id);
+EXPORT_SYMBOL(__rdma_create_id);
 
 static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
 {
@@ -1400,7 +1367,7 @@ static struct net_device *cma_get_net_dev(struct ib_cm_event *ib_event,
        return net_dev;
 }
 
-static enum rdma_port_space rdma_ps_from_service_id(__be64 service_id)
+static enum rdma_ucm_port_space rdma_ps_from_service_id(__be64 service_id)
 {
        return (be64_to_cpu(service_id) >> 16) & 0xffff;
 }
@@ -1441,21 +1408,12 @@ static bool cma_match_private_data(struct rdma_id_private *id_priv,
        return true;
 }
 
-static bool cma_protocol_roce_dev_port(struct ib_device *device, int port_num)
-{
-       enum rdma_link_layer ll = rdma_port_get_link_layer(device, port_num);
-       enum rdma_transport_type transport =
-               rdma_node_get_transport(device->node_type);
-
-       return ll == IB_LINK_LAYER_ETHERNET && transport == RDMA_TRANSPORT_IB;
-}
-
 static bool cma_protocol_roce(const struct rdma_cm_id *id)
 {
        struct ib_device *device = id->device;
        const int port_num = id->port_num ?: rdma_start_port(device);
 
-       return cma_protocol_roce_dev_port(device, port_num);
+       return rdma_protocol_roce(device, port_num);
 }
 
 static bool cma_match_net_dev(const struct rdma_cm_id *id,
@@ -1468,7 +1426,7 @@ static bool cma_match_net_dev(const struct rdma_cm_id *id,
                /* This request is an AF_IB request or a RoCE request */
                return (!id->port_num || id->port_num == port_num) &&
                       (addr->src_addr.ss_family == AF_IB ||
-                       cma_protocol_roce_dev_port(id->device, port_num));
+                       rdma_protocol_roce(id->device, port_num));
 
        return !addr->dev_addr.bound_dev_if ||
               (net_eq(dev_net(net_dev), addr->dev_addr.net) &&
@@ -1523,7 +1481,7 @@ static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id,
                if (PTR_ERR(*net_dev) == -EAFNOSUPPORT) {
                        /* Assuming the protocol is AF_IB */
                        *net_dev = NULL;
-               } else if (cma_protocol_roce_dev_port(req.device, req.port)) {
+               } else if (rdma_protocol_roce(req.device, req.port)) {
                        /* TODO find the net dev matching the request parameters
                         * through the RoCE GID table */
                        *net_dev = NULL;
@@ -1668,6 +1626,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
        mutex_unlock(&id_priv->handler_mutex);
 
        if (id_priv->cma_dev) {
+               rdma_restrack_del(&id_priv->res);
                if (rdma_cap_ib_cm(id_priv->id.device, 1)) {
                        if (id_priv->cm_id.ib)
                                ib_destroy_cm_id(id_priv->cm_id.ib);
@@ -1817,6 +1776,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
                                               struct ib_cm_event *ib_event,
                                               struct net_device *net_dev)
 {
+       struct rdma_id_private *listen_id_priv;
        struct rdma_id_private *id_priv;
        struct rdma_cm_id *id;
        struct rdma_route *rt;
@@ -1826,9 +1786,11 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
                ib_event->param.req_rcvd.primary_path->service_id;
        int ret;
 
-       id = rdma_create_id(listen_id->route.addr.dev_addr.net,
+       listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
+       id = __rdma_create_id(listen_id->route.addr.dev_addr.net,
                            listen_id->event_handler, listen_id->context,
-                           listen_id->ps, ib_event->param.req_rcvd.qp_type);
+                           listen_id->ps, ib_event->param.req_rcvd.qp_type,
+                           listen_id_priv->res.kern_name);
        if (IS_ERR(id))
                return NULL;
 
@@ -1877,14 +1839,17 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
                                              struct ib_cm_event *ib_event,
                                              struct net_device *net_dev)
 {
+       struct rdma_id_private *listen_id_priv;
        struct rdma_id_private *id_priv;
        struct rdma_cm_id *id;
        const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
        struct net *net = listen_id->route.addr.dev_addr.net;
        int ret;
 
-       id = rdma_create_id(net, listen_id->event_handler, listen_id->context,
-                           listen_id->ps, IB_QPT_UD);
+       listen_id_priv = container_of(listen_id, struct rdma_id_private, id);
+       id = __rdma_create_id(net, listen_id->event_handler, listen_id->context,
+                             listen_id->ps, IB_QPT_UD,
+                             listen_id_priv->res.kern_name);
        if (IS_ERR(id))
                return NULL;
 
@@ -2150,10 +2115,11 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
                goto out;
 
        /* Create a new RDMA id for the new IW CM ID */
-       new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
-                                  listen_id->id.event_handler,
-                                  listen_id->id.context,
-                                  RDMA_PS_TCP, IB_QPT_RC);
+       new_cm_id = __rdma_create_id(listen_id->id.route.addr.dev_addr.net,
+                                    listen_id->id.event_handler,
+                                    listen_id->id.context,
+                                    RDMA_PS_TCP, IB_QPT_RC,
+                                    listen_id->res.kern_name);
        if (IS_ERR(new_cm_id)) {
                ret = -ENOMEM;
                goto out;
@@ -2278,8 +2244,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
        if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
                return;
 
-       id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
-                           id_priv->id.qp_type);
+       id = __rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
+                             id_priv->id.qp_type, id_priv->res.kern_name);
        if (IS_ERR(id))
                return;
 
@@ -2541,6 +2507,7 @@ cma_iboe_set_path_rec_l2_fields(struct rdma_id_private *id_priv)
                gid_type = ib_network_to_gid_type(addr->dev_addr.network);
        route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type);
 
+       route->path_rec->roce.route_resolved = true;
        sa_path_set_ndev(route->path_rec, addr->dev_addr.net);
        sa_path_set_ifindex(route->path_rec, ndev->ifindex);
        sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr);
@@ -3028,7 +2995,7 @@ static void cma_bind_port(struct rdma_bind_list *bind_list,
        hlist_add_head(&id_priv->node, &bind_list->owners);
 }
 
-static int cma_alloc_port(enum rdma_port_space ps,
+static int cma_alloc_port(enum rdma_ucm_port_space ps,
                          struct rdma_id_private *id_priv, unsigned short snum)
 {
        struct rdma_bind_list *bind_list;
@@ -3091,7 +3058,7 @@ static int cma_port_is_unique(struct rdma_bind_list *bind_list,
        return 0;
 }
 
-static int cma_alloc_any_port(enum rdma_port_space ps,
+static int cma_alloc_any_port(enum rdma_ucm_port_space ps,
                              struct rdma_id_private *id_priv)
 {
        static unsigned int last_used_port;
@@ -3169,7 +3136,7 @@ static int cma_check_port(struct rdma_bind_list *bind_list,
        return 0;
 }
 
-static int cma_use_port(enum rdma_port_space ps,
+static int cma_use_port(enum rdma_ucm_port_space ps,
                        struct rdma_id_private *id_priv)
 {
        struct rdma_bind_list *bind_list;
@@ -3203,8 +3170,8 @@ static int cma_bind_listen(struct rdma_id_private *id_priv)
        return ret;
 }
 
-static enum rdma_port_space cma_select_inet_ps(
-               struct rdma_id_private *id_priv)
+static enum rdma_ucm_port_space
+cma_select_inet_ps(struct rdma_id_private *id_priv)
 {
        switch (id_priv->id.ps) {
        case RDMA_PS_TCP:
@@ -3218,9 +3185,10 @@ static enum rdma_port_space cma_select_inet_ps(
        }
 }
 
-static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv)
+static enum rdma_ucm_port_space
+cma_select_ib_ps(struct rdma_id_private *id_priv)
 {
-       enum rdma_port_space ps = 0;
+       enum rdma_ucm_port_space ps = 0;
        struct sockaddr_ib *sib;
        u64 sid_ps, mask, sid;
 
@@ -3251,7 +3219,7 @@ static enum rdma_port_space cma_select_ib_ps(struct rdma_id_private *id_priv)
 
 static int cma_get_port(struct rdma_id_private *id_priv)
 {
-       enum rdma_port_space ps;
+       enum rdma_ucm_port_space ps;
        int ret;
 
        if (cma_family(id_priv) != AF_IB)
@@ -3389,8 +3357,10 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 
        return 0;
 err2:
-       if (id_priv->cma_dev)
+       if (id_priv->cma_dev) {
+               rdma_restrack_del(&id_priv->res);
                cma_release_dev(id_priv);
+       }
 err1:
        cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_IDLE);
        return ret;
@@ -3773,14 +3743,18 @@ static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
        return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
 }
 
-int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
+int __rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param,
+                 const char *caller)
 {
        struct rdma_id_private *id_priv;
        int ret;
 
        id_priv = container_of(id, struct rdma_id_private, id);
 
-       id_priv->owner = task_pid_nr(current);
+       if (caller)
+               id_priv->res.kern_name = caller;
+       else
+               rdma_restrack_set_task(&id_priv->res, current);
 
        if (!cma_comp(id_priv, RDMA_CM_CONNECT))
                return -EINVAL;
@@ -3820,7 +3794,7 @@ reject:
        rdma_reject(id, NULL, 0);
        return ret;
 }
-EXPORT_SYMBOL(rdma_accept);
+EXPORT_SYMBOL(__rdma_accept);
 
 int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
 {
@@ -3938,10 +3912,14 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
                        rdma_start_port(id_priv->cma_dev->device)];
 
                event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
-               ib_init_ah_from_mcmember(id_priv->id.device,
-                                        id_priv->id.port_num, &multicast->rec,
-                                        ndev, gid_type,
-                                        &event.param.ud.ah_attr);
+               ret = ib_init_ah_from_mcmember(id_priv->id.device,
+                                              id_priv->id.port_num,
+                                              &multicast->rec,
+                                              ndev, gid_type,
+                                              &event.param.ud.ah_attr);
+               if (ret)
+                       event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
+
                event.param.ud.qp_num = 0xFFFFFF;
                event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
                if (ndev)
@@ -4501,7 +4479,7 @@ static int cma_get_id_stats(struct sk_buff *skb, struct netlink_callback *cb)
                                          RDMA_NL_RDMA_CM_ATTR_DST_ADDR))
                                goto out;
 
-                       id_stats->pid           = id_priv->owner;
+                       id_stats->pid   = task_pid_vnr(id_priv->res.task);
                        id_stats->port_space    = id->ps;
                        id_stats->cm_state      = id_priv->state;
                        id_stats->qp_num        = id_priv->qp_num;
diff --git a/drivers/infiniband/core/cma_priv.h b/drivers/infiniband/core/cma_priv.h
new file mode 100644 (file)
index 0000000..194cfe7
--- /dev/null
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
+ * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
+ * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _CMA_PRIV_H
+#define _CMA_PRIV_H
+
+enum rdma_cm_state {
+       RDMA_CM_IDLE,
+       RDMA_CM_ADDR_QUERY,
+       RDMA_CM_ADDR_RESOLVED,
+       RDMA_CM_ROUTE_QUERY,
+       RDMA_CM_ROUTE_RESOLVED,
+       RDMA_CM_CONNECT,
+       RDMA_CM_DISCONNECT,
+       RDMA_CM_ADDR_BOUND,
+       RDMA_CM_LISTEN,
+       RDMA_CM_DEVICE_REMOVAL,
+       RDMA_CM_DESTROYING
+};
+
+struct rdma_id_private {
+       struct rdma_cm_id       id;
+
+       struct rdma_bind_list   *bind_list;
+       struct hlist_node       node;
+       struct list_head        list; /* listen_any_list or cma_device.list */
+       struct list_head        listen_list; /* per device listens */
+       struct cma_device       *cma_dev;
+       struct list_head        mc_list;
+
+       int                     internal_id;
+       enum rdma_cm_state      state;
+       spinlock_t              lock;
+       struct mutex            qp_mutex;
+
+       struct completion       comp;
+       atomic_t                refcount;
+       struct mutex            handler_mutex;
+
+       int                     backlog;
+       int                     timeout_ms;
+       struct ib_sa_query      *query;
+       int                     query_id;
+       union {
+               struct ib_cm_id *ib;
+               struct iw_cm_id *iw;
+       } cm_id;
+
+       u32                     seq_num;
+       u32                     qkey;
+       u32                     qp_num;
+       u32                     options;
+       u8                      srq;
+       u8                      tos;
+       bool                    tos_set;
+       u8                      reuseaddr;
+       u8                      afonly;
+       enum ib_gid_type        gid_type;
+
+       /*
+        * Internal to RDMA/core, don't use in the drivers
+        */
+       struct rdma_restrack_entry     res;
+};
+#endif /* _CMA_PRIV_H */
index 25bb178..54163a6 100644 (file)
@@ -333,4 +333,15 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
 
        return qp;
 }
+
+struct rdma_dev_addr;
+int rdma_resolve_ip_route(struct sockaddr *src_addr,
+                         const struct sockaddr *dst_addr,
+                         struct rdma_dev_addr *addr);
+
+int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
+                                const union ib_gid *dgid,
+                                u8 *dmac, const struct net_device *ndev,
+                                int *hoplimit);
+
 #endif /* _CORE_PRIV_H */
index b7459cf..ea9fbcf 100644 (file)
@@ -103,7 +103,6 @@ static int ib_device_check_mandatory(struct ib_device *device)
                IB_MANDATORY_FUNC(query_device),
                IB_MANDATORY_FUNC(query_port),
                IB_MANDATORY_FUNC(query_pkey),
-               IB_MANDATORY_FUNC(query_gid),
                IB_MANDATORY_FUNC(alloc_pd),
                IB_MANDATORY_FUNC(dealloc_pd),
                IB_MANDATORY_FUNC(create_ah),
@@ -853,7 +852,7 @@ int ib_query_port(struct ib_device *device,
        if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND)
                return 0;
 
-       err = ib_query_gid(device, port_num, 0, &gid, NULL);
+       err = device->query_gid(device, port_num, 0, &gid);
        if (err)
                return err;
 
@@ -871,19 +870,13 @@ EXPORT_SYMBOL(ib_query_port);
  * @attr: Returned GID attributes related to this GID index (only in RoCE).
  *   NULL means ignore.
  *
- * ib_query_gid() fetches the specified GID table entry.
+ * ib_query_gid() fetches the specified GID table entry from the cache.
  */
 int ib_query_gid(struct ib_device *device,
                 u8 port_num, int index, union ib_gid *gid,
                 struct ib_gid_attr *attr)
 {
-       if (rdma_cap_roce_gid_table(device, port_num))
-               return ib_get_cached_gid(device, port_num, index, gid, attr);
-
-       if (attr)
-               return -EINVAL;
-
-       return device->query_gid(device, port_num, index, gid);
+       return ib_get_cached_gid(device, port_num, index, gid, attr);
 }
 EXPORT_SYMBOL(ib_query_gid);
 
@@ -1049,19 +1042,18 @@ EXPORT_SYMBOL(ib_modify_port);
  *   a specified GID value occurs. Its searches only for IB link layer.
  * @device: The device to query.
  * @gid: The GID value to search for.
- * @ndev: The ndev related to the GID to search for.
  * @port_num: The port number of the device where the GID value was found.
  * @index: The index into the GID table where the GID was found.  This
  *   parameter may be NULL.
  */
 int ib_find_gid(struct ib_device *device, union ib_gid *gid,
-               struct net_device *ndev, u8 *port_num, u16 *index)
+               u8 *port_num, u16 *index)
 {
        union ib_gid tmp_gid;
        int ret, port, i;
 
        for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
-               if (rdma_cap_roce_gid_table(device, port))
+               if (!rdma_protocol_ib(device, port))
                        continue;
 
                for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
index 81528f6..9821ae9 100644 (file)
@@ -439,10 +439,9 @@ struct sk_buff *iwpm_create_nlmsg(u32 nl_op, struct nlmsghdr **nlh,
        struct sk_buff *skb = NULL;
 
        skb = dev_alloc_skb(IWPM_MSG_SIZE);
-       if (!skb) {
-               pr_err("%s Unable to allocate skb\n", __func__);
+       if (!skb)
                goto create_nlmsg_exit;
-       }
+
        if (!(ibnl_put_msg(skb, nlh, 0, 0, nl_client, nl_op,
                           NLM_F_REQUEST))) {
                pr_warn("%s: Unable to put the nlmsg header\n", __func__);
index 45f2f09..4eb72ff 100644 (file)
@@ -724,21 +724,19 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
 {
        int ret;
        u16 gid_index;
-       u8 p;
-
-       if (rdma_protocol_roce(device, port_num)) {
-               ret = ib_find_cached_gid_by_port(device, &rec->port_gid,
-                                                gid_type, port_num,
-                                                ndev,
-                                                &gid_index);
-       } else if (rdma_protocol_ib(device, port_num)) {
-               ret = ib_find_cached_gid(device, &rec->port_gid,
-                                        IB_GID_TYPE_IB, NULL, &p,
-                                        &gid_index);
-       } else {
-               ret = -EINVAL;
-       }
 
+       /* GID table is not based on the netdevice for IB link layer,
+        * so ignore ndev during search.
+        */
+       if (rdma_protocol_ib(device, port_num))
+               ndev = NULL;
+       else if (!rdma_protocol_roce(device, port_num))
+               return -EINVAL;
+
+       ret = ib_find_cached_gid_by_port(device, &rec->port_gid,
+                                        gid_type, port_num,
+                                        ndev,
+                                        &gid_index);
        if (ret)
                return ret;
 
index 5326a68..eb56776 100644 (file)
 #include <linux/pid.h>
 #include <linux/pid_namespace.h>
 #include <net/netlink.h>
+#include <rdma/rdma_cm.h>
 #include <rdma/rdma_netlink.h>
 
 #include "core_priv.h"
+#include "cma_priv.h"
 
 static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
        [RDMA_NLDEV_ATTR_DEV_INDEX]     = { .type = NLA_U32 },
@@ -71,6 +73,31 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
        [RDMA_NLDEV_ATTR_RES_PID]               = { .type = NLA_U32 },
        [RDMA_NLDEV_ATTR_RES_KERN_NAME]         = { .type = NLA_NUL_STRING,
                                                    .len = TASK_COMM_LEN },
+       [RDMA_NLDEV_ATTR_RES_CM_ID]             = { .type = NLA_NESTED },
+       [RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY]       = { .type = NLA_NESTED },
+       [RDMA_NLDEV_ATTR_RES_PS]                = { .type = NLA_U32 },
+       [RDMA_NLDEV_ATTR_RES_SRC_ADDR]  = {
+                       .len = sizeof(struct __kernel_sockaddr_storage) },
+       [RDMA_NLDEV_ATTR_RES_DST_ADDR]  = {
+                       .len = sizeof(struct __kernel_sockaddr_storage) },
+       [RDMA_NLDEV_ATTR_RES_CQ]                = { .type = NLA_NESTED },
+       [RDMA_NLDEV_ATTR_RES_CQ_ENTRY]          = { .type = NLA_NESTED },
+       [RDMA_NLDEV_ATTR_RES_CQE]               = { .type = NLA_U32 },
+       [RDMA_NLDEV_ATTR_RES_USECNT]            = { .type = NLA_U64 },
+       [RDMA_NLDEV_ATTR_RES_POLL_CTX]          = { .type = NLA_U8 },
+       [RDMA_NLDEV_ATTR_RES_MR]                = { .type = NLA_NESTED },
+       [RDMA_NLDEV_ATTR_RES_MR_ENTRY]          = { .type = NLA_NESTED },
+       [RDMA_NLDEV_ATTR_RES_RKEY]              = { .type = NLA_U32 },
+       [RDMA_NLDEV_ATTR_RES_LKEY]              = { .type = NLA_U32 },
+       [RDMA_NLDEV_ATTR_RES_IOVA]              = { .type = NLA_U64 },
+       [RDMA_NLDEV_ATTR_RES_MRLEN]             = { .type = NLA_U64 },
+       [RDMA_NLDEV_ATTR_RES_PD]                = { .type = NLA_NESTED },
+       [RDMA_NLDEV_ATTR_RES_PD_ENTRY]          = { .type = NLA_NESTED },
+       [RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY]    = { .type = NLA_U32 },
+       [RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY] = { .type = NLA_U32 },
+       [RDMA_NLDEV_ATTR_NDEV_INDEX]            = { .type = NLA_U32 },
+       [RDMA_NLDEV_ATTR_NDEV_NAME]             = { .type = NLA_NUL_STRING,
+                                                   .len = IFNAMSIZ },
 };
 
 static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
@@ -99,7 +126,7 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
                return -EMSGSIZE;
 
        ib_get_device_fw_str(device, fw);
-       /* Device without FW has strlen(fw) */
+       /* Device without FW has strlen(fw) = 0 */
        if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
                return -EMSGSIZE;
 
@@ -115,8 +142,10 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
 }
 
 static int fill_port_info(struct sk_buff *msg,
-                         struct ib_device *device, u32 port)
+                         struct ib_device *device, u32 port,
+                         const struct net *net)
 {
+       struct net_device *netdev = NULL;
        struct ib_port_attr attr;
        int ret;
 
@@ -150,7 +179,23 @@ static int fill_port_info(struct sk_buff *msg,
                return -EMSGSIZE;
        if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
                return -EMSGSIZE;
-       return 0;
+
+       if (device->get_netdev)
+               netdev = device->get_netdev(device, port);
+
+       if (netdev && net_eq(dev_net(netdev), net)) {
+               ret = nla_put_u32(msg,
+                                 RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
+               if (ret)
+                       goto out;
+               ret = nla_put_string(msg,
+                                    RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
+       }
+
+out:
+       if (netdev)
+               dev_put(netdev);
+       return ret;
 }
 
 static int fill_res_info_entry(struct sk_buff *msg,
@@ -182,6 +227,8 @@ static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
                [RDMA_RESTRACK_PD] = "pd",
                [RDMA_RESTRACK_CQ] = "cq",
                [RDMA_RESTRACK_QP] = "qp",
+               [RDMA_RESTRACK_CM_ID] = "cm_id",
+               [RDMA_RESTRACK_MR] = "mr",
        };
 
        struct rdma_restrack_root *res = &device->res;
@@ -212,10 +259,29 @@ err:
        return ret;
 }
 
-static int fill_res_qp_entry(struct sk_buff *msg,
-                            struct ib_qp *qp, uint32_t port)
+static int fill_res_name_pid(struct sk_buff *msg,
+                            struct rdma_restrack_entry *res)
+{
+       /*
+        * For user resources, users should read /proc/PID/comm to get
+        * the name of the task.
+        */
+       if (rdma_is_kernel_res(res)) {
+               if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
+                   res->kern_name))
+                       return -EMSGSIZE;
+       } else {
+               if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID,
+                   task_pid_vnr(res->task)))
+                       return -EMSGSIZE;
+       }
+       return 0;
+}
+
+static int fill_res_qp_entry(struct sk_buff *msg, struct netlink_callback *cb,
+                            struct rdma_restrack_entry *res, uint32_t port)
 {
-       struct rdma_restrack_entry *res = &qp->res;
+       struct ib_qp *qp = container_of(res, struct ib_qp, res);
        struct ib_qp_init_attr qp_init_attr;
        struct nlattr *entry_attr;
        struct ib_qp_attr qp_attr;
@@ -262,19 +328,172 @@ static int fill_res_qp_entry(struct sk_buff *msg,
        if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
                goto err;
 
-       /*
-        * Existence of task means that it is user QP and netlink
-        * user is invited to go and read /proc/PID/comm to get name
-        * of the task file and res->task_com should be NULL.
-        */
-       if (rdma_is_kernel_res(res)) {
-               if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME, res->kern_name))
+       if (fill_res_name_pid(msg, res))
+               goto err;
+
+       nla_nest_end(msg, entry_attr);
+       return 0;
+
+err:
+       nla_nest_cancel(msg, entry_attr);
+out:
+       return -EMSGSIZE;
+}
+
+static int fill_res_cm_id_entry(struct sk_buff *msg,
+                               struct netlink_callback *cb,
+                               struct rdma_restrack_entry *res, uint32_t port)
+{
+       struct rdma_id_private *id_priv =
+                               container_of(res, struct rdma_id_private, res);
+       struct rdma_cm_id *cm_id = &id_priv->id;
+       struct nlattr *entry_attr;
+
+       if (port && port != cm_id->port_num)
+               return 0;
+
+       entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY);
+       if (!entry_attr)
+               goto out;
+
+       if (cm_id->port_num &&
+           nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
+               goto err;
+
+       if (id_priv->qp_num) {
+               if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
                        goto err;
-       } else {
-               if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, task_pid_vnr(res->task)))
+               if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
                        goto err;
        }
 
+       if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
+               goto err;
+
+       if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
+               goto err;
+
+       if (cm_id->route.addr.src_addr.ss_family &&
+           nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
+                   sizeof(cm_id->route.addr.src_addr),
+                   &cm_id->route.addr.src_addr))
+               goto err;
+       if (cm_id->route.addr.dst_addr.ss_family &&
+           nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
+                   sizeof(cm_id->route.addr.dst_addr),
+                   &cm_id->route.addr.dst_addr))
+               goto err;
+
+       if (fill_res_name_pid(msg, res))
+               goto err;
+
+       nla_nest_end(msg, entry_attr);
+       return 0;
+
+err:
+       nla_nest_cancel(msg, entry_attr);
+out:
+       return -EMSGSIZE;
+}
+
+static int fill_res_cq_entry(struct sk_buff *msg, struct netlink_callback *cb,
+                            struct rdma_restrack_entry *res, uint32_t port)
+{
+       struct ib_cq *cq = container_of(res, struct ib_cq, res);
+       struct nlattr *entry_attr;
+
+       entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CQ_ENTRY);
+       if (!entry_attr)
+               goto out;
+
+       if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
+               goto err;
+       if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
+                             atomic_read(&cq->usecnt), 0))
+               goto err;
+
+       /* Poll context is only valid for kernel CQs */
+       if (rdma_is_kernel_res(res) &&
+           nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
+               goto err;
+
+       if (fill_res_name_pid(msg, res))
+               goto err;
+
+       nla_nest_end(msg, entry_attr);
+       return 0;
+
+err:
+       nla_nest_cancel(msg, entry_attr);
+out:
+       return -EMSGSIZE;
+}
+
+static int fill_res_mr_entry(struct sk_buff *msg, struct netlink_callback *cb,
+                            struct rdma_restrack_entry *res, uint32_t port)
+{
+       struct ib_mr *mr = container_of(res, struct ib_mr, res);
+       struct nlattr *entry_attr;
+
+       entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_MR_ENTRY);
+       if (!entry_attr)
+               goto out;
+
+       if (netlink_capable(cb->skb, CAP_NET_ADMIN)) {
+               if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
+                       goto err;
+               if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
+                       goto err;
+               if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_IOVA,
+                                     mr->iova, 0))
+                       goto err;
+       }
+
+       if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length, 0))
+               goto err;
+
+       if (fill_res_name_pid(msg, res))
+               goto err;
+
+       nla_nest_end(msg, entry_attr);
+       return 0;
+
+err:
+       nla_nest_cancel(msg, entry_attr);
+out:
+       return -EMSGSIZE;
+}
+
+static int fill_res_pd_entry(struct sk_buff *msg, struct netlink_callback *cb,
+                            struct rdma_restrack_entry *res, uint32_t port)
+{
+       struct ib_pd *pd = container_of(res, struct ib_pd, res);
+       struct nlattr *entry_attr;
+
+       entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_PD_ENTRY);
+       if (!entry_attr)
+               goto out;
+
+       if (netlink_capable(cb->skb, CAP_NET_ADMIN)) {
+               if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
+                               pd->local_dma_lkey))
+                       goto err;
+               if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
+                   nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
+                               pd->unsafe_global_rkey))
+                       goto err;
+       }
+       if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
+                             atomic_read(&pd->usecnt), 0))
+               goto err;
+       if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
+           nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
+                       pd->unsafe_global_rkey))
+               goto err;
+
+       if (fill_res_name_pid(msg, res))
+               goto err;
+
        nla_nest_end(msg, entry_attr);
        return 0;
 
@@ -405,7 +624,7 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
                        RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
                        0, 0);
 
-       err = fill_port_info(msg, device, port);
+       err = fill_port_info(msg, device, port, sock_net(skb->sk));
        if (err)
                goto err_free;
 
@@ -465,7 +684,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb,
                                                 RDMA_NLDEV_CMD_PORT_GET),
                                0, NLM_F_MULTI);
 
-               if (fill_port_info(skb, device, p)) {
+               if (fill_port_info(skb, device, p, sock_net(skb->sk))) {
                        nlmsg_cancel(skb, nlh);
                        goto out;
                }
@@ -558,23 +777,60 @@ static int nldev_res_get_dumpit(struct sk_buff *skb,
        return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
 }
 
-static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
-                                  struct netlink_callback *cb)
+struct nldev_fill_res_entry {
+       int (*fill_res_func)(struct sk_buff *msg, struct netlink_callback *cb,
+                            struct rdma_restrack_entry *res, u32 port);
+       enum rdma_nldev_attr nldev_attr;
+       enum rdma_nldev_command nldev_cmd;
+};
+
+static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
+       [RDMA_RESTRACK_QP] = {
+               .fill_res_func = fill_res_qp_entry,
+               .nldev_cmd = RDMA_NLDEV_CMD_RES_QP_GET,
+               .nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
+       },
+       [RDMA_RESTRACK_CM_ID] = {
+               .fill_res_func = fill_res_cm_id_entry,
+               .nldev_cmd = RDMA_NLDEV_CMD_RES_CM_ID_GET,
+               .nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
+       },
+       [RDMA_RESTRACK_CQ] = {
+               .fill_res_func = fill_res_cq_entry,
+               .nldev_cmd = RDMA_NLDEV_CMD_RES_CQ_GET,
+               .nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
+       },
+       [RDMA_RESTRACK_MR] = {
+               .fill_res_func = fill_res_mr_entry,
+               .nldev_cmd = RDMA_NLDEV_CMD_RES_MR_GET,
+               .nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
+       },
+       [RDMA_RESTRACK_PD] = {
+               .fill_res_func = fill_res_pd_entry,
+               .nldev_cmd = RDMA_NLDEV_CMD_RES_PD_GET,
+               .nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
+       },
+};
+
+static int res_get_common_dumpit(struct sk_buff *skb,
+                                struct netlink_callback *cb,
+                                enum rdma_restrack_type res_type)
 {
+       const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
        struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
        struct rdma_restrack_entry *res;
        int err, ret = 0, idx = 0;
        struct nlattr *table_attr;
        struct ib_device *device;
        int start = cb->args[0];
-       struct ib_qp *qp = NULL;
        struct nlmsghdr *nlh;
        u32 index, port = 0;
+       bool filled = false;
 
        err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
                          nldev_policy, NULL);
        /*
-        * Right now, we are expecting the device index to get QP information,
+        * Right now, we are expecting the device index to get res information,
         * but it is possible to extend this code to return all devices in
         * one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
         * if it doesn't exist, we will iterate over all devices.
@@ -601,7 +857,7 @@ static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
        }
 
        nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
-                       RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_QP_GET),
+                       RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, fe->nldev_cmd),
                        0, NLM_F_MULTI);
 
        if (fill_nldev_handle(skb, device)) {
@@ -609,24 +865,26 @@ static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
                goto err;
        }
 
-       table_attr = nla_nest_start(skb, RDMA_NLDEV_ATTR_RES_QP);
+       table_attr = nla_nest_start(skb, fe->nldev_attr);
        if (!table_attr) {
                ret = -EMSGSIZE;
                goto err;
        }
 
        down_read(&device->res.rwsem);
-       hash_for_each_possible(device->res.hash, res, node, RDMA_RESTRACK_QP) {
+       hash_for_each_possible(device->res.hash, res, node, res_type) {
                if (idx < start)
                        goto next;
 
                if ((rdma_is_kernel_res(res) &&
                     task_active_pid_ns(current) != &init_pid_ns) ||
-                   (!rdma_is_kernel_res(res) &&
-                    task_active_pid_ns(current) != task_active_pid_ns(res->task)))
+                   (!rdma_is_kernel_res(res) && task_active_pid_ns(current) !=
+                    task_active_pid_ns(res->task)))
                        /*
-                        * 1. Kernel QPs should be visible in init namspace only
-                        * 2. Present only QPs visible in the current namespace
+                        * 1. Kernel resources should be visible in the init
+                        *    namespace only
+                        * 2. Present only resources visible in the current
+                        *    namespace
                         */
                        goto next;
 
@@ -638,10 +896,10 @@ static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
                         */
                        goto next;
 
-               qp = container_of(res, struct ib_qp, res);
+               filled = true;
 
                up_read(&device->res.rwsem);
-               ret = fill_res_qp_entry(skb, qp, port);
+               ret = fe->fill_res_func(skb, cb, res, port);
                down_read(&device->res.rwsem);
                /*
                 * Return resource back, but it won't be released till
@@ -667,10 +925,10 @@ next:             idx++;
        cb->args[0] = idx;
 
        /*
-        * No more QPs to fill, cancel the message and
+        * No more entries to fill, cancel the message and
         * return 0 to mark end of dumpit.
         */
-       if (!qp)
+       if (!filled)
                goto err;
 
        put_device(&device->dev);
@@ -688,6 +946,36 @@ err_index:
        return ret;
 }
 
+static int nldev_res_get_qp_dumpit(struct sk_buff *skb,
+                                  struct netlink_callback *cb)
+{
+       return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_QP);
+}
+
+static int nldev_res_get_cm_id_dumpit(struct sk_buff *skb,
+                                     struct netlink_callback *cb)
+{
+       return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_CM_ID);
+}
+
+static int nldev_res_get_cq_dumpit(struct sk_buff *skb,
+                                  struct netlink_callback *cb)
+{
+       return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_CQ);
+}
+
+static int nldev_res_get_mr_dumpit(struct sk_buff *skb,
+                                  struct netlink_callback *cb)
+{
+       return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR);
+}
+
+static int nldev_res_get_pd_dumpit(struct sk_buff *skb,
+                                  struct netlink_callback *cb)
+{
+       return res_get_common_dumpit(skb, cb, RDMA_RESTRACK_PD);
+}
+
 static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
        [RDMA_NLDEV_CMD_GET] = {
                .doit = nldev_get_doit,
@@ -714,6 +1002,18 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
                 * too.
                 */
        },
+       [RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
+               .dump = nldev_res_get_cm_id_dumpit,
+       },
+       [RDMA_NLDEV_CMD_RES_CQ_GET] = {
+               .dump = nldev_res_get_cq_dumpit,
+       },
+       [RDMA_NLDEV_CMD_RES_MR_GET] = {
+               .dump = nldev_res_get_mr_dumpit,
+       },
+       [RDMA_NLDEV_CMD_RES_PD_GET] = {
+               .dump = nldev_res_get_pd_dumpit,
+       },
 };
 
 void __init nldev_init(void)
index d8eead5..a6e9049 100644 (file)
@@ -350,13 +350,6 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
        return type->type_class->alloc_begin(type, ucontext);
 }
 
-static void uverbs_uobject_add(struct ib_uobject *uobject)
-{
-       mutex_lock(&uobject->context->uobjects_lock);
-       list_add(&uobject->list, &uobject->context->uobjects);
-       mutex_unlock(&uobject->context->uobjects_lock);
-}
-
 static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj,
                                                  enum rdma_remove_reason why)
 {
@@ -502,7 +495,6 @@ out:
 
 static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
 {
-       uverbs_uobject_add(uobj);
        spin_lock(&uobj->context->ufile->idr_lock);
        /*
         * We already allocated this IDR with a NULL object, so
@@ -518,7 +510,6 @@ static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
        struct ib_uobject_file *uobj_file =
                container_of(uobj, struct ib_uobject_file, uobj);
 
-       uverbs_uobject_add(&uobj_file->uobj);
        fd_install(uobj_file->uobj.id, uobj->object);
        /* This shouldn't be used anymore. Use the file object instead */
        uobj_file->uobj.id = 0;
@@ -545,6 +536,10 @@ int rdma_alloc_commit_uobject(struct ib_uobject *uobj)
        assert_uverbs_usecnt(uobj, true);
        atomic_set(&uobj->usecnt, 0);
 
+       mutex_lock(&uobj->context->uobjects_lock);
+       list_add(&uobj->list, &uobj->context->uobjects);
+       mutex_unlock(&uobj->context->uobjects_lock);
+
        uobj->type->type_class->alloc_commit(uobj);
        up_read(&uobj->context->cleanup_rwsem);
 
index 3dbc4e4..efddd13 100644 (file)
@@ -3,20 +3,66 @@
  * Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
  */
 
+#include <rdma/rdma_cm.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/restrack.h>
 #include <linux/mutex.h>
 #include <linux/sched/task.h>
 #include <linux/pid_namespace.h>
 
+#include "cma_priv.h"
+
 void rdma_restrack_init(struct rdma_restrack_root *res)
 {
        init_rwsem(&res->rwsem);
 }
 
+static const char *type2str(enum rdma_restrack_type type)
+{
+       static const char * const names[RDMA_RESTRACK_MAX] = {
+               [RDMA_RESTRACK_PD] = "PD",
+               [RDMA_RESTRACK_CQ] = "CQ",
+               [RDMA_RESTRACK_QP] = "QP",
+               [RDMA_RESTRACK_CM_ID] = "CM_ID",
+               [RDMA_RESTRACK_MR] = "MR",
+       };
+
+       return names[type];
+};
+
 void rdma_restrack_clean(struct rdma_restrack_root *res)
 {
-       WARN_ON_ONCE(!hash_empty(res->hash));
+       struct rdma_restrack_entry *e;
+       char buf[TASK_COMM_LEN];
+       struct ib_device *dev;
+       const char *owner;
+       int bkt;
+
+       if (hash_empty(res->hash))
+               return;
+
+       dev = container_of(res, struct ib_device, res);
+       pr_err("restrack: %s", CUT_HERE);
+       pr_err("restrack: BUG: RESTRACK detected leak of resources on %s\n",
+              dev->name);
+       hash_for_each(res->hash, bkt, e, node) {
+               if (rdma_is_kernel_res(e)) {
+                       owner = e->kern_name;
+               } else {
+                       /*
+                        * There is no need to call get_task_struct here,
+                        * because we can be here only if there are more
+                        * get_task_struct() calls than put_task_struct().
+                        */
+                       get_task_comm(buf, e->task);
+                       owner = buf;
+               }
+
+               pr_err("restrack: %s %s object allocated by %s is not freed\n",
+                      rdma_is_kernel_res(e) ? "Kernel" : "User",
+                      type2str(e->type), owner);
+       }
+       pr_err("restrack: %s", CUT_HERE);
 }
 
 int rdma_restrack_count(struct rdma_restrack_root *res,
@@ -40,51 +86,48 @@ EXPORT_SYMBOL(rdma_restrack_count);
 
 static void set_kern_name(struct rdma_restrack_entry *res)
 {
-       enum rdma_restrack_type type = res->type;
-       struct ib_qp *qp;
-
-       if (type != RDMA_RESTRACK_QP)
-               /* PD and CQ types already have this name embedded in */
-               return;
+       struct ib_pd *pd;
 
-       qp = container_of(res, struct ib_qp, res);
-       if (!qp->pd) {
-               WARN_ONCE(true, "XRC QPs are not supported\n");
-               /* Survive, despite the programmer's error */
-               res->kern_name = " ";
-               return;
+       switch (res->type) {
+       case RDMA_RESTRACK_QP:
+               pd = container_of(res, struct ib_qp, res)->pd;
+               if (!pd) {
+                       WARN_ONCE(true, "XRC QPs are not supported\n");
+                       /* Survive, despite the programmer's error */
+                       res->kern_name = " ";
+               }
+               break;
+       case RDMA_RESTRACK_MR:
+               pd = container_of(res, struct ib_mr, res)->pd;
+               break;
+       default:
+               /* Other types set kern_name directly */
+               pd = NULL;
+               break;
        }
 
-       res->kern_name = qp->pd->res.kern_name;
+       if (pd)
+               res->kern_name = pd->res.kern_name;
 }
 
 static struct ib_device *res_to_dev(struct rdma_restrack_entry *res)
 {
-       enum rdma_restrack_type type = res->type;
-       struct ib_device *dev;
-       struct ib_pd *pd;
-       struct ib_cq *cq;
-       struct ib_qp *qp;
-
-       switch (type) {
+       switch (res->type) {
        case RDMA_RESTRACK_PD:
-               pd = container_of(res, struct ib_pd, res);
-               dev = pd->device;
-               break;
+               return container_of(res, struct ib_pd, res)->device;
        case RDMA_RESTRACK_CQ:
-               cq = container_of(res, struct ib_cq, res);
-               dev = cq->device;
-               break;
+               return container_of(res, struct ib_cq, res)->device;
        case RDMA_RESTRACK_QP:
-               qp = container_of(res, struct ib_qp, res);
-               dev = qp->device;
-               break;
+               return container_of(res, struct ib_qp, res)->device;
+       case RDMA_RESTRACK_CM_ID:
+               return container_of(res, struct rdma_id_private,
+                                   res)->id.device;
+       case RDMA_RESTRACK_MR:
+               return container_of(res, struct ib_mr, res)->device;
        default:
-               WARN_ONCE(true, "Wrong resource tracking type %u\n", type);
+               WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
                return NULL;
        }
-
-       return dev;
 }
 
 static bool res_is_user(struct rdma_restrack_entry *res)
@@ -96,6 +139,10 @@ static bool res_is_user(struct rdma_restrack_entry *res)
                return container_of(res, struct ib_cq, res)->uobject;
        case RDMA_RESTRACK_QP:
                return container_of(res, struct ib_qp, res)->uobject;
+       case RDMA_RESTRACK_CM_ID:
+               return !res->kern_name;
+       case RDMA_RESTRACK_MR:
+               return container_of(res, struct ib_mr, res)->pd->uobject;
        default:
                WARN_ONCE(true, "Wrong resource tracking type %u\n", res->type);
                return false;
@@ -109,13 +156,15 @@ void rdma_restrack_add(struct rdma_restrack_entry *res)
        if (!dev)
                return;
 
+       if (res->type != RDMA_RESTRACK_CM_ID || !res_is_user(res))
+               res->task = NULL;
+
        if (res_is_user(res)) {
-               get_task_struct(current);
-               res->task = current;
+               if (!res->task)
+                       rdma_restrack_set_task(res, current);
                res->kern_name = NULL;
        } else {
                set_kern_name(res);
-               res->task = NULL;
        }
 
        kref_init(&res->kref);
index 9f029a1..a61ec7e 100644 (file)
@@ -1227,118 +1227,130 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
        return src_path_mask;
 }
 
-int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
-                             struct sa_path_rec *rec,
-                             struct rdma_ah_attr *ah_attr)
+static int
+roce_resolve_route_from_path(struct ib_device *device, u8 port_num,
+                            struct sa_path_rec *rec)
 {
+       struct net_device *resolved_dev;
+       struct net_device *ndev;
+       struct net_device *idev;
+       struct rdma_dev_addr dev_addr = {
+               .bound_dev_if = ((sa_path_get_ifindex(rec) >= 0) ?
+                                sa_path_get_ifindex(rec) : 0),
+               .net = sa_path_get_ndev(rec) ?
+                       sa_path_get_ndev(rec) :
+                       &init_net
+       };
+       union {
+               struct sockaddr     _sockaddr;
+               struct sockaddr_in  _sockaddr_in;
+               struct sockaddr_in6 _sockaddr_in6;
+       } sgid_addr, dgid_addr;
        int ret;
-       u16 gid_index;
-       int use_roce;
-       struct net_device *ndev = NULL;
 
-       memset(ah_attr, 0, sizeof *ah_attr);
-       ah_attr->type = rdma_ah_find_type(device, port_num);
+       if (rec->roce.route_resolved)
+               return 0;
 
-       rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec)));
+       if (!device->get_netdev)
+               return -EOPNOTSUPP;
 
-       if ((ah_attr->type == RDMA_AH_ATTR_TYPE_OPA) &&
-           (rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE)))
-               rdma_ah_set_make_grd(ah_attr, true);
+       rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid);
+       rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid);
 
-       rdma_ah_set_sl(ah_attr, rec->sl);
-       rdma_ah_set_path_bits(ah_attr, be32_to_cpu(sa_path_get_slid(rec)) &
-                             get_src_path_mask(device, port_num));
-       rdma_ah_set_port_num(ah_attr, port_num);
-       rdma_ah_set_static_rate(ah_attr, rec->rate);
-       use_roce = rdma_cap_eth_ah(device, port_num);
-
-       if (use_roce) {
-               struct net_device *idev;
-               struct net_device *resolved_dev;
-               struct rdma_dev_addr dev_addr = {
-                       .bound_dev_if = ((sa_path_get_ifindex(rec) >= 0) ?
-                                        sa_path_get_ifindex(rec) : 0),
-                       .net = sa_path_get_ndev(rec) ?
-                               sa_path_get_ndev(rec) :
-                               &init_net
-               };
-               union {
-                       struct sockaddr     _sockaddr;
-                       struct sockaddr_in  _sockaddr_in;
-                       struct sockaddr_in6 _sockaddr_in6;
-               } sgid_addr, dgid_addr;
-
-               if (!device->get_netdev)
-                       return -EOPNOTSUPP;
-
-               rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid);
-               rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid);
-
-               /* validate the route */
-               ret = rdma_resolve_ip_route(&sgid_addr._sockaddr,
-                                           &dgid_addr._sockaddr, &dev_addr);
-               if (ret)
-                       return ret;
+       /* validate the route */
+       ret = rdma_resolve_ip_route(&sgid_addr._sockaddr,
+                                   &dgid_addr._sockaddr, &dev_addr);
+       if (ret)
+               return ret;
 
-               if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
-                    dev_addr.network == RDMA_NETWORK_IPV6) &&
-                   rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
-                       return -EINVAL;
+       if ((dev_addr.network == RDMA_NETWORK_IPV4 ||
+            dev_addr.network == RDMA_NETWORK_IPV6) &&
+           rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
+               return -EINVAL;
 
-               idev = device->get_netdev(device, port_num);
-               if (!idev)
-                       return -ENODEV;
+       idev = device->get_netdev(device, port_num);
+       if (!idev)
+               return -ENODEV;
 
-               resolved_dev = dev_get_by_index(dev_addr.net,
-                                               dev_addr.bound_dev_if);
-               if (!resolved_dev) {
-                       dev_put(idev);
-                       return -ENODEV;
-               }
-               ndev = ib_get_ndev_from_path(rec);
-               rcu_read_lock();
-               if ((ndev && ndev != resolved_dev) ||
-                   (resolved_dev != idev &&
-                    !rdma_is_upper_dev_rcu(idev, resolved_dev)))
-                       ret = -EHOSTUNREACH;
-               rcu_read_unlock();
-               dev_put(idev);
-               dev_put(resolved_dev);
-               if (ret) {
-                       if (ndev)
-                               dev_put(ndev);
-                       return ret;
-               }
+       resolved_dev = dev_get_by_index(dev_addr.net,
+                                       dev_addr.bound_dev_if);
+       if (!resolved_dev) {
+               ret = -ENODEV;
+               goto done;
        }
+       ndev = ib_get_ndev_from_path(rec);
+       rcu_read_lock();
+       if ((ndev && ndev != resolved_dev) ||
+           (resolved_dev != idev &&
+            !rdma_is_upper_dev_rcu(idev, resolved_dev)))
+               ret = -EHOSTUNREACH;
+       rcu_read_unlock();
+       dev_put(resolved_dev);
+       if (ndev)
+               dev_put(ndev);
+done:
+       dev_put(idev);
+       if (!ret)
+               rec->roce.route_resolved = true;
+       return ret;
+}
 
-       if (rec->hop_limit > 0 || use_roce) {
-               enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);
+static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num,
+                                  struct sa_path_rec *rec,
+                                  struct rdma_ah_attr *ah_attr)
+{
+       enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);
+       struct net_device *ndev;
+       u16 gid_index;
+       int ret;
 
-               ret = ib_find_cached_gid_by_port(device, &rec->sgid, type,
-                                                port_num, ndev, &gid_index);
-               if (ret) {
-                       if (ndev)
-                               dev_put(ndev);
-                       return ret;
-               }
+       ndev = ib_get_ndev_from_path(rec);
+       ret = ib_find_cached_gid_by_port(device, &rec->sgid, type,
+                                        port_num, ndev, &gid_index);
+       if (ndev)
+               dev_put(ndev);
+       if (ret)
+               return ret;
 
-               rdma_ah_set_grh(ah_attr, &rec->dgid,
-                               be32_to_cpu(rec->flow_label),
-                               gid_index, rec->hop_limit,
-                               rec->traffic_class);
-               if (ndev)
-                       dev_put(ndev);
-       }
+       rdma_ah_set_grh(ah_attr, &rec->dgid,
+                       be32_to_cpu(rec->flow_label),
+                       gid_index, rec->hop_limit,
+                       rec->traffic_class);
+       return 0;
+}
 
-       if (use_roce) {
-               u8 *dmac = sa_path_get_dmac(rec);
+int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
+                             struct sa_path_rec *rec,
+                             struct rdma_ah_attr *ah_attr)
+{
+       int ret = 0;
+
+       memset(ah_attr, 0, sizeof(*ah_attr));
+       ah_attr->type = rdma_ah_find_type(device, port_num);
+       rdma_ah_set_sl(ah_attr, rec->sl);
+       rdma_ah_set_port_num(ah_attr, port_num);
+       rdma_ah_set_static_rate(ah_attr, rec->rate);
 
-               if (!dmac)
-                       return -EINVAL;
-               memcpy(ah_attr->roce.dmac, dmac, ETH_ALEN);
+       if (sa_path_is_roce(rec)) {
+               ret = roce_resolve_route_from_path(device, port_num, rec);
+               if (ret)
+                       return ret;
+
+               memcpy(ah_attr->roce.dmac, sa_path_get_dmac(rec), ETH_ALEN);
+       } else {
+               rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec)));
+               if (sa_path_is_opa(rec) &&
+                   rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE))
+                       rdma_ah_set_make_grd(ah_attr, true);
+
+               rdma_ah_set_path_bits(ah_attr,
+                                     be32_to_cpu(sa_path_get_slid(rec)) &
+                                     get_src_path_mask(device, port_num));
        }
 
-       return 0;
+       if (rec->hop_limit > 0 || sa_path_is_roce(rec))
+               ret = init_ah_attr_grh_fields(device, port_num, rec, ah_attr);
+       return ret;
 }
 EXPORT_SYMBOL(ib_init_ah_attr_from_path);
 
index 8ae1308..31c7efa 100644 (file)
@@ -273,6 +273,7 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
                break;
        case IB_SPEED_SDR:
        default:                /* default to SDR for invalid rates */
+               speed = " SDR";
                rate = 25;
                break;
        }
@@ -388,14 +389,26 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
 {
        struct port_table_attribute *tab_attr =
                container_of(attr, struct port_table_attribute, attr);
+       union ib_gid *pgid;
        union ib_gid gid;
        ssize_t ret;
 
        ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, NULL);
-       if (ret)
-               return ret;
 
-       return sprintf(buf, "%pI6\n", gid.raw);
+       /* If reading GID fails, it is likely due to the GID entry being empty
+        * (invalid) or a reserved GID in the table.
+        * User space expects to read GID table entries as long as its given
+        * index is within GID table size.
+        * Administrative/debugging tools fail to query the rest of the GID
+        * entries if they hit an error while querying a GID at a given index.
+        * To avoid user space seeing such an error on a failed GID read,
+        * return the zero GID as before. This maintains backward compatibility.
+        */
+       if (ret)
+               pgid = &zgid;
+       else
+               pgid = &gid;
+       return sprintf(buf, "%pI6\n", pgid->raw);
 }
 
 static ssize_t show_port_gid_attr_ndev(struct ib_port *p,
@@ -810,10 +823,15 @@ static ssize_t show_hw_stats(struct kobject *kobj, struct attribute *attr,
                dev = port->ibdev;
                stats = port->hw_stats;
        }
+       mutex_lock(&stats->lock);
        ret = update_hw_stats(dev, stats, hsa->port_num, hsa->index);
        if (ret)
-               return ret;
-       return print_hw_stat(stats, hsa->index, buf);
+               goto unlock;
+       ret = print_hw_stat(stats, hsa->index, buf);
+unlock:
+       mutex_unlock(&stats->lock);
+
+       return ret;
 }
 
 static ssize_t show_stats_lifespan(struct kobject *kobj,
@@ -821,17 +839,25 @@ static ssize_t show_stats_lifespan(struct kobject *kobj,
                                   char *buf)
 {
        struct hw_stats_attribute *hsa;
+       struct rdma_hw_stats *stats;
        int msecs;
 
        hsa = container_of(attr, struct hw_stats_attribute, attr);
        if (!hsa->port_num) {
                struct ib_device *dev = container_of((struct device *)kobj,
                                                     struct ib_device, dev);
-               msecs = jiffies_to_msecs(dev->hw_stats->lifespan);
+
+               stats = dev->hw_stats;
        } else {
                struct ib_port *p = container_of(kobj, struct ib_port, kobj);
-               msecs = jiffies_to_msecs(p->hw_stats->lifespan);
+
+               stats = p->hw_stats;
        }
+
+       mutex_lock(&stats->lock);
+       msecs = jiffies_to_msecs(stats->lifespan);
+       mutex_unlock(&stats->lock);
+
        return sprintf(buf, "%d\n", msecs);
 }
 
@@ -840,6 +866,7 @@ static ssize_t set_stats_lifespan(struct kobject *kobj,
                                  const char *buf, size_t count)
 {
        struct hw_stats_attribute *hsa;
+       struct rdma_hw_stats *stats;
        int msecs;
        int jiffies;
        int ret;
@@ -854,11 +881,18 @@ static ssize_t set_stats_lifespan(struct kobject *kobj,
        if (!hsa->port_num) {
                struct ib_device *dev = container_of((struct device *)kobj,
                                                     struct ib_device, dev);
-               dev->hw_stats->lifespan = jiffies;
+
+               stats = dev->hw_stats;
        } else {
                struct ib_port *p = container_of(kobj, struct ib_port, kobj);
-               p->hw_stats->lifespan = jiffies;
+
+               stats = p->hw_stats;
        }
+
+       mutex_lock(&stats->lock);
+       stats->lifespan = jiffies;
+       mutex_unlock(&stats->lock);
+
        return count;
 }
 
@@ -951,6 +985,7 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port,
                sysfs_attr_init(hsag->attrs[i]);
        }
 
+       mutex_init(&stats->lock);
        /* treat an error here as non-fatal */
        hsag->attrs[i] = alloc_hsa_lifespan("lifespan", port_num);
        if (hsag->attrs[i])
index 0170226..9eef96d 100644 (file)
@@ -430,7 +430,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
                uevent->resp.id = ctx->id;
        }
 
-       if (copy_to_user((void __user *)(unsigned long)cmd.response,
+       if (copy_to_user(u64_to_user_ptr(cmd.response),
                         &uevent->resp, sizeof(uevent->resp))) {
                result = -EFAULT;
                goto done;
@@ -441,7 +441,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
                        result = -ENOMEM;
                        goto done;
                }
-               if (copy_to_user((void __user *)(unsigned long)cmd.data,
+               if (copy_to_user(u64_to_user_ptr(cmd.data),
                                 uevent->data, uevent->data_len)) {
                        result = -EFAULT;
                        goto done;
@@ -453,7 +453,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
                        result = -ENOMEM;
                        goto done;
                }
-               if (copy_to_user((void __user *)(unsigned long)cmd.info,
+               if (copy_to_user(u64_to_user_ptr(cmd.info),
                                 uevent->info, uevent->info_len)) {
                        result = -EFAULT;
                        goto done;
@@ -502,7 +502,7 @@ static ssize_t ib_ucm_create_id(struct ib_ucm_file *file,
        }
 
        resp.id = ctx->id;
-       if (copy_to_user((void __user *)(unsigned long)cmd.response,
+       if (copy_to_user(u64_to_user_ptr(cmd.response),
                         &resp, sizeof(resp))) {
                result = -EFAULT;
                goto err2;
@@ -556,7 +556,7 @@ static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file,
        ib_ucm_cleanup_events(ctx);
 
        resp.events_reported = ctx->events_reported;
-       if (copy_to_user((void __user *)(unsigned long)cmd.response,
+       if (copy_to_user(u64_to_user_ptr(cmd.response),
                         &resp, sizeof(resp)))
                result = -EFAULT;
 
@@ -588,7 +588,7 @@ static ssize_t ib_ucm_attr_id(struct ib_ucm_file *file,
        resp.local_id     = ctx->cm_id->local_id;
        resp.remote_id    = ctx->cm_id->remote_id;
 
-       if (copy_to_user((void __user *)(unsigned long)cmd.response,
+       if (copy_to_user(u64_to_user_ptr(cmd.response),
                         &resp, sizeof(resp)))
                result = -EFAULT;
 
@@ -625,7 +625,7 @@ static ssize_t ib_ucm_init_qp_attr(struct ib_ucm_file *file,
 
        ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr);
 
-       if (copy_to_user((void __user *)(unsigned long)cmd.response,
+       if (copy_to_user(u64_to_user_ptr(cmd.response),
                         &resp, sizeof(resp)))
                result = -EFAULT;
 
@@ -699,7 +699,7 @@ static int ib_ucm_alloc_data(const void **dest, u64 src, u32 len)
        if (!len)
                return 0;
 
-       data = memdup_user((void __user *)(unsigned long)src, len);
+       data = memdup_user(u64_to_user_ptr(src), len);
        if (IS_ERR(data))
                return PTR_ERR(data);
 
@@ -721,7 +721,7 @@ static int ib_ucm_path_get(struct sa_path_rec **path, u64 src)
        if (!sa_path)
                return -ENOMEM;
 
-       if (copy_from_user(&upath, (void __user *)(unsigned long)src,
+       if (copy_from_user(&upath, u64_to_user_ptr(src),
                           sizeof(upath))) {
 
                kfree(sa_path);
index d933336..7432948 100644 (file)
@@ -382,7 +382,11 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
        struct ucma_event *uevent;
        int ret = 0;
 
-       if (out_len < sizeof uevent->resp)
+       /*
+        * Old 32 bit user space does not send the 4 byte padding in the
+        * reserved field. We don't care, allow it to keep working.
+        */
+       if (out_len < sizeof(uevent->resp) - sizeof(uevent->resp.reserved))
                return -ENOSPC;
 
        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
@@ -416,8 +420,9 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
                uevent->resp.id = ctx->id;
        }
 
-       if (copy_to_user((void __user *)(unsigned long)cmd.response,
-                        &uevent->resp, sizeof uevent->resp)) {
+       if (copy_to_user(u64_to_user_ptr(cmd.response),
+                        &uevent->resp,
+                        min_t(size_t, out_len, sizeof(uevent->resp)))) {
                ret = -EFAULT;
                goto done;
        }
@@ -477,15 +482,15 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
                return -ENOMEM;
 
        ctx->uid = cmd.uid;
-       cm_id = rdma_create_id(current->nsproxy->net_ns,
-                              ucma_event_handler, ctx, cmd.ps, qp_type);
+       cm_id = __rdma_create_id(current->nsproxy->net_ns,
+                                ucma_event_handler, ctx, cmd.ps, qp_type, NULL);
        if (IS_ERR(cm_id)) {
                ret = PTR_ERR(cm_id);
                goto err1;
        }
 
        resp.id = ctx->id;
-       if (copy_to_user((void __user *)(unsigned long)cmd.response,
+       if (copy_to_user(u64_to_user_ptr(cmd.response),
                         &resp, sizeof(resp))) {
                ret = -EFAULT;
                goto err2;
@@ -615,7 +620,7 @@ static ssize_t ucma_destroy_id(struct ucma_file *file, const char __user *inbuf,
        }
 
        resp.events_reported = ucma_free_ctx(ctx);
-       if (copy_to_user((void __user *)(unsigned long)cmd.response,
+       if (copy_to_user(u64_to_user_ptr(cmd.response),
                         &resp, sizeof(resp)))
                ret = -EFAULT;
 
@@ -845,7 +850,7 @@ static ssize_t ucma_query_route(struct ucma_file *file,
                ucma_copy_iw_route(&resp, &ctx->cm_id->route);
 
 out:
-       if (copy_to_user((void __user *)(unsigned long)cmd.response,
+       if (copy_to_user(u64_to_user_ptr(cmd.response),
                         &resp, sizeof(resp)))
                ret = -EFAULT;
 
@@ -991,7 +996,7 @@ static ssize_t ucma_query(struct ucma_file *file,
        if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
                return -EFAULT;
 
-       response = (void __user *)(unsigned long) cmd.response;
+       response = u64_to_user_ptr(cmd.response);
        ctx = ucma_get_ctx(file, cmd.id);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
@@ -1094,12 +1099,12 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
        if (cmd.conn_param.valid) {
                ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param);
                mutex_lock(&file->mut);
-               ret = rdma_accept(ctx->cm_id, &conn_param);
+               ret = __rdma_accept(ctx->cm_id, &conn_param, NULL);
                if (!ret)
                        ctx->uid = cmd.uid;
                mutex_unlock(&file->mut);
        } else
-               ret = rdma_accept(ctx->cm_id, NULL);
+               ret = __rdma_accept(ctx->cm_id, NULL, NULL);
 
        ucma_put_ctx(ctx);
        return ret;
@@ -1179,7 +1184,7 @@ static ssize_t ucma_init_qp_attr(struct ucma_file *file,
                goto out;
 
        ib_copy_qp_attr_to_user(ctx->cm_id->device, &resp, &qp_attr);
-       if (copy_to_user((void __user *)(unsigned long)cmd.response,
+       if (copy_to_user(u64_to_user_ptr(cmd.response),
                         &resp, sizeof(resp)))
                ret = -EFAULT;
 
@@ -1241,6 +1246,9 @@ static int ucma_set_ib_path(struct ucma_context *ctx,
        if (!optlen)
                return -EINVAL;
 
+       if (!ctx->cm_id->device)
+               return -EINVAL;
+
        memset(&sa_path, 0, sizeof(sa_path));
 
        sa_path.rec_type = SA_PATH_REC_TYPE_IB;
@@ -1315,7 +1323,7 @@ static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
        if (unlikely(cmd.optlen > KMALLOC_MAX_SIZE))
                return -EINVAL;
 
-       optval = memdup_user((void __user *) (unsigned long) cmd.optval,
+       optval = memdup_user(u64_to_user_ptr(cmd.optval),
                             cmd.optlen);
        if (IS_ERR(optval)) {
                ret = PTR_ERR(optval);
@@ -1395,7 +1403,7 @@ static ssize_t ucma_process_join(struct ucma_file *file,
                goto err2;
 
        resp.id = mc->id;
-       if (copy_to_user((void __user *)(unsigned long) cmd->response,
+       if (copy_to_user(u64_to_user_ptr(cmd->response),
                         &resp, sizeof(resp))) {
                ret = -EFAULT;
                goto err3;
@@ -1500,7 +1508,7 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file,
        resp.events_reported = mc->events_reported;
        kfree(mc);
 
-       if (copy_to_user((void __user *)(unsigned long)cmd.response,
+       if (copy_to_user(u64_to_user_ptr(cmd.response),
                         &resp, sizeof(resp)))
                ret = -EFAULT;
 out:
@@ -1587,7 +1595,7 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file,
        ucma_unlock_files(cur_file, new_file);
 
 response:
-       if (copy_to_user((void __user *)(unsigned long)cmd.response,
+       if (copy_to_user(u64_to_user_ptr(cmd.response),
                         &resp, sizeof(resp)))
                ret = -EFAULT;
 
index deccefb..cfb5161 100644 (file)
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_umem.h>
 #include <rdma/ib_user_verbs.h>
+#include <rdma/uverbs_std_types.h>
+
+#define UVERBS_MODULE_NAME ib_uverbs
+#include <rdma/uverbs_named_ioctl.h>
 
 static inline void
 ib_uverbs_init_udata(struct ib_udata *udata,
@@ -199,11 +203,18 @@ struct ib_ucq_object {
        u32                     async_events_reported;
 };
 
+struct ib_uflow_resources;
+struct ib_uflow_object {
+       struct ib_uobject               uobject;
+       struct ib_uflow_resources       *resources;
+};
+
 extern const struct file_operations uverbs_event_fops;
 void ib_uverbs_init_event_queue(struct ib_uverbs_event_queue *ev_queue);
 struct file *ib_uverbs_alloc_async_event_file(struct ib_uverbs_file *uverbs_file,
                                              struct ib_device *ib_dev);
 void ib_uverbs_free_async_event_file(struct ib_uverbs_file *uverbs_file);
+void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res);
 
 void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
                           struct ib_uverbs_completion_event_file *ev_file,
@@ -226,7 +237,13 @@ int uverbs_dealloc_mw(struct ib_mw *mw);
 void ib_uverbs_detach_umcast(struct ib_qp *qp,
                             struct ib_uqp_object *uobj);
 
+void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata);
+extern const struct uverbs_attr_def uverbs_uhw_compat_in;
+extern const struct uverbs_attr_def uverbs_uhw_compat_out;
 long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
+int uverbs_destroy_def_handler(struct ib_device *ib_dev,
+                              struct ib_uverbs_file *file,
+                              struct uverbs_attr_bundle *attrs);
 
 struct ib_uverbs_flow_spec {
        union {
@@ -240,13 +257,37 @@ struct ib_uverbs_flow_spec {
                };
                struct ib_uverbs_flow_spec_eth     eth;
                struct ib_uverbs_flow_spec_ipv4    ipv4;
+               struct ib_uverbs_flow_spec_esp     esp;
                struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
                struct ib_uverbs_flow_spec_ipv6    ipv6;
                struct ib_uverbs_flow_spec_action_tag   flow_tag;
                struct ib_uverbs_flow_spec_action_drop  drop;
+               struct ib_uverbs_flow_spec_action_handle action;
        };
 };
 
+int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type,
+                                         const void *kern_spec_mask,
+                                         const void *kern_spec_val,
+                                         size_t kern_filter_sz,
+                                         union ib_flow_spec *ib_spec);
+
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DEVICE);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_PD);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_MR);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_CQ);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_QP);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_AH);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_MW);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_SRQ);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_WQ);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_XRCD);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION);
+extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DM);
+
 #define IB_UVERBS_DECLARE_CMD(name)                                    \
        ssize_t ib_uverbs_##name(struct ib_uverbs_file *file,           \
                                 struct ib_device *ib_dev,              \
index a148de3..13cb5e4 100644 (file)
@@ -50,7 +50,7 @@
 static struct ib_uverbs_completion_event_file *
 ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context)
 {
-       struct ib_uobject *uobj = uobj_get_read(uobj_get_type(comp_channel),
+       struct ib_uobject *uobj = uobj_get_read(UVERBS_OBJECT_COMP_CHANNEL,
                                                fd, context);
        struct ib_uobject_file *uobj_file;
 
@@ -322,7 +322,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
                    in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
                    out_len - sizeof(resp));
 
-       uobj  = uobj_alloc(uobj_get_type(pd), file->ucontext);
+       uobj  = uobj_alloc(UVERBS_OBJECT_PD, file->ucontext);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
 
@@ -372,7 +372,7 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
 
-       uobj  = uobj_get_write(uobj_get_type(pd), cmd.pd_handle,
+       uobj  = uobj_get_write(UVERBS_OBJECT_PD, cmd.pd_handle,
                               file->ucontext);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
@@ -517,7 +517,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
                }
        }
 
-       obj  = (struct ib_uxrcd_object *)uobj_alloc(uobj_get_type(xrcd),
+       obj  = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD,
                                                    file->ucontext);
        if (IS_ERR(obj)) {
                ret = PTR_ERR(obj);
@@ -602,7 +602,7 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
 
-       uobj  = uobj_get_write(uobj_get_type(xrcd), cmd.xrcd_handle,
+       uobj  = uobj_get_write(UVERBS_OBJECT_XRCD, cmd.xrcd_handle,
                               file->ucontext);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
@@ -663,11 +663,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
        if (ret)
                return ret;
 
-       uobj  = uobj_alloc(uobj_get_type(mr), file->ucontext);
+       uobj  = uobj_alloc(UVERBS_OBJECT_MR, file->ucontext);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
 
-       pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
+       pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
        if (!pd) {
                ret = -EINVAL;
                goto err_free;
@@ -693,6 +693,8 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
        mr->pd      = pd;
        mr->uobject = uobj;
        atomic_inc(&pd->usecnt);
+       mr->res.type = RDMA_RESTRACK_MR;
+       rdma_restrack_add(&mr->res);
 
        uobj->object = mr;
 
@@ -756,7 +758,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
             (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
                        return -EINVAL;
 
-       uobj  = uobj_get_write(uobj_get_type(mr), cmd.mr_handle,
+       uobj  = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle,
                               file->ucontext);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
@@ -770,7 +772,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
        }
 
        if (cmd.flags & IB_MR_REREG_PD) {
-               pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
+               pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
                if (!pd) {
                        ret = -EINVAL;
                        goto put_uobjs;
@@ -822,7 +824,7 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
 
-       uobj  = uobj_get_write(uobj_get_type(mr), cmd.mr_handle,
+       uobj  = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle,
                               file->ucontext);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
@@ -851,11 +853,11 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
        if (copy_from_user(&cmd, buf, sizeof(cmd)))
                return -EFAULT;
 
-       uobj  = uobj_alloc(uobj_get_type(mw), file->ucontext);
+       uobj  = uobj_alloc(UVERBS_OBJECT_MW, file->ucontext);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
 
-       pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
+       pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
        if (!pd) {
                ret = -EINVAL;
                goto err_free;
@@ -914,7 +916,7 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
        if (copy_from_user(&cmd, buf, sizeof(cmd)))
                return -EFAULT;
 
-       uobj  = uobj_get_write(uobj_get_type(mw), cmd.mw_handle,
+       uobj  = uobj_get_write(UVERBS_OBJECT_MW, cmd.mw_handle,
                               file->ucontext);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
@@ -939,7 +941,7 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
 
-       uobj = uobj_alloc(uobj_get_type(comp_channel), file->ucontext);
+       uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file->ucontext);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
 
@@ -984,7 +986,7 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
        if (cmd->comp_vector >= file->device->num_comp_vectors)
                return ERR_PTR(-EINVAL);
 
-       obj  = (struct ib_ucq_object *)uobj_alloc(uobj_get_type(cq),
+       obj  = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ,
                                                  file->ucontext);
        if (IS_ERR(obj))
                return obj;
@@ -1173,7 +1175,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
                   in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
                   out_len - sizeof(resp));
 
-       cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
+       cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
        if (!cq)
                return -EINVAL;
 
@@ -1238,7 +1240,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
 
-       cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
+       cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
        if (!cq)
                return -EINVAL;
 
@@ -1285,7 +1287,7 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
 
-       cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
+       cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
        if (!cq)
                return -EINVAL;
 
@@ -1312,7 +1314,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
 
-       uobj  = uobj_get_write(uobj_get_type(cq), cmd.cq_handle,
+       uobj  = uobj_get_write(UVERBS_OBJECT_CQ, cmd.cq_handle,
                               file->ucontext);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
@@ -1371,7 +1373,7 @@ static int create_qp(struct ib_uverbs_file *file,
        if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
                return -EPERM;
 
-       obj  = (struct ib_uqp_object *)uobj_alloc(uobj_get_type(qp),
+       obj  = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP,
                                                  file->ucontext);
        if (IS_ERR(obj))
                return PTR_ERR(obj);
@@ -1382,7 +1384,7 @@ static int create_qp(struct ib_uverbs_file *file,
        if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) +
                      sizeof(cmd->rwq_ind_tbl_handle) &&
                      (cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) {
-               ind_tbl = uobj_get_obj_read(rwq_ind_table,
+               ind_tbl = uobj_get_obj_read(rwq_ind_table, UVERBS_OBJECT_RWQ_IND_TBL,
                                            cmd->rwq_ind_tbl_handle,
                                            file->ucontext);
                if (!ind_tbl) {
@@ -1409,7 +1411,7 @@ static int create_qp(struct ib_uverbs_file *file,
                has_sq = false;
 
        if (cmd->qp_type == IB_QPT_XRC_TGT) {
-               xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd->pd_handle,
+               xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->pd_handle,
                                          file->ucontext);
 
                if (IS_ERR(xrcd_uobj)) {
@@ -1429,7 +1431,7 @@ static int create_qp(struct ib_uverbs_file *file,
                        cmd->max_recv_sge = 0;
                } else {
                        if (cmd->is_srq) {
-                               srq = uobj_get_obj_read(srq, cmd->srq_handle,
+                               srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd->srq_handle,
                                                        file->ucontext);
                                if (!srq || srq->srq_type == IB_SRQT_XRC) {
                                        ret = -EINVAL;
@@ -1439,7 +1441,7 @@ static int create_qp(struct ib_uverbs_file *file,
 
                        if (!ind_tbl) {
                                if (cmd->recv_cq_handle != cmd->send_cq_handle) {
-                                       rcq = uobj_get_obj_read(cq, cmd->recv_cq_handle,
+                                       rcq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->recv_cq_handle,
                                                                file->ucontext);
                                        if (!rcq) {
                                                ret = -EINVAL;
@@ -1450,11 +1452,11 @@ static int create_qp(struct ib_uverbs_file *file,
                }
 
                if (has_sq)
-                       scq = uobj_get_obj_read(cq, cmd->send_cq_handle,
+                       scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->send_cq_handle,
                                                file->ucontext);
                if (!ind_tbl)
                        rcq = rcq ?: scq;
-               pd  = uobj_get_obj_read(pd, cmd->pd_handle, file->ucontext);
+               pd  = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext);
                if (!pd || (!scq && has_sq)) {
                        ret = -EINVAL;
                        goto err_put;
@@ -1751,12 +1753,12 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
                   in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
                   out_len - sizeof(resp));
 
-       obj  = (struct ib_uqp_object *)uobj_alloc(uobj_get_type(qp),
+       obj  = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP,
                                                  file->ucontext);
        if (IS_ERR(obj))
                return PTR_ERR(obj);
 
-       xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd.pd_handle,
+       xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle,
                                  file->ucontext);
        if (IS_ERR(xrcd_uobj)) {
                ret = -EINVAL;
@@ -1859,7 +1861,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
                goto out;
        }
 
-       qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
+       qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
        if (!qp) {
                ret = -EINVAL;
                goto out;
@@ -1964,7 +1966,7 @@ static int modify_qp(struct ib_uverbs_file *file,
        if (!attr)
                return -ENOMEM;
 
-       qp = uobj_get_obj_read(qp, cmd->base.qp_handle, file->ucontext);
+       qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, file->ucontext);
        if (!qp) {
                ret = -EINVAL;
                goto out;
@@ -1989,6 +1991,13 @@ static int modify_qp(struct ib_uverbs_file *file,
                goto release_qp;
        }
 
+       if ((cmd->base.attr_mask & IB_QP_CUR_STATE &&
+           cmd->base.cur_qp_state > IB_QPS_ERR) ||
+           cmd->base.qp_state > IB_QPS_ERR) {
+               ret = -EINVAL;
+               goto release_qp;
+       }
+
        attr->qp_state            = cmd->base.qp_state;
        attr->cur_qp_state        = cmd->base.cur_qp_state;
        attr->path_mtu            = cmd->base.path_mtu;
@@ -2112,7 +2121,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
 
        memset(&resp, 0, sizeof resp);
 
-       uobj  = uobj_get_write(uobj_get_type(qp), cmd.qp_handle,
+       uobj  = uobj_get_write(UVERBS_OBJECT_QP, cmd.qp_handle,
                               file->ucontext);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
@@ -2178,7 +2187,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
        if (!user_wr)
                return -ENOMEM;
 
-       qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
+       qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
        if (!qp)
                goto out;
 
@@ -2214,7 +2223,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
                                goto out_put;
                        }
 
-                       ud->ah = uobj_get_obj_read(ah, user_wr->wr.ud.ah,
+                       ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH, user_wr->wr.ud.ah,
                                                   file->ucontext);
                        if (!ud->ah) {
                                kfree(ud);
@@ -2449,7 +2458,7 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
        if (IS_ERR(wr))
                return PTR_ERR(wr);
 
-       qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
+       qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
        if (!qp)
                goto out;
 
@@ -2498,7 +2507,7 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
        if (IS_ERR(wr))
                return PTR_ERR(wr);
 
-       srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext);
+       srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext);
        if (!srq)
                goto out;
 
@@ -2555,11 +2564,11 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
                   in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
                   out_len - sizeof(resp));
 
-       uobj  = uobj_alloc(uobj_get_type(ah), file->ucontext);
+       uobj  = uobj_alloc(UVERBS_OBJECT_AH, file->ucontext);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
 
-       pd = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
+       pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
        if (!pd) {
                ret = -EINVAL;
                goto err;
@@ -2627,7 +2636,7 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
 
-       uobj  = uobj_get_write(uobj_get_type(ah), cmd.ah_handle,
+       uobj  = uobj_get_write(UVERBS_OBJECT_AH, cmd.ah_handle,
                               file->ucontext);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
@@ -2650,7 +2659,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
 
-       qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
+       qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
        if (!qp)
                return -EINVAL;
 
@@ -2701,7 +2710,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
 
-       qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
+       qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
        if (!qp)
                return -EINVAL;
 
@@ -2730,8 +2739,52 @@ out_put:
        return ret ? ret : in_len;
 }
 
-static int kern_spec_to_ib_spec_action(struct ib_uverbs_flow_spec *kern_spec,
-                                      union ib_flow_spec *ib_spec)
+struct ib_uflow_resources {
+       size_t                  max;
+       size_t                  num;
+       struct ib_flow_action   *collection[0];
+};
+
+static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
+{
+       struct ib_uflow_resources *resources;
+
+       resources =
+               kmalloc(sizeof(*resources) +
+                       num_specs * sizeof(*resources->collection), GFP_KERNEL);
+
+       if (!resources)
+               return NULL;
+
+       resources->num = 0;
+       resources->max = num_specs;
+
+       return resources;
+}
+
+void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res)
+{
+       unsigned int i;
+
+       for (i = 0; i < uflow_res->num; i++)
+               atomic_dec(&uflow_res->collection[i]->usecnt);
+
+       kfree(uflow_res);
+}
+
+static void flow_resources_add(struct ib_uflow_resources *uflow_res,
+                              struct ib_flow_action *action)
+{
+       WARN_ON(uflow_res->num >= uflow_res->max);
+
+       atomic_inc(&action->usecnt);
+       uflow_res->collection[uflow_res->num++] = action;
+}
+
+static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext,
+                                      struct ib_uverbs_flow_spec *kern_spec,
+                                      union ib_flow_spec *ib_spec,
+                                      struct ib_uflow_resources *uflow_res)
 {
        ib_spec->type = kern_spec->type;
        switch (ib_spec->type) {
@@ -2750,19 +2803,34 @@ static int kern_spec_to_ib_spec_action(struct ib_uverbs_flow_spec *kern_spec,
 
                ib_spec->drop.size = sizeof(struct ib_flow_spec_action_drop);
                break;
+       case IB_FLOW_SPEC_ACTION_HANDLE:
+               if (kern_spec->action.size !=
+                   sizeof(struct ib_uverbs_flow_spec_action_handle))
+                       return -EOPNOTSUPP;
+               ib_spec->action.act = uobj_get_obj_read(flow_action,
+                                                       UVERBS_OBJECT_FLOW_ACTION,
+                                                       kern_spec->action.handle,
+                                                       ucontext);
+               if (!ib_spec->action.act)
+                       return -EINVAL;
+               ib_spec->action.size =
+                       sizeof(struct ib_flow_spec_action_handle);
+               flow_resources_add(uflow_res, ib_spec->action.act);
+               uobj_put_obj_read(ib_spec->action.act);
+               break;
        default:
                return -EINVAL;
        }
        return 0;
 }
 
-static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec)
+static size_t kern_spec_filter_sz(const struct ib_uverbs_flow_spec_hdr *spec)
 {
        /* Returns user space filter size, includes padding */
        return (spec->size - sizeof(struct ib_uverbs_flow_spec_hdr)) / 2;
 }
 
-static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size,
+static ssize_t spec_filter_size(const void *kern_spec_filter, u16 kern_filter_size,
                                u16 ib_real_filter_sz)
 {
        /*
@@ -2780,28 +2848,21 @@ static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size,
        return kern_filter_size;
 }
 
-static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
-                                      union ib_flow_spec *ib_spec)
+int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type,
+                                         const void *kern_spec_mask,
+                                         const void *kern_spec_val,
+                                         size_t kern_filter_sz,
+                                         union ib_flow_spec *ib_spec)
 {
        ssize_t actual_filter_sz;
-       ssize_t kern_filter_sz;
        ssize_t ib_filter_sz;
-       void *kern_spec_mask;
-       void *kern_spec_val;
 
-       if (kern_spec->reserved)
-               return -EINVAL;
-
-       ib_spec->type = kern_spec->type;
-
-       kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr);
        /* User flow spec size must be aligned to 4 bytes */
        if (kern_filter_sz != ALIGN(kern_filter_sz, 4))
                return -EINVAL;
 
-       kern_spec_val = (void *)kern_spec +
-               sizeof(struct ib_uverbs_flow_spec_hdr);
-       kern_spec_mask = kern_spec_val + kern_filter_sz;
+       ib_spec->type = type;
+
        if (ib_spec->type == (IB_FLOW_SPEC_INNER | IB_FLOW_SPEC_VXLAN_TUNNEL))
                return -EINVAL;
 
@@ -2870,20 +2931,56 @@ static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
                    (ntohl(ib_spec->tunnel.val.tunnel_id)) >= BIT(24))
                        return -EINVAL;
                break;
+       case IB_FLOW_SPEC_ESP:
+               ib_filter_sz = offsetof(struct ib_flow_esp_filter, real_sz);
+               actual_filter_sz = spec_filter_size(kern_spec_mask,
+                                                   kern_filter_sz,
+                                                   ib_filter_sz);
+               if (actual_filter_sz <= 0)
+                       return -EINVAL;
+               ib_spec->esp.size = sizeof(struct ib_flow_spec_esp);
+               memcpy(&ib_spec->esp.val, kern_spec_val, actual_filter_sz);
+               memcpy(&ib_spec->esp.mask, kern_spec_mask, actual_filter_sz);
+               break;
        default:
                return -EINVAL;
        }
        return 0;
 }
 
-static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
-                               union ib_flow_spec *ib_spec)
+static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
+                                      union ib_flow_spec *ib_spec)
+{
+       ssize_t kern_filter_sz;
+       void *kern_spec_mask;
+       void *kern_spec_val;
+
+       if (kern_spec->reserved)
+               return -EINVAL;
+
+       kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr);
+
+       kern_spec_val = (void *)kern_spec +
+               sizeof(struct ib_uverbs_flow_spec_hdr);
+       kern_spec_mask = kern_spec_val + kern_filter_sz;
+
+       return ib_uverbs_kern_spec_to_ib_spec_filter(kern_spec->type,
+                                                    kern_spec_mask,
+                                                    kern_spec_val,
+                                                    kern_filter_sz, ib_spec);
+}
+
+static int kern_spec_to_ib_spec(struct ib_ucontext *ucontext,
+                               struct ib_uverbs_flow_spec *kern_spec,
+                               union ib_flow_spec *ib_spec,
+                               struct ib_uflow_resources *uflow_res)
 {
        if (kern_spec->reserved)
                return -EINVAL;
 
        if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG)
-               return kern_spec_to_ib_spec_action(kern_spec, ib_spec);
+               return kern_spec_to_ib_spec_action(ucontext, kern_spec, ib_spec,
+                                                  uflow_res);
        else
                return kern_spec_to_ib_spec_filter(kern_spec, ib_spec);
 }
@@ -2925,18 +3022,18 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
        if (cmd.comp_mask)
                return -EOPNOTSUPP;
 
-       obj  = (struct ib_uwq_object *)uobj_alloc(uobj_get_type(wq),
+       obj  = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ,
                                                  file->ucontext);
        if (IS_ERR(obj))
                return PTR_ERR(obj);
 
-       pd  = uobj_get_obj_read(pd, cmd.pd_handle, file->ucontext);
+       pd  = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
        if (!pd) {
                err = -EINVAL;
                goto err_uobj;
        }
 
-       cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
+       cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
        if (!cq) {
                err = -EINVAL;
                goto err_put_pd;
@@ -3040,7 +3137,7 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
                return -EOPNOTSUPP;
 
        resp.response_length = required_resp_len;
-       uobj  = uobj_get_write(uobj_get_type(wq), cmd.wq_handle,
+       uobj  = uobj_get_write(UVERBS_OBJECT_WQ, cmd.wq_handle,
                               file->ucontext);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
@@ -3091,7 +3188,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
        if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
                return -EINVAL;
 
-       wq = uobj_get_obj_read(wq, cmd.wq_handle, file->ucontext);
+       wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file->ucontext);
        if (!wq)
                return -EINVAL;
 
@@ -3185,7 +3282,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
 
        for (num_read_wqs = 0; num_read_wqs < num_wq_handles;
                        num_read_wqs++) {
-               wq = uobj_get_obj_read(wq, wqs_handles[num_read_wqs],
+               wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, wqs_handles[num_read_wqs],
                                       file->ucontext);
                if (!wq) {
                        err = -EINVAL;
@@ -3195,7 +3292,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
                wqs[num_read_wqs] = wq;
        }
 
-       uobj  = uobj_alloc(uobj_get_type(rwq_ind_table), file->ucontext);
+       uobj  = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file->ucontext);
        if (IS_ERR(uobj)) {
                err = PTR_ERR(uobj);
                goto put_wqs;
@@ -3282,7 +3379,7 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
        if (cmd.comp_mask)
                return -EOPNOTSUPP;
 
-       uobj  = uobj_get_write(uobj_get_type(rwq_ind_table), cmd.ind_tbl_handle,
+       uobj  = uobj_get_write(UVERBS_OBJECT_RWQ_IND_TBL, cmd.ind_tbl_handle,
                               file->ucontext);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
@@ -3298,10 +3395,12 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
        struct ib_uverbs_create_flow      cmd;
        struct ib_uverbs_create_flow_resp resp;
        struct ib_uobject                 *uobj;
+       struct ib_uflow_object            *uflow;
        struct ib_flow                    *flow_id;
        struct ib_uverbs_flow_attr        *kern_flow_attr;
        struct ib_flow_attr               *flow_attr;
        struct ib_qp                      *qp;
+       struct ib_uflow_resources         *uflow_res;
        int err = 0;
        void *kern_spec;
        void *ib_spec;
@@ -3361,13 +3460,13 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
                kern_flow_attr = &cmd.flow_attr;
        }
 
-       uobj  = uobj_alloc(uobj_get_type(flow), file->ucontext);
+       uobj  = uobj_alloc(UVERBS_OBJECT_FLOW, file->ucontext);
        if (IS_ERR(uobj)) {
                err = PTR_ERR(uobj);
                goto err_free_attr;
        }
 
-       qp = uobj_get_obj_read(qp, cmd.qp_handle, file->ucontext);
+       qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
        if (!qp) {
                err = -EINVAL;
                goto err_uobj;
@@ -3379,6 +3478,11 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
                err = -ENOMEM;
                goto err_put;
        }
+       uflow_res = flow_resources_alloc(cmd.flow_attr.num_of_specs);
+       if (!uflow_res) {
+               err = -ENOMEM;
+               goto err_free_flow_attr;
+       }
 
        flow_attr->type = kern_flow_attr->type;
        flow_attr->priority = kern_flow_attr->priority;
@@ -3393,7 +3497,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
             cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) &&
             cmd.flow_attr.size >=
             ((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) {
-               err = kern_spec_to_ib_spec(kern_spec, ib_spec);
+               err = kern_spec_to_ib_spec(file->ucontext, kern_spec, ib_spec,
+                                          uflow_res);
                if (err)
                        goto err_free;
                flow_attr->size +=
@@ -3415,6 +3520,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
        }
        flow_id->uobject = uobj;
        uobj->object = flow_id;
+       uflow = container_of(uobj, typeof(*uflow), uobject);
+       uflow->resources = uflow_res;
 
        memset(&resp, 0, sizeof(resp));
        resp.flow_handle = uobj->id;
@@ -3433,6 +3540,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 err_copy:
        ib_destroy_flow(flow_id);
 err_free:
+       ib_uverbs_flow_resources_free(uflow_res);
+err_free_flow_attr:
        kfree(flow_attr);
 err_put:
        uobj_put_obj_read(qp);
@@ -3463,7 +3572,7 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
        if (cmd.comp_mask)
                return -EINVAL;
 
-       uobj  = uobj_get_write(uobj_get_type(flow), cmd.flow_handle,
+       uobj  = uobj_get_write(UVERBS_OBJECT_FLOW, cmd.flow_handle,
                               file->ucontext);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
@@ -3485,7 +3594,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
        struct ib_srq_init_attr          attr;
        int ret;
 
-       obj  = (struct ib_usrq_object *)uobj_alloc(uobj_get_type(srq),
+       obj  = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ,
                                                   file->ucontext);
        if (IS_ERR(obj))
                return PTR_ERR(obj);
@@ -3494,7 +3603,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
                attr.ext.tag_matching.max_num_tags = cmd->max_num_tags;
 
        if (cmd->srq_type == IB_SRQT_XRC) {
-               xrcd_uobj = uobj_get_read(uobj_get_type(xrcd), cmd->xrcd_handle,
+               xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->xrcd_handle,
                                          file->ucontext);
                if (IS_ERR(xrcd_uobj)) {
                        ret = -EINVAL;
@@ -3512,7 +3621,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
        }
 
        if (ib_srq_has_cq(cmd->srq_type)) {
-               attr.ext.cq  = uobj_get_obj_read(cq, cmd->cq_handle,
+               attr.ext.cq  = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->cq_handle,
                                                 file->ucontext);
                if (!attr.ext.cq) {
                        ret = -EINVAL;
@@ -3520,7 +3629,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
                }
        }
 
-       pd  = uobj_get_obj_read(pd, cmd->pd_handle, file->ucontext);
+       pd  = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext);
        if (!pd) {
                ret = -EINVAL;
                goto err_put_cq;
@@ -3572,7 +3681,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
        if (cmd->srq_type == IB_SRQT_XRC)
                resp.srqn = srq->ext.xrc.srq_num;
 
-       if (copy_to_user((void __user *) (unsigned long) cmd->response,
+       if (copy_to_user(u64_to_user_ptr(cmd->response),
                         &resp, sizeof resp)) {
                ret = -EFAULT;
                goto err_copy;
@@ -3692,7 +3801,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
        ib_uverbs_init_udata(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
                   out_len);
 
-       srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext);
+       srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext);
        if (!srq)
                return -EINVAL;
 
@@ -3723,7 +3832,7 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
 
-       srq = uobj_get_obj_read(srq, cmd.srq_handle, file->ucontext);
+       srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext);
        if (!srq)
                return -EINVAL;
 
@@ -3760,7 +3869,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
 
-       uobj  = uobj_get_write(uobj_get_type(srq), cmd.srq_handle,
+       uobj  = uobj_get_write(UVERBS_OBJECT_SRQ, cmd.srq_handle,
                               file->ucontext);
        if (IS_ERR(uobj))
                return PTR_ERR(uobj);
@@ -3897,6 +4006,12 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
        resp.cq_moderation_caps.max_cq_moderation_period =
                attr.cq_caps.max_cq_moderation_period;
        resp.response_length += sizeof(resp.cq_moderation_caps);
+
+       if (ucore->outlen < resp.response_length + sizeof(resp.max_dm_size))
+               goto end;
+
+       resp.max_dm_size = attr.max_dm_size;
+       resp.response_length += sizeof(resp.max_dm_size);
 end:
        err = ib_copy_to_udata(ucore, &resp, resp.response_length);
        return err;
@@ -3933,7 +4048,7 @@ int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file,
        if (cmd.attr_mask > IB_CQ_MODERATE)
                return -EOPNOTSUPP;
 
-       cq = uobj_get_obj_read(cq, cmd.cq_handle, file->ucontext);
+       cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
        if (!cq)
                return -EINVAL;
 
index 339b851..8c93970 100644 (file)
 #include "rdma_core.h"
 #include "uverbs.h"
 
+static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr,
+                                  u16 len)
+{
+       if (uattr->len > sizeof(((struct ib_uverbs_attr *)0)->data))
+               return ib_is_buffer_cleared(u64_to_user_ptr(uattr->data) + len,
+                                           uattr->len - len);
+
+       return !memchr_inv((const void *)&uattr->data + len,
+                          0, uattr->len - len);
+}
+
 static int uverbs_process_attr(struct ib_device *ibdev,
                               struct ib_ucontext *ucontext,
                               const struct ib_uverbs_attr *uattr,
@@ -44,14 +55,12 @@ static int uverbs_process_attr(struct ib_device *ibdev,
                               struct ib_uverbs_attr __user *uattr_ptr)
 {
        const struct uverbs_attr_spec *spec;
+       const struct uverbs_attr_spec *val_spec;
        struct uverbs_attr *e;
        const struct uverbs_object_spec *object;
        struct uverbs_obj_attr *o_attr;
        struct uverbs_attr *elements = attr_bundle_h->attrs;
 
-       if (uattr->reserved)
-               return -EINVAL;
-
        if (attr_id >= attr_spec_bucket->num_attrs) {
                if (uattr->flags & UVERBS_ATTR_F_MANDATORY)
                        return -EINVAL;
@@ -63,15 +72,46 @@ static int uverbs_process_attr(struct ib_device *ibdev,
                return -EINVAL;
 
        spec = &attr_spec_bucket->attrs[attr_id];
+       val_spec = spec;
        e = &elements[attr_id];
        e->uattr = uattr_ptr;
 
        switch (spec->type) {
+       case UVERBS_ATTR_TYPE_ENUM_IN:
+               if (uattr->attr_data.enum_data.elem_id >= spec->enum_def.num_elems)
+                       return -EOPNOTSUPP;
+
+               if (uattr->attr_data.enum_data.reserved)
+                       return -EINVAL;
+
+               val_spec = &spec->enum_def.ids[uattr->attr_data.enum_data.elem_id];
+
+               /* Currently we only support PTR_IN based enums */
+               if (val_spec->type != UVERBS_ATTR_TYPE_PTR_IN)
+                       return -EOPNOTSUPP;
+
+               e->ptr_attr.enum_id = uattr->attr_data.enum_data.elem_id;
+       /* fall through */
        case UVERBS_ATTR_TYPE_PTR_IN:
+               /* Ensure that any data provided by userspace beyond the known
+                * struct is zero. Userspace that knows how to use some future
+                * longer struct will fail here if used with an old kernel and
+                * non-zero content, making ABI compat/discovery simpler.
+                */
+               if (uattr->len > val_spec->ptr.len &&
+                   val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO &&
+                   !uverbs_is_attr_cleared(uattr, val_spec->ptr.len))
+                       return -EOPNOTSUPP;
+
+       /* fall through */
        case UVERBS_ATTR_TYPE_PTR_OUT:
-               if (uattr->len < spec->len ||
-                   (!(spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ) &&
-                    uattr->len > spec->len))
+               if (uattr->len < val_spec->ptr.min_len ||
+                   (!(val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO) &&
+                    uattr->len > val_spec->ptr.len))
+                       return -EINVAL;
+
+               if (spec->type != UVERBS_ATTR_TYPE_ENUM_IN &&
+                   uattr->attr_data.reserved)
                        return -EINVAL;
 
                e->ptr_attr.data = uattr->data;
@@ -84,6 +124,9 @@ static int uverbs_process_attr(struct ib_device *ibdev,
                        return -EINVAL;
        /* fall through */
        case UVERBS_ATTR_TYPE_FD:
+               if (uattr->attr_data.reserved)
+                       return -EINVAL;
+
                if (uattr->len != 0 || !ucontext || uattr->data > INT_MAX)
                        return -EINVAL;
 
@@ -246,6 +289,9 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
        size_t ctx_size;
        uintptr_t data[UVERBS_OPTIMIZE_USING_STACK_SZ / sizeof(uintptr_t)];
 
+       if (hdr->driver_id != ib_dev->driver_id)
+               return -EINVAL;
+
        object_spec = uverbs_get_object(ib_dev, hdr->object_id);
        if (!object_spec)
                return -EPROTONOSUPPORT;
@@ -350,7 +396,7 @@ long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                        goto out;
                }
 
-               if (hdr.reserved) {
+               if (hdr.reserved1 || hdr.reserved2) {
                        err = -EPROTONOSUPPORT;
                        goto out;
                }
index 62e1eb1..0f88a19 100644 (file)
@@ -379,7 +379,7 @@ static struct uverbs_method_spec *build_method_with_attrs(const struct uverbs_me
                                 "ib_uverbs: Tried to merge attr (%d) but it's an object with new/destroy access but isn't mandatory\n",
                                 min_id) ||
                            WARN(IS_ATTR_OBJECT(attr) &&
-                                attr->flags & UVERBS_ATTR_SPEC_F_MIN_SZ,
+                                attr->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
                                 "ib_uverbs: Tried to merge attr (%d) but it's an object with min_sz flag\n",
                                 min_id)) {
                                res = -EINVAL;
index b1ca223..4445d8e 100644 (file)
@@ -468,7 +468,7 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
                return;
        }
 
-       entry = kmalloc(sizeof *entry, GFP_ATOMIC);
+       entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
        if (!entry) {
                spin_unlock_irqrestore(&ev_queue->lock, flags);
                return;
@@ -501,7 +501,7 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
                return;
        }
 
-       entry = kmalloc(sizeof *entry, GFP_ATOMIC);
+       entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
        if (!entry) {
                spin_unlock_irqrestore(&file->async_file->ev_queue.lock, flags);
                return;
@@ -635,39 +635,87 @@ err_put_refs:
        return filp;
 }
 
-static int verify_command_mask(struct ib_device *ib_dev, __u32 command)
+static bool verify_command_mask(struct ib_device *ib_dev,
+                               u32 command, bool extended)
 {
-       u64 mask;
+       if (!extended)
+               return ib_dev->uverbs_cmd_mask & BIT_ULL(command);
 
-       if (command <= IB_USER_VERBS_CMD_OPEN_QP)
-               mask = ib_dev->uverbs_cmd_mask;
-       else
-               mask = ib_dev->uverbs_ex_cmd_mask;
-
-       if (mask & ((u64)1 << command))
-               return 0;
-
-       return -1;
+       return ib_dev->uverbs_ex_cmd_mask & BIT_ULL(command);
 }
 
 static bool verify_command_idx(u32 command, bool extended)
 {
        if (extended)
-               return command < ARRAY_SIZE(uverbs_ex_cmd_table);
+               return command < ARRAY_SIZE(uverbs_ex_cmd_table) &&
+                      uverbs_ex_cmd_table[command];
+
+       return command < ARRAY_SIZE(uverbs_cmd_table) &&
+              uverbs_cmd_table[command];
+}
+
+static ssize_t process_hdr(struct ib_uverbs_cmd_hdr *hdr,
+                          u32 *command, bool *extended)
+{
+       if (hdr->command & ~(u32)(IB_USER_VERBS_CMD_FLAG_EXTENDED |
+                                  IB_USER_VERBS_CMD_COMMAND_MASK))
+               return -EINVAL;
+
+       *command = hdr->command & IB_USER_VERBS_CMD_COMMAND_MASK;
+       *extended = hdr->command & IB_USER_VERBS_CMD_FLAG_EXTENDED;
+
+       if (!verify_command_idx(*command, *extended))
+               return -EOPNOTSUPP;
+
+       return 0;
+}
+
+static ssize_t verify_hdr(struct ib_uverbs_cmd_hdr *hdr,
+                         struct ib_uverbs_ex_cmd_hdr *ex_hdr,
+                         size_t count, bool extended)
+{
+       if (extended) {
+               count -= sizeof(*hdr) + sizeof(*ex_hdr);
+
+               if ((hdr->in_words + ex_hdr->provider_in_words) * 8 != count)
+                       return -EINVAL;
+
+               if (ex_hdr->cmd_hdr_reserved)
+                       return -EINVAL;
 
-       return command < ARRAY_SIZE(uverbs_cmd_table);
+               if (ex_hdr->response) {
+                       if (!hdr->out_words && !ex_hdr->provider_out_words)
+                               return -EINVAL;
+
+                       if (!access_ok(VERIFY_WRITE,
+                                      u64_to_user_ptr(ex_hdr->response),
+                                      (hdr->out_words + ex_hdr->provider_out_words) * 8))
+                               return -EFAULT;
+               } else {
+                       if (hdr->out_words || ex_hdr->provider_out_words)
+                               return -EINVAL;
+               }
+
+               return 0;
+       }
+
+       /* not extended command */
+       if (hdr->in_words * 4 != count)
+               return -EINVAL;
+
+       return 0;
 }
 
 static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
                             size_t count, loff_t *pos)
 {
        struct ib_uverbs_file *file = filp->private_data;
+       struct ib_uverbs_ex_cmd_hdr ex_hdr;
        struct ib_device *ib_dev;
        struct ib_uverbs_cmd_hdr hdr;
-       bool extended_command;
-       __u32 command;
-       __u32 flags;
+       bool extended;
        int srcu_key;
+       u32 command;
        ssize_t ret;
 
        if (!ib_safe_file_access(filp)) {
@@ -676,12 +724,31 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
                return -EACCES;
        }
 
-       if (count < sizeof hdr)
+       if (count < sizeof(hdr))
                return -EINVAL;
 
-       if (copy_from_user(&hdr, buf, sizeof hdr))
+       if (copy_from_user(&hdr, buf, sizeof(hdr)))
                return -EFAULT;
 
+       ret = process_hdr(&hdr, &command, &extended);
+       if (ret)
+               return ret;
+
+       if (!file->ucontext &&
+           (command != IB_USER_VERBS_CMD_GET_CONTEXT || extended))
+               return -EINVAL;
+
+       if (extended) {
+               if (count < (sizeof(hdr) + sizeof(ex_hdr)))
+                       return -EINVAL;
+               if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
+                       return -EFAULT;
+       }
+
+       ret = verify_hdr(&hdr, &ex_hdr, count, extended);
+       if (ret)
+               return ret;
+
        srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
        ib_dev = srcu_dereference(file->device->ib_dev,
                                  &file->device->disassociate_srcu);
@@ -690,106 +757,22 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
                goto out;
        }
 
-       if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
-                                  IB_USER_VERBS_CMD_COMMAND_MASK)) {
-               ret = -EINVAL;
-               goto out;
-       }
-
-       command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
-       flags = (hdr.command &
-                IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
-
-       extended_command = flags & IB_USER_VERBS_CMD_FLAG_EXTENDED;
-       if (!verify_command_idx(command, extended_command)) {
-               ret = -EINVAL;
-               goto out;
-       }
-
-       if (verify_command_mask(ib_dev, command)) {
+       if (!verify_command_mask(ib_dev, command, extended)) {
                ret = -EOPNOTSUPP;
                goto out;
        }
 
-       if (!file->ucontext &&
-           command != IB_USER_VERBS_CMD_GET_CONTEXT) {
-               ret = -EINVAL;
-               goto out;
-       }
-
-       if (!flags) {
-               if (!uverbs_cmd_table[command]) {
-                       ret = -EINVAL;
-                       goto out;
-               }
-
-               if (hdr.in_words * 4 != count) {
-                       ret = -EINVAL;
-                       goto out;
-               }
+       buf += sizeof(hdr);
 
-               ret = uverbs_cmd_table[command](file, ib_dev,
-                                                buf + sizeof(hdr),
-                                                hdr.in_words * 4,
-                                                hdr.out_words * 4);
-
-       } else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) {
-               struct ib_uverbs_ex_cmd_hdr ex_hdr;
+       if (!extended) {
+               ret = uverbs_cmd_table[command](file, ib_dev, buf,
+                                               hdr.in_words * 4,
+                                               hdr.out_words * 4);
+       } else {
                struct ib_udata ucore;
                struct ib_udata uhw;
-               size_t written_count = count;
 
-               if (!uverbs_ex_cmd_table[command]) {
-                       ret = -ENOSYS;
-                       goto out;
-               }
-
-               if (!file->ucontext) {
-                       ret = -EINVAL;
-                       goto out;
-               }
-
-               if (count < (sizeof(hdr) + sizeof(ex_hdr))) {
-                       ret = -EINVAL;
-                       goto out;
-               }
-
-               if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) {
-                       ret = -EFAULT;
-                       goto out;
-               }
-
-               count -= sizeof(hdr) + sizeof(ex_hdr);
-               buf += sizeof(hdr) + sizeof(ex_hdr);
-
-               if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count) {
-                       ret = -EINVAL;
-                       goto out;
-               }
-
-               if (ex_hdr.cmd_hdr_reserved) {
-                       ret = -EINVAL;
-                       goto out;
-               }
-
-               if (ex_hdr.response) {
-                       if (!hdr.out_words && !ex_hdr.provider_out_words) {
-                               ret = -EINVAL;
-                               goto out;
-                       }
-
-                       if (!access_ok(VERIFY_WRITE,
-                                      u64_to_user_ptr(ex_hdr.response),
-                                      (hdr.out_words + ex_hdr.provider_out_words) * 8)) {
-                               ret = -EFAULT;
-                               goto out;
-                       }
-               } else {
-                       if (hdr.out_words || ex_hdr.provider_out_words) {
-                               ret = -EINVAL;
-                               goto out;
-                       }
-               }
+               buf += sizeof(ex_hdr);
 
                ib_uverbs_init_udata_buf_or_null(&ucore, buf,
                                        u64_to_user_ptr(ex_hdr.response),
@@ -802,10 +785,7 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
                                        ex_hdr.provider_out_words * 8);
 
                ret = uverbs_ex_cmd_table[command](file, ib_dev, &ucore, &uhw);
-               if (!ret)
-                       ret = written_count;
-       } else {
-               ret = -ENOSYS;
+               ret = (ret) ? : count;
        }
 
 out:
@@ -953,10 +933,8 @@ static const struct file_operations uverbs_fops = {
        .open    = ib_uverbs_open,
        .release = ib_uverbs_close,
        .llseek  = no_llseek,
-#if IS_ENABLED(CONFIG_INFINIBAND_EXP_USER_ACCESS)
        .unlocked_ioctl = ib_uverbs_ioctl,
        .compat_ioctl = ib_uverbs_ioctl,
-#endif
 };
 
 static const struct file_operations uverbs_mmap_fops = {
@@ -966,10 +944,8 @@ static const struct file_operations uverbs_mmap_fops = {
        .open    = ib_uverbs_open,
        .release = ib_uverbs_close,
        .llseek  = no_llseek,
-#if IS_ENABLED(CONFIG_INFINIBAND_EXP_USER_ACCESS)
        .unlocked_ioctl = ib_uverbs_ioctl,
        .compat_ioctl = ib_uverbs_ioctl,
-#endif
 };
 
 static struct ib_client uverbs_client = {
@@ -1032,7 +1008,7 @@ static void ib_uverbs_add_one(struct ib_device *device)
        if (!device->alloc_ucontext)
                return;
 
-       uverbs_dev = kzalloc(sizeof *uverbs_dev, GFP_KERNEL);
+       uverbs_dev = kzalloc(sizeof(*uverbs_dev), GFP_KERNEL);
        if (!uverbs_dev)
                return;
 
index df1360e..569f48b 100644 (file)
@@ -48,7 +48,16 @@ static int uverbs_free_ah(struct ib_uobject *uobject,
 static int uverbs_free_flow(struct ib_uobject *uobject,
                            enum rdma_remove_reason why)
 {
-       return ib_destroy_flow((struct ib_flow *)uobject->object);
+       int ret;
+       struct ib_flow *flow = (struct ib_flow *)uobject->object;
+       struct ib_uflow_object *uflow =
+               container_of(uobject, struct ib_uflow_object, uobject);
+
+       ret = ib_destroy_flow(flow);
+       if (!ret)
+               ib_uverbs_flow_resources_free(uflow->resources);
+
+       return ret;
 }
 
 static int uverbs_free_mw(struct ib_uobject *uobject,
@@ -135,31 +144,6 @@ static int uverbs_free_srq(struct ib_uobject *uobject,
        return ret;
 }
 
-static int uverbs_free_cq(struct ib_uobject *uobject,
-                         enum rdma_remove_reason why)
-{
-       struct ib_cq *cq = uobject->object;
-       struct ib_uverbs_event_queue *ev_queue = cq->cq_context;
-       struct ib_ucq_object *ucq =
-               container_of(uobject, struct ib_ucq_object, uobject);
-       int ret;
-
-       ret = ib_destroy_cq(cq);
-       if (!ret || why != RDMA_REMOVE_DESTROY)
-               ib_uverbs_release_ucq(uobject->context->ufile, ev_queue ?
-                                     container_of(ev_queue,
-                                                  struct ib_uverbs_completion_event_file,
-                                                  ev_queue) : NULL,
-                                     ucq);
-       return ret;
-}
-
-static int uverbs_free_mr(struct ib_uobject *uobject,
-                         enum rdma_remove_reason why)
-{
-       return ib_dereg_mr((struct ib_mr *)uobject->object);
-}
-
 static int uverbs_free_xrcd(struct ib_uobject *uobject,
                            enum rdma_remove_reason why)
 {
@@ -210,18 +194,26 @@ static int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_
        return 0;
 };
 
+int uverbs_destroy_def_handler(struct ib_device *ib_dev,
+                              struct ib_uverbs_file *file,
+                              struct uverbs_attr_bundle *attrs)
+{
+       return 0;
+}
+
 /*
  * This spec is used in order to pass information to the hardware driver in a
  * legacy way. Every verb that could get driver specific data should get this
  * spec.
  */
-static const struct uverbs_attr_def uverbs_uhw_compat_in =
-       UVERBS_ATTR_PTR_IN_SZ(UVERBS_UHW_IN, 0, UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ));
-static const struct uverbs_attr_def uverbs_uhw_compat_out =
-       UVERBS_ATTR_PTR_OUT_SZ(UVERBS_UHW_OUT, 0, UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ));
-
-static void create_udata(struct uverbs_attr_bundle *ctx,
-                        struct ib_udata *udata)
+const struct uverbs_attr_def uverbs_uhw_compat_in =
+       UVERBS_ATTR_PTR_IN_SZ(UVERBS_ATTR_UHW_IN, UVERBS_ATTR_SIZE(0, USHRT_MAX),
+                             UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO));
+const struct uverbs_attr_def uverbs_uhw_compat_out =
+       UVERBS_ATTR_PTR_OUT_SZ(UVERBS_ATTR_UHW_OUT, UVERBS_ATTR_SIZE(0, USHRT_MAX),
+                              UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO));
+
+void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata)
 {
        /*
         * This is for ease of conversion. The purpose is to convert all drivers
@@ -229,9 +221,9 @@ static void create_udata(struct uverbs_attr_bundle *ctx,
         * Assume attr == 0 is input and attr == 1 is output.
         */
        const struct uverbs_attr *uhw_in =
-               uverbs_attr_get(ctx, UVERBS_UHW_IN);
+               uverbs_attr_get(ctx, UVERBS_ATTR_UHW_IN);
        const struct uverbs_attr *uhw_out =
-               uverbs_attr_get(ctx, UVERBS_UHW_OUT);
+               uverbs_attr_get(ctx, UVERBS_ATTR_UHW_OUT);
 
        if (!IS_ERR(uhw_in)) {
                udata->inlen = uhw_in->ptr_attr.len;
@@ -253,207 +245,67 @@ static void create_udata(struct uverbs_attr_bundle *ctx,
        }
 }
 
-static int uverbs_create_cq_handler(struct ib_device *ib_dev,
-                                   struct ib_uverbs_file *file,
-                                   struct uverbs_attr_bundle *attrs)
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COMP_CHANNEL,
+                           &UVERBS_TYPE_ALLOC_FD(0,
+                                                 sizeof(struct ib_uverbs_completion_event_file),
+                                                 uverbs_hot_unplug_completion_event_file,
+                                                 &uverbs_event_fops,
+                                                 "[infinibandevent]", O_RDONLY));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_QP,
+                           &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0,
+                                                     uverbs_free_qp));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MW,
+                           &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_mw));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_SRQ,
+                           &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0,
+                                                     uverbs_free_srq));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_AH,
+                           &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_ah));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW,
+                           &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object),
+                                                     0, uverbs_free_flow));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_WQ,
+                           &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0,
+                                                     uverbs_free_wq));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL,
+                           &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_rwq_ind_tbl));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_XRCD,
+                           &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0,
+                                                     uverbs_free_xrcd));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_PD,
+                           /* 2 is used in order to free the PD after MRs */
+                           &UVERBS_TYPE_ALLOC_IDR(2, uverbs_free_pd));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DEVICE, NULL);
+
+static DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects,
+                                 &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE),
+                                 &UVERBS_OBJECT(UVERBS_OBJECT_PD),
+                                 &UVERBS_OBJECT(UVERBS_OBJECT_MR),
+                                 &UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL),
+                                 &UVERBS_OBJECT(UVERBS_OBJECT_CQ),
+                                 &UVERBS_OBJECT(UVERBS_OBJECT_QP),
+                                 &UVERBS_OBJECT(UVERBS_OBJECT_AH),
+                                 &UVERBS_OBJECT(UVERBS_OBJECT_MW),
+                                 &UVERBS_OBJECT(UVERBS_OBJECT_SRQ),
+                                 &UVERBS_OBJECT(UVERBS_OBJECT_FLOW),
+                                 &UVERBS_OBJECT(UVERBS_OBJECT_WQ),
+                                 &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL),
+                                 &UVERBS_OBJECT(UVERBS_OBJECT_XRCD),
+                                 &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION),
+                                 &UVERBS_OBJECT(UVERBS_OBJECT_DM));
+
+const struct uverbs_object_tree_def *uverbs_default_get_objects(void)
 {
-       struct ib_ucontext *ucontext = file->ucontext;
-       struct ib_ucq_object           *obj;
-       struct ib_udata uhw;
-       int ret;
-       u64 user_handle;
-       struct ib_cq_init_attr attr = {};
-       struct ib_cq                   *cq;
-       struct ib_uverbs_completion_event_file    *ev_file = NULL;
-       const struct uverbs_attr *ev_file_attr;
-       struct ib_uobject *ev_file_uobj;
-
-       if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_CREATE_CQ))
-               return -EOPNOTSUPP;
-
-       ret = uverbs_copy_from(&attr.comp_vector, attrs, CREATE_CQ_COMP_VECTOR);
-       if (!ret)
-               ret = uverbs_copy_from(&attr.cqe, attrs, CREATE_CQ_CQE);
-       if (!ret)
-               ret = uverbs_copy_from(&user_handle, attrs, CREATE_CQ_USER_HANDLE);
-       if (ret)
-               return ret;
-
-       /* Optional param, if it doesn't exist, we get -ENOENT and skip it */
-       if (uverbs_copy_from(&attr.flags, attrs, CREATE_CQ_FLAGS) == -EFAULT)
-               return -EFAULT;
-
-       ev_file_attr = uverbs_attr_get(attrs, CREATE_CQ_COMP_CHANNEL);
-       if (!IS_ERR(ev_file_attr)) {
-               ev_file_uobj = ev_file_attr->obj_attr.uobject;
-
-               ev_file = container_of(ev_file_uobj,
-                                      struct ib_uverbs_completion_event_file,
-                                      uobj_file.uobj);
-               uverbs_uobject_get(ev_file_uobj);
-       }
-
-       if (attr.comp_vector >= ucontext->ufile->device->num_comp_vectors) {
-               ret = -EINVAL;
-               goto err_event_file;
-       }
-
-       obj = container_of(uverbs_attr_get(attrs, CREATE_CQ_HANDLE)->obj_attr.uobject,
-                          typeof(*obj), uobject);
-       obj->uverbs_file           = ucontext->ufile;
-       obj->comp_events_reported  = 0;
-       obj->async_events_reported = 0;
-       INIT_LIST_HEAD(&obj->comp_list);
-       INIT_LIST_HEAD(&obj->async_list);
-
-       /* Temporary, only until drivers get the new uverbs_attr_bundle */
-       create_udata(attrs, &uhw);
-
-       cq = ib_dev->create_cq(ib_dev, &attr, ucontext, &uhw);
-       if (IS_ERR(cq)) {
-               ret = PTR_ERR(cq);
-               goto err_event_file;
-       }
-
-       cq->device        = ib_dev;
-       cq->uobject       = &obj->uobject;
-       cq->comp_handler  = ib_uverbs_comp_handler;
-       cq->event_handler = ib_uverbs_cq_event_handler;
-       cq->cq_context    = ev_file ? &ev_file->ev_queue : NULL;
-       obj->uobject.object = cq;
-       obj->uobject.user_handle = user_handle;
-       atomic_set(&cq->usecnt, 0);
-       cq->res.type = RDMA_RESTRACK_CQ;
-       rdma_restrack_add(&cq->res);
-
-       ret = uverbs_copy_to(attrs, CREATE_CQ_RESP_CQE, &cq->cqe,
-                            sizeof(cq->cqe));
-       if (ret)
-               goto err_cq;
-
-       return 0;
-err_cq:
-       ib_destroy_cq(cq);
-
-err_event_file:
-       if (ev_file)
-               uverbs_uobject_put(ev_file_uobj);
-       return ret;
-};
-
-static DECLARE_UVERBS_METHOD(
-       uverbs_method_cq_create, UVERBS_CQ_CREATE, uverbs_create_cq_handler,
-       &UVERBS_ATTR_IDR(CREATE_CQ_HANDLE, UVERBS_OBJECT_CQ, UVERBS_ACCESS_NEW,
-                        UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-       &UVERBS_ATTR_PTR_IN(CREATE_CQ_CQE, u32,
-                           UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-       &UVERBS_ATTR_PTR_IN(CREATE_CQ_USER_HANDLE, u64,
-                           UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-       &UVERBS_ATTR_FD(CREATE_CQ_COMP_CHANNEL, UVERBS_OBJECT_COMP_CHANNEL,
-                       UVERBS_ACCESS_READ),
-       &UVERBS_ATTR_PTR_IN(CREATE_CQ_COMP_VECTOR, u32,
-                           UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-       &UVERBS_ATTR_PTR_IN(CREATE_CQ_FLAGS, u32),
-       &UVERBS_ATTR_PTR_OUT(CREATE_CQ_RESP_CQE, u32,
-                            UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-       &uverbs_uhw_compat_in, &uverbs_uhw_compat_out);
-
-static int uverbs_destroy_cq_handler(struct ib_device *ib_dev,
-                                    struct ib_uverbs_file *file,
-                                    struct uverbs_attr_bundle *attrs)
-{
-       struct ib_uverbs_destroy_cq_resp resp;
-       struct ib_uobject *uobj =
-               uverbs_attr_get(attrs, DESTROY_CQ_HANDLE)->obj_attr.uobject;
-       struct ib_ucq_object *obj = container_of(uobj, struct ib_ucq_object,
-                                                uobject);
-       int ret;
-
-       if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_DESTROY_CQ))
-               return -EOPNOTSUPP;
-
-       ret = rdma_explicit_destroy(uobj);
-       if (ret)
-               return ret;
-
-       resp.comp_events_reported  = obj->comp_events_reported;
-       resp.async_events_reported = obj->async_events_reported;
-
-       return uverbs_copy_to(attrs, DESTROY_CQ_RESP, &resp, sizeof(resp));
+       return &uverbs_default_objects;
 }
-
-static DECLARE_UVERBS_METHOD(
-       uverbs_method_cq_destroy, UVERBS_CQ_DESTROY, uverbs_destroy_cq_handler,
-       &UVERBS_ATTR_IDR(DESTROY_CQ_HANDLE, UVERBS_OBJECT_CQ,
-                        UVERBS_ACCESS_DESTROY,
-                        UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
-       &UVERBS_ATTR_PTR_OUT(DESTROY_CQ_RESP, struct ib_uverbs_destroy_cq_resp,
-                            UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_comp_channel,
-                     UVERBS_OBJECT_COMP_CHANNEL,
-                     &UVERBS_TYPE_ALLOC_FD(0,
-                                             sizeof(struct ib_uverbs_completion_event_file),
-                                             uverbs_hot_unplug_completion_event_file,
-                                             &uverbs_event_fops,
-                                             "[infinibandevent]", O_RDONLY));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_cq, UVERBS_OBJECT_CQ,
-                     &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0,
-                                                 uverbs_free_cq),
-                     &uverbs_method_cq_create,
-                     &uverbs_method_cq_destroy);
-
-DECLARE_UVERBS_OBJECT(uverbs_object_qp, UVERBS_OBJECT_QP,
-                     &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0,
-                                                 uverbs_free_qp));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_mw, UVERBS_OBJECT_MW,
-                     &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_mw));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_mr, UVERBS_OBJECT_MR,
-                     /* 1 is used in order to free the MR after all the MWs */
-                     &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_mr));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_srq, UVERBS_OBJECT_SRQ,
-                     &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0,
-                                                 uverbs_free_srq));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_ah, UVERBS_OBJECT_AH,
-                     &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_ah));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_flow, UVERBS_OBJECT_FLOW,
-                     &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_flow));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_wq, UVERBS_OBJECT_WQ,
-                     &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0,
-                                                 uverbs_free_wq));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_rwq_ind_table,
-                     UVERBS_OBJECT_RWQ_IND_TBL,
-                     &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_rwq_ind_tbl));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_xrcd, UVERBS_OBJECT_XRCD,
-                     &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0,
-                                                 uverbs_free_xrcd));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_pd, UVERBS_OBJECT_PD,
-                     /* 2 is used in order to free the PD after MRs */
-                     &UVERBS_TYPE_ALLOC_IDR(2, uverbs_free_pd));
-
-DECLARE_UVERBS_OBJECT(uverbs_object_device, UVERBS_OBJECT_DEVICE, NULL);
-
-DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects,
-                          &uverbs_object_device,
-                          &uverbs_object_pd,
-                          &uverbs_object_mr,
-                          &uverbs_object_comp_channel,
-                          &uverbs_object_cq,
-                          &uverbs_object_qp,
-                          &uverbs_object_ah,
-                          &uverbs_object_mw,
-                          &uverbs_object_srq,
-                          &uverbs_object_flow,
-                          &uverbs_object_wq,
-                          &uverbs_object_rwq_ind_table,
-                          &uverbs_object_xrcd);
+EXPORT_SYMBOL_GPL(uverbs_default_get_objects);
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
new file mode 100644 (file)
index 0000000..b0dbae9
--- /dev/null
@@ -0,0 +1,210 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/uverbs_std_types.h>
+#include "rdma_core.h"
+#include "uverbs.h"
+
+static int uverbs_free_cq(struct ib_uobject *uobject,
+                         enum rdma_remove_reason why)
+{
+       struct ib_cq *cq = uobject->object;
+       struct ib_uverbs_event_queue *ev_queue = cq->cq_context;
+       struct ib_ucq_object *ucq =
+               container_of(uobject, struct ib_ucq_object, uobject);
+       int ret;
+
+       ret = ib_destroy_cq(cq);
+       if (!ret || why != RDMA_REMOVE_DESTROY)
+               ib_uverbs_release_ucq(uobject->context->ufile, ev_queue ?
+                                     container_of(ev_queue,
+                                                  struct ib_uverbs_completion_event_file,
+                                                  ev_queue) : NULL,
+                                     ucq);
+       return ret;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
+                                                  struct ib_uverbs_file *file,
+                                                  struct uverbs_attr_bundle *attrs)
+{
+       struct ib_ucontext *ucontext = file->ucontext;
+       struct ib_ucq_object           *obj;
+       struct ib_udata uhw;
+       int ret;
+       u64 user_handle;
+       struct ib_cq_init_attr attr = {};
+       struct ib_cq                   *cq;
+       struct ib_uverbs_completion_event_file    *ev_file = NULL;
+       const struct uverbs_attr *ev_file_attr;
+       struct ib_uobject *ev_file_uobj;
+
+       if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_CREATE_CQ))
+               return -EOPNOTSUPP;
+
+       ret = uverbs_copy_from(&attr.comp_vector, attrs,
+                              UVERBS_ATTR_CREATE_CQ_COMP_VECTOR);
+       if (!ret)
+               ret = uverbs_copy_from(&attr.cqe, attrs,
+                                      UVERBS_ATTR_CREATE_CQ_CQE);
+       if (!ret)
+               ret = uverbs_copy_from(&user_handle, attrs,
+                                      UVERBS_ATTR_CREATE_CQ_USER_HANDLE);
+       if (ret)
+               return ret;
+
+       /* Optional param, if it doesn't exist, we get -ENOENT and skip it */
+       if (IS_UVERBS_COPY_ERR(uverbs_copy_from(&attr.flags, attrs,
+                                               UVERBS_ATTR_CREATE_CQ_FLAGS)))
+               return -EFAULT;
+
+       ev_file_attr = uverbs_attr_get(attrs, UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL);
+       if (!IS_ERR(ev_file_attr)) {
+               ev_file_uobj = ev_file_attr->obj_attr.uobject;
+
+               ev_file = container_of(ev_file_uobj,
+                                      struct ib_uverbs_completion_event_file,
+                                      uobj_file.uobj);
+               uverbs_uobject_get(ev_file_uobj);
+       }
+
+       if (attr.comp_vector >= ucontext->ufile->device->num_comp_vectors) {
+               ret = -EINVAL;
+               goto err_event_file;
+       }
+
+       obj = container_of(uverbs_attr_get(attrs,
+                                          UVERBS_ATTR_CREATE_CQ_HANDLE)->obj_attr.uobject,
+                          typeof(*obj), uobject);
+       obj->uverbs_file           = ucontext->ufile;
+       obj->comp_events_reported  = 0;
+       obj->async_events_reported = 0;
+       INIT_LIST_HEAD(&obj->comp_list);
+       INIT_LIST_HEAD(&obj->async_list);
+
+       /* Temporary, only until drivers get the new uverbs_attr_bundle */
+       create_udata(attrs, &uhw);
+
+       cq = ib_dev->create_cq(ib_dev, &attr, ucontext, &uhw);
+       if (IS_ERR(cq)) {
+               ret = PTR_ERR(cq);
+               goto err_event_file;
+       }
+
+       cq->device        = ib_dev;
+       cq->uobject       = &obj->uobject;
+       cq->comp_handler  = ib_uverbs_comp_handler;
+       cq->event_handler = ib_uverbs_cq_event_handler;
+       cq->cq_context    = ev_file ? &ev_file->ev_queue : NULL;
+       obj->uobject.object = cq;
+       obj->uobject.user_handle = user_handle;
+       atomic_set(&cq->usecnt, 0);
+       cq->res.type = RDMA_RESTRACK_CQ;
+       rdma_restrack_add(&cq->res);
+
+       ret = uverbs_copy_to(attrs, UVERBS_ATTR_CREATE_CQ_RESP_CQE, &cq->cqe,
+                            sizeof(cq->cqe));
+       if (ret)
+               goto err_cq;
+
+       return 0;
+err_cq:
+       ib_destroy_cq(cq);
+
+err_event_file:
+       if (ev_file)
+               uverbs_uobject_put(ev_file_uobj);
+       return ret;
+};
+
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_CREATE,
+       &UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE, UVERBS_OBJECT_CQ,
+                        UVERBS_ACCESS_NEW,
+                        UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+       &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_CQE,
+                           UVERBS_ATTR_TYPE(u32),
+                           UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+       &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_USER_HANDLE,
+                           UVERBS_ATTR_TYPE(u64),
+                           UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+       &UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL,
+                       UVERBS_OBJECT_COMP_CHANNEL,
+                       UVERBS_ACCESS_READ),
+       &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR, UVERBS_ATTR_TYPE(u32),
+                           UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+       &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_FLAGS, UVERBS_ATTR_TYPE(u32)),
+       &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE, UVERBS_ATTR_TYPE(u32),
+                            UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+       &uverbs_uhw_compat_in, &uverbs_uhw_compat_out);
+
+static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev,
+                                                   struct ib_uverbs_file *file,
+                                                   struct uverbs_attr_bundle *attrs)
+{
+       struct ib_uverbs_destroy_cq_resp resp;
+       struct ib_uobject *uobj =
+               uverbs_attr_get(attrs, UVERBS_ATTR_DESTROY_CQ_HANDLE)->obj_attr.uobject;
+       struct ib_ucq_object *obj = container_of(uobj, struct ib_ucq_object,
+                                                uobject);
+       int ret;
+
+       if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_DESTROY_CQ))
+               return -EOPNOTSUPP;
+
+       ret = rdma_explicit_destroy(uobj);
+       if (ret)
+               return ret;
+
+       resp.comp_events_reported  = obj->comp_events_reported;
+       resp.async_events_reported = obj->async_events_reported;
+
+       return uverbs_copy_to(attrs, UVERBS_ATTR_DESTROY_CQ_RESP, &resp,
+                             sizeof(resp));
+}
+
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_DESTROY,
+       &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE, UVERBS_OBJECT_CQ,
+                        UVERBS_ACCESS_DESTROY,
+                        UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+       &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_CQ_RESP,
+                            UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_cq_resp),
+                            UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_CQ,
+                           &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0,
+                                                     uverbs_free_cq),
+#if IS_ENABLED(CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI)
+                           &UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE),
+                           &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY)
+#endif
+                          );
+
diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c
new file mode 100644 (file)
index 0000000..8b68157
--- /dev/null
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "uverbs.h"
+#include <rdma/uverbs_std_types.h>
+
+static int uverbs_free_dm(struct ib_uobject *uobject,
+                         enum rdma_remove_reason why)
+{
+       struct ib_dm *dm = uobject->object;
+
+       if (why == RDMA_REMOVE_DESTROY && atomic_read(&dm->usecnt))
+               return -EBUSY;
+
+       return dm->device->dealloc_dm(dm);
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev,
+                                                 struct ib_uverbs_file *file,
+                                                 struct uverbs_attr_bundle *attrs)
+{
+       struct ib_ucontext *ucontext = file->ucontext;
+       struct ib_dm_alloc_attr attr = {};
+       struct ib_uobject *uobj;
+       struct ib_dm *dm;
+       int ret;
+
+       if (!ib_dev->alloc_dm)
+               return -EOPNOTSUPP;
+
+       ret = uverbs_copy_from(&attr.length, attrs,
+                              UVERBS_ATTR_ALLOC_DM_LENGTH);
+       if (ret)
+               return ret;
+
+       ret = uverbs_copy_from(&attr.alignment, attrs,
+                              UVERBS_ATTR_ALLOC_DM_ALIGNMENT);
+       if (ret)
+               return ret;
+
+       uobj = uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DM_HANDLE)->obj_attr.uobject;
+
+       dm = ib_dev->alloc_dm(ib_dev, ucontext, &attr, attrs);
+       if (IS_ERR(dm))
+               return PTR_ERR(dm);
+
+       dm->device  = ib_dev;
+       dm->length  = attr.length;
+       dm->uobject = uobj;
+       atomic_set(&dm->usecnt, 0);
+
+       uobj->object = dm;
+
+       return 0;
+}
+
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_ALLOC,
+       &UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE, UVERBS_OBJECT_DM,
+                        UVERBS_ACCESS_NEW,
+                        UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+       &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_LENGTH,
+                           UVERBS_ATTR_TYPE(u64),
+                           UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+       &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_ALIGNMENT,
+                           UVERBS_ATTR_TYPE(u32),
+                           UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_DM_FREE,
+       uverbs_destroy_def_handler,
+       &UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE,
+                        UVERBS_OBJECT_DM,
+                        UVERBS_ACCESS_DESTROY,
+                        UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DM,
+                           /* 1 is used in order to free the DM after MRs */
+                           &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_dm),
+                           &UVERBS_METHOD(UVERBS_METHOD_DM_ALLOC),
+                           &UVERBS_METHOD(UVERBS_METHOD_DM_FREE));
diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
new file mode 100644 (file)
index 0000000..cbcec3d
--- /dev/null
@@ -0,0 +1,435 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "uverbs.h"
+#include <rdma/uverbs_std_types.h>
+
+static int uverbs_free_flow_action(struct ib_uobject *uobject,
+                                  enum rdma_remove_reason why)
+{
+       struct ib_flow_action *action = uobject->object;
+
+       if (why == RDMA_REMOVE_DESTROY &&
+           atomic_read(&action->usecnt))
+               return -EBUSY;
+
+       return action->device->destroy_flow_action(action);
+}
+
+static u64 esp_flags_uverbs_to_verbs(struct uverbs_attr_bundle *attrs,
+                                    u32 flags, bool is_modify)
+{
+       u64 verbs_flags = flags;
+
+       if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ESN))
+               verbs_flags |= IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED;
+
+       if (is_modify && uverbs_attr_is_valid(attrs,
+                                             UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS))
+               verbs_flags |= IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS;
+
+       return verbs_flags;
+};
+
+static int validate_flow_action_esp_keymat_aes_gcm(struct ib_flow_action_attrs_esp_keymats *keymat)
+{
+       struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm =
+               &keymat->keymat.aes_gcm;
+
+       if (aes_gcm->iv_algo > IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ)
+               return -EOPNOTSUPP;
+
+       if (aes_gcm->key_len != 32 &&
+           aes_gcm->key_len != 24 &&
+           aes_gcm->key_len != 16)
+               return -EINVAL;
+
+       if (aes_gcm->icv_len != 16 &&
+           aes_gcm->icv_len != 8 &&
+           aes_gcm->icv_len != 12)
+               return -EINVAL;
+
+       return 0;
+}
+
+static int (* const flow_action_esp_keymat_validate[])(struct ib_flow_action_attrs_esp_keymats *keymat) = {
+       [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = validate_flow_action_esp_keymat_aes_gcm,
+};
+
+static int flow_action_esp_replay_none(struct ib_flow_action_attrs_esp_replays *replay,
+                                      bool is_modify)
+{
+       /* This is used in order to modify an esp flow action with an enabled
+        * replay protection to a disabled one. This is only supported via
+        * modify, as in create verb we can simply drop the REPLAY attribute and
+        * achieve the same thing.
+        */
+       return is_modify ? 0 : -EINVAL;
+}
+
+static int flow_action_esp_replay_def_ok(struct ib_flow_action_attrs_esp_replays *replay,
+                                        bool is_modify)
+{
+       /* Some replay protections could always be enabled without validating
+        * anything.
+        */
+       return 0;
+}
+
+static int (* const flow_action_esp_replay_validate[])(struct ib_flow_action_attrs_esp_replays *replay,
+                                                      bool is_modify) = {
+       [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = flow_action_esp_replay_none,
+       [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = flow_action_esp_replay_def_ok,
+};
+
+static int parse_esp_ip(enum ib_flow_spec_type proto,
+                       const void __user *val_ptr,
+                       size_t len, union ib_flow_spec *out)
+{
+       int ret;
+       const struct ib_uverbs_flow_ipv4_filter ipv4 = {
+               .src_ip = cpu_to_be32(0xffffffffUL),
+               .dst_ip = cpu_to_be32(0xffffffffUL),
+               .proto = 0xff,
+               .tos = 0xff,
+               .ttl = 0xff,
+               .flags = 0xff,
+       };
+       const struct ib_uverbs_flow_ipv6_filter ipv6 = {
+               .src_ip = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+                          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+               .dst_ip = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+                          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+               .flow_label = cpu_to_be32(0xffffffffUL),
+               .next_hdr = 0xff,
+               .traffic_class = 0xff,
+               .hop_limit = 0xff,
+       };
+       union {
+               struct ib_uverbs_flow_ipv4_filter ipv4;
+               struct ib_uverbs_flow_ipv6_filter ipv6;
+       } user_val = {};
+       const void *user_pmask;
+       size_t val_len;
+
+       /* If the flow IPv4/IPv6 flow specifications are extended, the mask
+        * should be changed as well.
+        */
+       BUILD_BUG_ON(offsetof(struct ib_uverbs_flow_ipv4_filter, flags) +
+                    sizeof(ipv4.flags) != sizeof(ipv4));
+       BUILD_BUG_ON(offsetof(struct ib_uverbs_flow_ipv6_filter, reserved) +
+                    sizeof(ipv6.reserved) != sizeof(ipv6));
+
+       switch (proto) {
+       case IB_FLOW_SPEC_IPV4:
+               if (len > sizeof(user_val.ipv4) &&
+                   !ib_is_buffer_cleared(val_ptr + sizeof(user_val.ipv4),
+                                         len - sizeof(user_val.ipv4)))
+                       return -EOPNOTSUPP;
+
+               val_len = min_t(size_t, len, sizeof(user_val.ipv4));
+               ret = copy_from_user(&user_val.ipv4, val_ptr,
+                                    val_len);
+               if (ret)
+                       return -EFAULT;
+
+               user_pmask = &ipv4;
+               break;
+       case IB_FLOW_SPEC_IPV6:
+               if (len > sizeof(user_val.ipv6) &&
+                   !ib_is_buffer_cleared(val_ptr + sizeof(user_val.ipv6),
+                                         len - sizeof(user_val.ipv6)))
+                       return -EOPNOTSUPP;
+
+               val_len = min_t(size_t, len, sizeof(user_val.ipv6));
+               ret = copy_from_user(&user_val.ipv6, val_ptr,
+                                    val_len);
+               if (ret)
+                       return -EFAULT;
+
+               user_pmask = &ipv6;
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       return ib_uverbs_kern_spec_to_ib_spec_filter(proto, user_pmask,
+                                                    &user_val,
+                                                    val_len, out);
+}
+
+static int flow_action_esp_get_encap(struct ib_flow_spec_list *out,
+                                    struct uverbs_attr_bundle *attrs)
+{
+       struct ib_uverbs_flow_action_esp_encap uverbs_encap;
+       int ret;
+
+       ret = uverbs_copy_from(&uverbs_encap, attrs,
+                              UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP);
+       if (ret)
+               return ret;
+
+       /* We currently support only one encap */
+       if (uverbs_encap.next_ptr)
+               return -EOPNOTSUPP;
+
+       if (uverbs_encap.type != IB_FLOW_SPEC_IPV4 &&
+           uverbs_encap.type != IB_FLOW_SPEC_IPV6)
+               return -EOPNOTSUPP;
+
+       return parse_esp_ip(uverbs_encap.type,
+                           u64_to_user_ptr(uverbs_encap.val_ptr),
+                           uverbs_encap.len,
+                           &out->spec);
+}
+
+struct ib_flow_action_esp_attr {
+       struct  ib_flow_action_attrs_esp                hdr;
+       struct  ib_flow_action_attrs_esp_keymats        keymat;
+       struct  ib_flow_action_attrs_esp_replays        replay;
+       /* We currently support only one spec */
+       struct  ib_flow_spec_list                       encap;
+};
+
+#define ESP_LAST_SUPPORTED_FLAG                IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW
+static int parse_flow_action_esp(struct ib_device *ib_dev,
+                                struct ib_uverbs_file *file,
+                                struct uverbs_attr_bundle *attrs,
+                                struct ib_flow_action_esp_attr *esp_attr,
+                                bool is_modify)
+{
+       struct ib_uverbs_flow_action_esp uverbs_esp = {};
+       int ret;
+
+       /* Optional param, if it doesn't exist, we get -ENOENT and skip it */
+       ret = uverbs_copy_from(&esp_attr->hdr.esn, attrs,
+                              UVERBS_ATTR_FLOW_ACTION_ESP_ESN);
+       if (IS_UVERBS_COPY_ERR(ret))
+               return ret;
+
+       /* This can be called from FLOW_ACTION_ESP_MODIFY where
+        * UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS is optional
+        */
+       if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS)) {
+               ret = uverbs_copy_from_or_zero(&uverbs_esp, attrs,
+                                              UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS);
+               if (ret)
+                       return ret;
+
+               if (uverbs_esp.flags & ~((ESP_LAST_SUPPORTED_FLAG << 1) - 1))
+                       return -EOPNOTSUPP;
+
+               esp_attr->hdr.spi = uverbs_esp.spi;
+               esp_attr->hdr.seq = uverbs_esp.seq;
+               esp_attr->hdr.tfc_pad = uverbs_esp.tfc_pad;
+               esp_attr->hdr.hard_limit_pkts = uverbs_esp.hard_limit_pkts;
+       }
+       esp_attr->hdr.flags = esp_flags_uverbs_to_verbs(attrs, uverbs_esp.flags,
+                                                       is_modify);
+
+       if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT)) {
+               esp_attr->keymat.protocol =
+                       uverbs_attr_get_enum_id(attrs,
+                                               UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT);
+               ret = uverbs_copy_from_or_zero(&esp_attr->keymat.keymat,
+                                              attrs,
+                                              UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT);
+               if (ret)
+                       return ret;
+
+               ret = flow_action_esp_keymat_validate[esp_attr->keymat.protocol](&esp_attr->keymat);
+               if (ret)
+                       return ret;
+
+               esp_attr->hdr.keymat = &esp_attr->keymat;
+       }
+
+       if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY)) {
+               esp_attr->replay.protocol =
+                       uverbs_attr_get_enum_id(attrs,
+                                               UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY);
+
+               ret = uverbs_copy_from_or_zero(&esp_attr->replay.replay,
+                                              attrs,
+                                              UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY);
+               if (ret)
+                       return ret;
+
+               ret = flow_action_esp_replay_validate[esp_attr->replay.protocol](&esp_attr->replay,
+                                                                                is_modify);
+               if (ret)
+                       return ret;
+
+               esp_attr->hdr.replay = &esp_attr->replay;
+       }
+
+       if (uverbs_attr_is_valid(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP)) {
+               ret = flow_action_esp_get_encap(&esp_attr->encap, attrs);
+               if (ret)
+                       return ret;
+
+               esp_attr->hdr.encap = &esp_attr->encap;
+       }
+
+       return 0;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device *ib_dev,
+                                                               struct ib_uverbs_file *file,
+                                                               struct uverbs_attr_bundle *attrs)
+{
+       int                               ret;
+       struct ib_uobject                 *uobj;
+       struct ib_flow_action             *action;
+       struct ib_flow_action_esp_attr    esp_attr = {};
+
+       if (!ib_dev->create_flow_action_esp)
+               return -EOPNOTSUPP;
+
+       ret = parse_flow_action_esp(ib_dev, file, attrs, &esp_attr, false);
+       if (ret)
+               return ret;
+
+       /* No need to check as this attribute is marked as MANDATORY */
+       uobj = uverbs_attr_get(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE)->obj_attr.uobject;
+       action = ib_dev->create_flow_action_esp(ib_dev, &esp_attr.hdr, attrs);
+       if (IS_ERR(action))
+               return PTR_ERR(action);
+
+       atomic_set(&action->usecnt, 0);
+       action->device = ib_dev;
+       action->type = IB_FLOW_ACTION_ESP;
+       action->uobject = uobj;
+       uobj->object = action;
+
+       return 0;
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device *ib_dev,
+                                                               struct ib_uverbs_file *file,
+                                                               struct uverbs_attr_bundle *attrs)
+{
+       int                               ret;
+       struct ib_uobject                 *uobj;
+       struct ib_flow_action             *action;
+       struct ib_flow_action_esp_attr    esp_attr = {};
+
+       if (!ib_dev->modify_flow_action_esp)
+               return -EOPNOTSUPP;
+
+       ret = parse_flow_action_esp(ib_dev, file, attrs, &esp_attr, true);
+       if (ret)
+               return ret;
+
+       uobj = uverbs_attr_get(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE)->obj_attr.uobject;
+       action = uobj->object;
+
+       if (action->type != IB_FLOW_ACTION_ESP)
+               return -EINVAL;
+
+       return ib_dev->modify_flow_action_esp(action,
+                                             &esp_attr.hdr,
+                                             attrs);
+}
+
+static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = {
+       [IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = {
+               .ptr = {
+                       .type = UVERBS_ATTR_TYPE_PTR_IN,
+                       UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_keymat_aes_gcm),
+                       .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
+               },
+       },
+};
+
+static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = {
+       [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = {
+               .ptr = {
+                       .type = UVERBS_ATTR_TYPE_PTR_IN,
+                       /* No need to specify any data */
+                       .len = 0,
+               }
+       },
+       [IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = {
+               .ptr = {
+                       .type = UVERBS_ATTR_TYPE_PTR_IN,
+                       UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, size),
+                       .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
+               }
+       },
+};
+
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
+       &UVERBS_ATTR_IDR(UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION,
+                        UVERBS_ACCESS_NEW,
+                        UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+       &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
+                           UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts),
+                           UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
+                                    UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)),
+       &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32)),
+       &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
+                            uverbs_flow_action_esp_keymat,
+                            UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+       &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
+                            uverbs_flow_action_esp_replay),
+       &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
+                           UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type)));
+
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY,
+       &UVERBS_ATTR_IDR(UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION,
+                        UVERBS_ACCESS_WRITE,
+                        UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+       &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
+                           UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts),
+                           UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)),
+       &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32)),
+       &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
+                            uverbs_flow_action_esp_keymat),
+       &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
+                            uverbs_flow_action_esp_replay),
+       &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
+                           UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type)));
+
+static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_FLOW_ACTION_DESTROY,
+       uverbs_destroy_def_handler,
+       &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE,
+                        UVERBS_OBJECT_FLOW_ACTION,
+                        UVERBS_ACCESS_DESTROY,
+                        UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW_ACTION,
+                           &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_flow_action),
+                           &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE),
+                           &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY),
+                           &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY));
+
diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c
new file mode 100644 (file)
index 0000000..68f7cad
--- /dev/null
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "uverbs.h"
+#include <rdma/uverbs_std_types.h>
+
+static int uverbs_free_mr(struct ib_uobject *uobject,
+                         enum rdma_remove_reason why)
+{
+       return ib_dereg_mr((struct ib_mr *)uobject->object);
+}
+
+static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(struct ib_device *ib_dev,
+                                                  struct ib_uverbs_file *file,
+                                                  struct uverbs_attr_bundle *attrs)
+{
+       struct ib_dm_mr_attr attr = {};
+       struct ib_uobject *uobj;
+       struct ib_dm *dm;
+       struct ib_pd *pd;
+       struct ib_mr *mr;
+       int ret;
+
+       if (!ib_dev->reg_dm_mr)
+               return -EOPNOTSUPP;
+
+       ret = uverbs_copy_from(&attr.offset, attrs, UVERBS_ATTR_REG_DM_MR_OFFSET);
+       if (ret)
+               return ret;
+
+       ret = uverbs_copy_from(&attr.length, attrs,
+                              UVERBS_ATTR_REG_DM_MR_LENGTH);
+       if (ret)
+               return ret;
+
+       ret = uverbs_copy_from(&attr.access_flags, attrs,
+                              UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS);
+       if (ret)
+               return ret;
+
+       if (!(attr.access_flags & IB_ZERO_BASED))
+               return -EINVAL;
+
+       ret = ib_check_mr_access(attr.access_flags);
+       if (ret)
+               return ret;
+
+       pd = uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_PD_HANDLE);
+
+       dm = uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_DM_MR_DM_HANDLE);
+
+       uobj = uverbs_attr_get(attrs, UVERBS_ATTR_REG_DM_MR_HANDLE)->obj_attr.uobject;
+
+       if (attr.offset > dm->length || attr.length > dm->length ||
+           attr.length > dm->length - attr.offset)
+               return -EINVAL;
+
+       mr = pd->device->reg_dm_mr(pd, dm, &attr, attrs);
+       if (IS_ERR(mr))
+               return PTR_ERR(mr);
+
+       mr->device  = pd->device;
+       mr->pd      = pd;
+       mr->dm      = dm;
+       mr->uobject = uobj;
+       atomic_inc(&pd->usecnt);
+       atomic_inc(&dm->usecnt);
+
+       uobj->object = mr;
+
+       ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DM_MR_RESP_LKEY, &mr->lkey,
+                            sizeof(mr->lkey));
+       if (ret)
+               goto err_dereg;
+
+       ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_DM_MR_RESP_RKEY,
+                            &mr->rkey, sizeof(mr->rkey));
+       if (ret)
+               goto err_dereg;
+
+       return 0;
+
+err_dereg:
+       ib_dereg_mr(mr);
+
+       return ret;
+}
+
+static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_MR_REG,
+       &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE, UVERBS_OBJECT_MR,
+                        UVERBS_ACCESS_NEW,
+                        UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+       &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_OFFSET,
+                           UVERBS_ATTR_TYPE(u64),
+                           UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+       &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_LENGTH,
+                           UVERBS_ATTR_TYPE(u64),
+                           UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+       &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_PD_HANDLE, UVERBS_OBJECT_PD,
+                        UVERBS_ACCESS_READ,
+                        UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+       &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS,
+                           UVERBS_ATTR_TYPE(u32),
+                           UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+       &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE, UVERBS_OBJECT_DM,
+                        UVERBS_ACCESS_READ,
+                        UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+       &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_LKEY,
+                            UVERBS_ATTR_TYPE(u32),
+                            UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
+       &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_RKEY,
+                            UVERBS_ATTR_TYPE(u32),
+                            UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+
+DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MR,
+                           /* 1 is used in order to free the MR after all the MWs */
+                           &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_mr),
+                           &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG));
index 93025d2..7eff3ae 100644 (file)
@@ -655,7 +655,7 @@ int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
 
        return ah->device->modify_ah ?
                ah->device->modify_ah(ah, ah_attr) :
-               -ENOSYS;
+               -EOPNOTSUPP;
 }
 EXPORT_SYMBOL(rdma_modify_ah);
 
@@ -663,7 +663,7 @@ int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
 {
        return ah->device->query_ah ?
                ah->device->query_ah(ah, ah_attr) :
-               -ENOSYS;
+               -EOPNOTSUPP;
 }
 EXPORT_SYMBOL(rdma_query_ah);
 
@@ -689,7 +689,7 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd,
        struct ib_srq *srq;
 
        if (!pd->device->create_srq)
-               return ERR_PTR(-ENOSYS);
+               return ERR_PTR(-EOPNOTSUPP);
 
        srq = pd->device->create_srq(pd, srq_init_attr, NULL);
 
@@ -722,7 +722,7 @@ int ib_modify_srq(struct ib_srq *srq,
 {
        return srq->device->modify_srq ?
                srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL) :
-               -ENOSYS;
+               -EOPNOTSUPP;
 }
 EXPORT_SYMBOL(ib_modify_srq);
 
@@ -730,7 +730,7 @@ int ib_query_srq(struct ib_srq *srq,
                 struct ib_srq_attr *srq_attr)
 {
        return srq->device->query_srq ?
-               srq->device->query_srq(srq, srq_attr) : -ENOSYS;
+               srq->device->query_srq(srq, srq_attr) : -EOPNOTSUPP;
 }
 EXPORT_SYMBOL(ib_query_srq);
 
@@ -1263,34 +1263,30 @@ static const struct {
        }
 };
 
-int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
-                      enum ib_qp_type type, enum ib_qp_attr_mask mask,
-                      enum rdma_link_layer ll)
+bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
+                       enum ib_qp_type type, enum ib_qp_attr_mask mask,
+                       enum rdma_link_layer ll)
 {
        enum ib_qp_attr_mask req_param, opt_param;
 
-       if (cur_state  < 0 || cur_state  > IB_QPS_ERR ||
-           next_state < 0 || next_state > IB_QPS_ERR)
-               return 0;
-
        if (mask & IB_QP_CUR_STATE  &&
            cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS &&
            cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE)
-               return 0;
+               return false;
 
        if (!qp_state_table[cur_state][next_state].valid)
-               return 0;
+               return false;
 
        req_param = qp_state_table[cur_state][next_state].req_param[type];
        opt_param = qp_state_table[cur_state][next_state].opt_param[type];
 
        if ((mask & req_param) != req_param)
-               return 0;
+               return false;
 
        if (mask & ~(req_param | opt_param | IB_QP_STATE))
-               return 0;
+               return false;
 
-       return 1;
+       return true;
 }
 EXPORT_SYMBOL(ib_modify_qp_is_ok);
 
@@ -1457,7 +1453,7 @@ int ib_query_qp(struct ib_qp *qp,
 {
        return qp->device->query_qp ?
                qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) :
-               -ENOSYS;
+               -EOPNOTSUPP;
 }
 EXPORT_SYMBOL(ib_query_qp);
 
@@ -1594,7 +1590,7 @@ EXPORT_SYMBOL(ib_create_cq);
 int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period)
 {
        return cq->device->modify_cq ?
-               cq->device->modify_cq(cq, cq_count, cq_period) : -ENOSYS;
+               cq->device->modify_cq(cq, cq_count, cq_period) : -EOPNOTSUPP;
 }
 EXPORT_SYMBOL(rdma_set_cq_moderation);
 
@@ -1611,7 +1607,7 @@ EXPORT_SYMBOL(ib_destroy_cq);
 int ib_resize_cq(struct ib_cq *cq, int cqe)
 {
        return cq->device->resize_cq ?
-               cq->device->resize_cq(cq, cqe, NULL) : -ENOSYS;
+               cq->device->resize_cq(cq, cqe, NULL) : -EOPNOTSUPP;
 }
 EXPORT_SYMBOL(ib_resize_cq);
 
@@ -1620,11 +1616,16 @@ EXPORT_SYMBOL(ib_resize_cq);
 int ib_dereg_mr(struct ib_mr *mr)
 {
&n