Merge branch 'mlxsw-Add-VxLAN-support'
author David S. Miller <davem@davemloft.net>
Thu, 18 Oct 2018 00:45:08 +0000 (17:45 -0700)
committer David S. Miller <davem@davemloft.net>
Thu, 18 Oct 2018 00:45:08 +0000 (17:45 -0700)
Ido Schimmel says:

====================
mlxsw: Add VxLAN support

This patchset adds support for VxLAN offload in the mlxsw driver.

With regard to the forwarding plane, VxLAN support is composed of two
main parts: encapsulation and decapsulation.

In the device, NVE encapsulation (and VxLAN in particular) takes place
in the bridge. A packet can be encapsulated using VxLAN either because
it hit an FDB entry that forwards it to the router with the IP of the
remote VTEP or because it was flooded, in which case it is sent to a
list of remote VTEPs (in addition to local ports). In either case, the
VNI is derived from the filtering identifier (FID) the packet was
classified to at ingress and the underlay source IP is taken from a
device global configuration.

VxLAN decapsulation takes place in the underlay router, where packets
that hit a local route that corresponds to the source IP of the local
VTEP are decapsulated and injected into the bridge. The packets are
classified to a FID based on the VNI they came with.

The first six patches export the required APIs in the VxLAN and mlxsw
drivers in order to allow for the introduction of the NVE core in the
next two patches. The NVE core is designed to support a variety of NVE
encapsulations (e.g., VxLAN, NVGRE) and different ASICs, but currently
only VxLAN and Spectrum are supported. Spectrum-2 support will be added
in the future.

The last 10 patches add support for VxLAN decapsulation and
encapsulation and include the addition of the required switchdev APIs in
the VxLAN driver. These APIs allow capable drivers to get a notification
about the addition / deletion of FDB entries to / from the VxLAN's FDB.

A subsequent patchset will add selftests (generic and mlxsw-specific),
data plane learning, FDB extack and vetoing, and support for VLAN-aware
bridges (one VNI per VxLAN device model).

v2:
* Implement netif_is_vxlan() using rtnl_link_ops->kind (Jakub & Stephen)
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
20 files changed:
drivers/net/ethernet/mellanox/mlxsw/Makefile
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
drivers/net/ethernet/mellanox/mlxsw/spectrum.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c [new file with mode: 0644]
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
drivers/net/ethernet/rocker/rocker_main.c
drivers/net/vxlan.c
include/net/inet_ecn.h
include/net/switchdev.h
include/net/vxlan.h
net/bridge/br.c
net/bridge/br_fdb.c
net/bridge/br_private.h
net/bridge/br_switchdev.c
net/dsa/slave.c

index 68fa44a41485319d894345db7e5e09efc05b7fbf..1f77e97e2d7aa6b07e265de0ddb7338d75c59611 100644 (file)
@@ -27,7 +27,8 @@ mlxsw_spectrum-objs           := spectrum.o spectrum_buffers.o \
                                   spectrum_acl_flex_keys.o \
                                   spectrum1_mr_tcam.o spectrum2_mr_tcam.o \
                                   spectrum_mr_tcam.o spectrum_mr.o \
-                                  spectrum_qdisc.o spectrum_span.o
+                                  spectrum_qdisc.o spectrum_span.o \
+                                  spectrum_nve.o spectrum_nve_vxlan.o
 mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB)    += spectrum_dcb.o
 mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o
 obj-$(CONFIG_MLXSW_MINIMAL)    += mlxsw_minimal.o
index ed7e4c4e740379f6655d36abb8322925a9287981..8a4983adae940a08b4d4d5ec39637522fb1bea46 100644 (file)
@@ -2994,6 +2994,13 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
                goto err_port_qdiscs_init;
        }
 
+       err = mlxsw_sp_port_nve_init(mlxsw_sp_port);
+       if (err) {
+               dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize NVE\n",
+                       mlxsw_sp_port->local_port);
+               goto err_port_nve_init;
+       }
+
        mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_get(mlxsw_sp_port, 1);
        if (IS_ERR(mlxsw_sp_port_vlan)) {
                dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to create VID 1\n",
@@ -3022,6 +3029,8 @@ err_register_netdev:
        mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
        mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan);
 err_port_vlan_get:
+       mlxsw_sp_port_nve_fini(mlxsw_sp_port);
+err_port_nve_init:
        mlxsw_sp_tc_qdisc_fini(mlxsw_sp_port);
 err_port_qdiscs_init:
        mlxsw_sp_port_fids_fini(mlxsw_sp_port);
@@ -3061,6 +3070,7 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
        mlxsw_sp->ports[local_port] = NULL;
        mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
        mlxsw_sp_port_vlan_flush(mlxsw_sp_port);
+       mlxsw_sp_port_nve_fini(mlxsw_sp_port);
        mlxsw_sp_tc_qdisc_fini(mlxsw_sp_port);
        mlxsw_sp_port_fids_fini(mlxsw_sp_port);
        mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
@@ -3795,6 +3805,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
                goto err_afa_init;
        }
 
+       err = mlxsw_sp_nve_init(mlxsw_sp);
+       if (err) {
+               dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize NVE\n");
+               goto err_nve_init;
+       }
+
        err = mlxsw_sp_router_init(mlxsw_sp);
        if (err) {
                dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n");
@@ -3841,6 +3857,8 @@ err_acl_init:
 err_netdev_notifier:
        mlxsw_sp_router_fini(mlxsw_sp);
 err_router_init:
+       mlxsw_sp_nve_fini(mlxsw_sp);
+err_nve_init:
        mlxsw_sp_afa_fini(mlxsw_sp);
 err_afa_init:
        mlxsw_sp_counter_pool_fini(mlxsw_sp);
@@ -3873,6 +3891,7 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core,
        mlxsw_sp->afk_ops = &mlxsw_sp1_afk_ops;
        mlxsw_sp->mr_tcam_ops = &mlxsw_sp1_mr_tcam_ops;
        mlxsw_sp->acl_tcam_ops = &mlxsw_sp1_acl_tcam_ops;
+       mlxsw_sp->nve_ops_arr = mlxsw_sp1_nve_ops_arr;
 
        return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info);
 }
@@ -3887,6 +3906,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core,
        mlxsw_sp->afk_ops = &mlxsw_sp2_afk_ops;
        mlxsw_sp->mr_tcam_ops = &mlxsw_sp2_mr_tcam_ops;
        mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops;
+       mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr;
 
        return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info);
 }
@@ -3900,6 +3920,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
        mlxsw_sp_acl_fini(mlxsw_sp);
        unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
        mlxsw_sp_router_fini(mlxsw_sp);
+       mlxsw_sp_nve_fini(mlxsw_sp);
        mlxsw_sp_afa_fini(mlxsw_sp);
        mlxsw_sp_counter_pool_fini(mlxsw_sp);
        mlxsw_sp_switchdev_fini(mlxsw_sp);
@@ -4566,6 +4587,41 @@ static void mlxsw_sp_port_ovs_leave(struct mlxsw_sp_port *mlxsw_sp_port)
        mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, false);
 }
 
+static bool mlxsw_sp_bridge_has_multiple_vxlans(struct net_device *br_dev)
+{
+       unsigned int num_vxlans = 0;
+       struct net_device *dev;
+       struct list_head *iter;
+
+       netdev_for_each_lower_dev(br_dev, dev, iter) {
+               if (netif_is_vxlan(dev))
+                       num_vxlans++;
+       }
+
+       return num_vxlans > 1;
+}
+
+static bool mlxsw_sp_bridge_vxlan_is_valid(struct net_device *br_dev,
+                                          struct netlink_ext_ack *extack)
+{
+       if (br_multicast_enabled(br_dev)) {
+               NL_SET_ERR_MSG_MOD(extack, "Multicast can not be enabled on a bridge with a VxLAN device");
+               return false;
+       }
+
+       if (br_vlan_enabled(br_dev)) {
+               NL_SET_ERR_MSG_MOD(extack, "VLAN filtering can not be enabled on a bridge with a VxLAN device");
+               return false;
+       }
+
+       if (mlxsw_sp_bridge_has_multiple_vxlans(br_dev)) {
+               NL_SET_ERR_MSG_MOD(extack, "Multiple VxLAN devices are not supported in a VLAN-unaware bridge");
+               return false;
+       }
+
+       return true;
+}
+
 static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
                                               struct net_device *dev,
                                               unsigned long event, void *ptr)
@@ -4595,6 +4651,11 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
                }
                if (!info->linking)
                        break;
+               if (netif_is_bridge_master(upper_dev) &&
+                   !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, upper_dev) &&
+                   mlxsw_sp_bridge_has_vxlan(upper_dev) &&
+                   !mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack))
+                       return -EOPNOTSUPP;
                if (netdev_has_any_upper_dev(upper_dev) &&
                    (!netif_is_bridge_master(upper_dev) ||
                     !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
@@ -4752,6 +4813,11 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
                }
                if (!info->linking)
                        break;
+               if (netif_is_bridge_master(upper_dev) &&
+                   !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, upper_dev) &&
+                   mlxsw_sp_bridge_has_vxlan(upper_dev) &&
+                   !mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack))
+                       return -EOPNOTSUPP;
                if (netdev_has_any_upper_dev(upper_dev) &&
                    (!netif_is_bridge_master(upper_dev) ||
                     !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
@@ -4898,6 +4964,63 @@ static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr)
        return netif_is_l3_master(info->upper_dev);
 }
 
+static int mlxsw_sp_netdevice_vxlan_event(struct mlxsw_sp *mlxsw_sp,
+                                         struct net_device *dev,
+                                         unsigned long event, void *ptr)
+{
+       struct netdev_notifier_changeupper_info *cu_info;
+       struct netdev_notifier_info *info = ptr;
+       struct netlink_ext_ack *extack;
+       struct net_device *upper_dev;
+
+       extack = netdev_notifier_info_to_extack(info);
+
+       switch (event) {
+       case NETDEV_CHANGEUPPER:
+               cu_info = container_of(info,
+                                      struct netdev_notifier_changeupper_info,
+                                      info);
+               upper_dev = cu_info->upper_dev;
+               if (!netif_is_bridge_master(upper_dev))
+                       return 0;
+               if (!mlxsw_sp_lower_get(upper_dev))
+                       return 0;
+               if (!mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack))
+                       return -EOPNOTSUPP;
+               if (cu_info->linking) {
+                       if (!netif_running(dev))
+                               return 0;
+                       return mlxsw_sp_bridge_vxlan_join(mlxsw_sp, upper_dev,
+                                                         dev, extack);
+               } else {
+                       mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, upper_dev, dev);
+               }
+               break;
+       case NETDEV_PRE_UP:
+               upper_dev = netdev_master_upper_dev_get(dev);
+               if (!upper_dev)
+                       return 0;
+               if (!netif_is_bridge_master(upper_dev))
+                       return 0;
+               if (!mlxsw_sp_lower_get(upper_dev))
+                       return 0;
+               return mlxsw_sp_bridge_vxlan_join(mlxsw_sp, upper_dev, dev,
+                                                 extack);
+       case NETDEV_DOWN:
+               upper_dev = netdev_master_upper_dev_get(dev);
+               if (!upper_dev)
+                       return 0;
+               if (!netif_is_bridge_master(upper_dev))
+                       return 0;
+               if (!mlxsw_sp_lower_get(upper_dev))
+                       return 0;
+               mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, upper_dev, dev);
+               break;
+       }
+
+       return 0;
+}
+
 static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
                                    unsigned long event, void *ptr)
 {
@@ -4914,6 +5037,8 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
        }
        mlxsw_sp_span_respin(mlxsw_sp);
 
+       if (netif_is_vxlan(dev))
+               err = mlxsw_sp_netdevice_vxlan_event(mlxsw_sp, dev, event, ptr);
        if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev))
                err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev,
                                                       event, ptr);
index 1f68ac2a20f43e3dc07ae85ff94c2e023f838341..0875a79cbe7b1801f3b832b1aaf84acc9edce915 100644 (file)
@@ -16,6 +16,7 @@
 #include <net/psample.h>
 #include <net/pkt_cls.h>
 #include <net/red.h>
+#include <net/vxlan.h>
 
 #include "port.h"
 #include "core.h"
@@ -55,6 +56,8 @@ enum mlxsw_sp_resource_id {
 struct mlxsw_sp_port;
 struct mlxsw_sp_rif;
 struct mlxsw_sp_span_entry;
+enum mlxsw_sp_l3proto;
+union mlxsw_sp_l3addr;
 
 struct mlxsw_sp_upper {
        struct net_device *dev;
@@ -113,9 +116,11 @@ struct mlxsw_sp_acl;
 struct mlxsw_sp_counter_pool;
 struct mlxsw_sp_fid_core;
 struct mlxsw_sp_kvdl;
+struct mlxsw_sp_nve;
 struct mlxsw_sp_kvdl_ops;
 struct mlxsw_sp_mr_tcam_ops;
 struct mlxsw_sp_acl_tcam_ops;
+struct mlxsw_sp_nve_ops;
 
 struct mlxsw_sp {
        struct mlxsw_sp_port **ports;
@@ -132,6 +137,7 @@ struct mlxsw_sp {
        struct mlxsw_sp_acl *acl;
        struct mlxsw_sp_fid_core *fid_core;
        struct mlxsw_sp_kvdl *kvdl;
+       struct mlxsw_sp_nve *nve;
        struct notifier_block netdevice_nb;
 
        struct mlxsw_sp_counter_pool *counter_pool;
@@ -146,6 +152,7 @@ struct mlxsw_sp {
        const struct mlxsw_afk_ops *afk_ops;
        const struct mlxsw_sp_mr_tcam_ops *mr_tcam_ops;
        const struct mlxsw_sp_acl_tcam_ops *acl_tcam_ops;
+       const struct mlxsw_sp_nve_ops **nve_ops_arr;
 };
 
 static inline struct mlxsw_sp_upper *
@@ -235,6 +242,25 @@ struct mlxsw_sp_port {
        struct mlxsw_sp_acl_block *eg_acl_block;
 };
 
+static inline struct net_device *
+mlxsw_sp_bridge_vxlan_dev_find(struct net_device *br_dev)
+{
+       struct net_device *dev;
+       struct list_head *iter;
+
+       netdev_for_each_lower_dev(br_dev, dev, iter) {
+               if (netif_is_vxlan(dev))
+                       return dev;
+       }
+
+       return NULL;
+}
+
+static inline bool mlxsw_sp_bridge_has_vxlan(struct net_device *br_dev)
+{
+       return !!mlxsw_sp_bridge_vxlan_dev_find(br_dev);
+}
+
 static inline bool
 mlxsw_sp_port_is_pause_en(const struct mlxsw_sp_port *mlxsw_sp_port)
 {
@@ -330,6 +356,13 @@ void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port,
                                struct net_device *br_dev);
 bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp,
                                         const struct net_device *br_dev);
+int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp,
+                              const struct net_device *br_dev,
+                              const struct net_device *vxlan_dev,
+                              struct netlink_ext_ack *extack);
+void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp,
+                                const struct net_device *br_dev,
+                                const struct net_device *vxlan_dev);
 
 /* spectrum.c */
 int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port,
@@ -431,6 +464,15 @@ struct mlxsw_sp_rif *mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
                                              const struct net_device *dev);
 u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp);
 struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif);
+int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
+                                     enum mlxsw_sp_l3proto ul_proto,
+                                     const union mlxsw_sp_l3addr *ul_sip,
+                                     u32 tunnel_index);
+void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
+                                     enum mlxsw_sp_l3proto ul_proto,
+                                     const union mlxsw_sp_l3addr *ul_sip);
+int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
+                               u16 *vr_id);
 
 /* spectrum_kvdl.c */
 enum mlxsw_sp_kvdl_entry_type {
@@ -679,6 +721,16 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
                           struct tc_prio_qopt_offload *p);
 
 /* spectrum_fid.c */
+struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_vni(struct mlxsw_sp *mlxsw_sp,
+                                               __be32 vni);
+int mlxsw_sp_fid_vni(const struct mlxsw_sp_fid *fid, __be32 *vni);
+int mlxsw_sp_fid_nve_flood_index_set(struct mlxsw_sp_fid *fid,
+                                    u32 nve_flood_index);
+void mlxsw_sp_fid_nve_flood_index_clear(struct mlxsw_sp_fid *fid);
+bool mlxsw_sp_fid_nve_flood_index_is_set(const struct mlxsw_sp_fid *fid);
+int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, __be32 vni);
+void mlxsw_sp_fid_vni_clear(struct mlxsw_sp_fid *fid);
+bool mlxsw_sp_fid_vni_is_set(const struct mlxsw_sp_fid *fid);
 int mlxsw_sp_fid_flood_set(struct mlxsw_sp_fid *fid,
                           enum mlxsw_sp_flood_type packet_type, u8 local_port,
                           bool member);
@@ -697,6 +749,8 @@ u16 mlxsw_sp_fid_8021q_vid(const struct mlxsw_sp_fid *fid);
 struct mlxsw_sp_fid *mlxsw_sp_fid_8021q_get(struct mlxsw_sp *mlxsw_sp, u16 vid);
 struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_get(struct mlxsw_sp *mlxsw_sp,
                                            int br_ifindex);
+struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_lookup(struct mlxsw_sp *mlxsw_sp,
+                                              int br_ifindex);
 struct mlxsw_sp_fid *mlxsw_sp_fid_rfid_get(struct mlxsw_sp *mlxsw_sp,
                                           u16 rif_index);
 struct mlxsw_sp_fid *mlxsw_sp_fid_dummy_get(struct mlxsw_sp *mlxsw_sp);
@@ -742,4 +796,39 @@ extern const struct mlxsw_sp_mr_tcam_ops mlxsw_sp1_mr_tcam_ops;
 /* spectrum2_mr_tcam.c */
 extern const struct mlxsw_sp_mr_tcam_ops mlxsw_sp2_mr_tcam_ops;
 
+/* spectrum_nve.c */
+enum mlxsw_sp_nve_type {
+       MLXSW_SP_NVE_TYPE_VXLAN,
+};
+
+struct mlxsw_sp_nve_params {
+       enum mlxsw_sp_nve_type type;
+       __be32 vni;
+       const struct net_device *dev;
+};
+
+extern const struct mlxsw_sp_nve_ops *mlxsw_sp1_nve_ops_arr[];
+extern const struct mlxsw_sp_nve_ops *mlxsw_sp2_nve_ops_arr[];
+
+int mlxsw_sp_nve_flood_ip_add(struct mlxsw_sp *mlxsw_sp,
+                             struct mlxsw_sp_fid *fid,
+                             enum mlxsw_sp_l3proto proto,
+                             union mlxsw_sp_l3addr *addr);
+void mlxsw_sp_nve_flood_ip_del(struct mlxsw_sp *mlxsw_sp,
+                              struct mlxsw_sp_fid *fid,
+                              enum mlxsw_sp_l3proto proto,
+                              union mlxsw_sp_l3addr *addr);
+u32 mlxsw_sp_nve_decap_tunnel_index_get(const struct mlxsw_sp *mlxsw_sp);
+bool mlxsw_sp_nve_ipv4_route_is_decap(const struct mlxsw_sp *mlxsw_sp,
+                                     u32 tb_id, __be32 addr);
+int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid,
+                           struct mlxsw_sp_nve_params *params,
+                           struct netlink_ext_ack *extack);
+void mlxsw_sp_nve_fid_disable(struct mlxsw_sp *mlxsw_sp,
+                             struct mlxsw_sp_fid *fid);
+int mlxsw_sp_port_nve_init(struct mlxsw_sp_port *mlxsw_sp_port);
+void mlxsw_sp_port_nve_fini(struct mlxsw_sp_port *mlxsw_sp_port);
+int mlxsw_sp_nve_init(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_nve_fini(struct mlxsw_sp *mlxsw_sp);
+
 #endif
index 715d24ff937e90bbe96a15bde6afa18184ea6897..a3db033d73990940dc0bff4c2586efab0027f7bf 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/if_vlan.h>
 #include <linux/if_bridge.h>
 #include <linux/netdevice.h>
+#include <linux/rhashtable.h>
 #include <linux/rtnetlink.h>
 
 #include "spectrum.h"
@@ -14,6 +15,7 @@
 struct mlxsw_sp_fid_family;
 
 struct mlxsw_sp_fid_core {
+       struct rhashtable vni_ht;
        struct mlxsw_sp_fid_family *fid_family_arr[MLXSW_SP_FID_TYPE_MAX];
        unsigned int *port_fid_mappings;
 };
@@ -24,6 +26,12 @@ struct mlxsw_sp_fid {
        unsigned int ref_count;
        u16 fid_index;
        struct mlxsw_sp_fid_family *fid_family;
+
+       struct rhash_head vni_ht_node;
+       __be32 vni;
+       u32 nve_flood_index;
+       u8 vni_valid:1,
+          nve_flood_index_valid:1;
 };
 
 struct mlxsw_sp_fid_8021q {
@@ -36,6 +44,12 @@ struct mlxsw_sp_fid_8021d {
        int br_ifindex;
 };
 
+static const struct rhashtable_params mlxsw_sp_fid_vni_ht_params = {
+       .key_len = sizeof_field(struct mlxsw_sp_fid, vni),
+       .key_offset = offsetof(struct mlxsw_sp_fid, vni),
+       .head_offset = offsetof(struct mlxsw_sp_fid, vni_ht_node),
+};
+
 struct mlxsw_sp_flood_table {
        enum mlxsw_sp_flood_type packet_type;
        enum mlxsw_reg_sfgc_bridge_type bridge_type;
@@ -56,6 +70,11 @@ struct mlxsw_sp_fid_ops {
                            struct mlxsw_sp_port *port, u16 vid);
        void (*port_vid_unmap)(struct mlxsw_sp_fid *fid,
                               struct mlxsw_sp_port *port, u16 vid);
+       int (*vni_set)(struct mlxsw_sp_fid *fid, __be32 vni);
+       void (*vni_clear)(struct mlxsw_sp_fid *fid);
+       int (*nve_flood_index_set)(struct mlxsw_sp_fid *fid,
+                                  u32 nve_flood_index);
+       void (*nve_flood_index_clear)(struct mlxsw_sp_fid *fid);
 };
 
 struct mlxsw_sp_fid_family {
@@ -94,6 +113,117 @@ static const int *mlxsw_sp_packet_type_sfgc_types[] = {
        [MLXSW_SP_FLOOD_TYPE_MC]        = mlxsw_sp_sfgc_mc_packet_types,
 };
 
+struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_vni(struct mlxsw_sp *mlxsw_sp,
+                                               __be32 vni)
+{
+       struct mlxsw_sp_fid *fid;
+
+       fid = rhashtable_lookup_fast(&mlxsw_sp->fid_core->vni_ht, &vni,
+                                    mlxsw_sp_fid_vni_ht_params);
+       if (fid)
+               fid->ref_count++;
+
+       return fid;
+}
+
+int mlxsw_sp_fid_vni(const struct mlxsw_sp_fid *fid, __be32 *vni)
+{
+       if (!fid->vni_valid)
+               return -EINVAL;
+
+       *vni = fid->vni;
+
+       return 0;
+}
+
+int mlxsw_sp_fid_nve_flood_index_set(struct mlxsw_sp_fid *fid,
+                                    u32 nve_flood_index)
+{
+       struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+       const struct mlxsw_sp_fid_ops *ops = fid_family->ops;
+       int err;
+
+       if (WARN_ON(!ops->nve_flood_index_set || fid->nve_flood_index_valid))
+               return -EINVAL;
+
+       err = ops->nve_flood_index_set(fid, nve_flood_index);
+       if (err)
+               return err;
+
+       fid->nve_flood_index = nve_flood_index;
+       fid->nve_flood_index_valid = true;
+
+       return 0;
+}
+
+void mlxsw_sp_fid_nve_flood_index_clear(struct mlxsw_sp_fid *fid)
+{
+       struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+       const struct mlxsw_sp_fid_ops *ops = fid_family->ops;
+
+       if (WARN_ON(!ops->nve_flood_index_clear || !fid->nve_flood_index_valid))
+               return;
+
+       fid->nve_flood_index_valid = false;
+       ops->nve_flood_index_clear(fid);
+}
+
+bool mlxsw_sp_fid_nve_flood_index_is_set(const struct mlxsw_sp_fid *fid)
+{
+       return fid->nve_flood_index_valid;
+}
+
+int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, __be32 vni)
+{
+       struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+       const struct mlxsw_sp_fid_ops *ops = fid_family->ops;
+       struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp;
+       int err;
+
+       if (WARN_ON(!ops->vni_set || fid->vni_valid))
+               return -EINVAL;
+
+       fid->vni = vni;
+       err = rhashtable_lookup_insert_fast(&mlxsw_sp->fid_core->vni_ht,
+                                           &fid->vni_ht_node,
+                                           mlxsw_sp_fid_vni_ht_params);
+       if (err)
+               return err;
+
+       err = ops->vni_set(fid, vni);
+       if (err)
+               goto err_vni_set;
+
+       fid->vni_valid = true;
+
+       return 0;
+
+err_vni_set:
+       rhashtable_remove_fast(&mlxsw_sp->fid_core->vni_ht, &fid->vni_ht_node,
+                              mlxsw_sp_fid_vni_ht_params);
+       return err;
+}
+
+void mlxsw_sp_fid_vni_clear(struct mlxsw_sp_fid *fid)
+{
+       struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+       const struct mlxsw_sp_fid_ops *ops = fid_family->ops;
+       struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp;
+
+       if (WARN_ON(!ops->vni_clear || !fid->vni_valid))
+               return;
+
+       fid->vni_valid = false;
+       ops->vni_clear(fid);
+       rhashtable_remove_fast(&mlxsw_sp->fid_core->vni_ht, &fid->vni_ht_node,
+                              mlxsw_sp_fid_vni_ht_params);
+}
+
+bool mlxsw_sp_fid_vni_is_set(const struct mlxsw_sp_fid *fid)
+{
+       return fid->vni_valid;
+}
+
 static const struct mlxsw_sp_flood_table *
 mlxsw_sp_fid_flood_table_lookup(const struct mlxsw_sp_fid *fid,
                                enum mlxsw_sp_flood_type packet_type)
@@ -217,6 +347,21 @@ static int mlxsw_sp_fid_op(struct mlxsw_sp *mlxsw_sp, u16 fid_index,
        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl);
 }
 
+static int mlxsw_sp_fid_vni_op(struct mlxsw_sp *mlxsw_sp, u16 fid_index,
+                              __be32 vni, bool vni_valid, u32 nve_flood_index,
+                              bool nve_flood_index_valid)
+{
+       char sfmr_pl[MLXSW_REG_SFMR_LEN];
+
+       mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_CREATE_FID, fid_index,
+                           0);
+       mlxsw_reg_sfmr_vv_set(sfmr_pl, vni_valid);
+       mlxsw_reg_sfmr_vni_set(sfmr_pl, be32_to_cpu(vni));
+       mlxsw_reg_sfmr_vtfp_set(sfmr_pl, nve_flood_index_valid);
+       mlxsw_reg_sfmr_nve_tunnel_flood_ptr_set(sfmr_pl, nve_flood_index);
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl);
+}
+
 static int mlxsw_sp_fid_vid_map(struct mlxsw_sp *mlxsw_sp, u16 fid_index,
                                u16 vid, bool valid)
 {
@@ -393,6 +538,8 @@ static int mlxsw_sp_fid_8021d_configure(struct mlxsw_sp_fid *fid)
 
 static void mlxsw_sp_fid_8021d_deconfigure(struct mlxsw_sp_fid *fid)
 {
+       if (fid->vni_valid)
+               mlxsw_sp_nve_fid_disable(fid->fid_family->mlxsw_sp, fid);
        mlxsw_sp_fid_op(fid->fid_family->mlxsw_sp, fid->fid_index, 0, false);
 }
 
@@ -531,6 +678,41 @@ mlxsw_sp_fid_8021d_port_vid_unmap(struct mlxsw_sp_fid *fid,
                                    mlxsw_sp_port->local_port, vid, false);
 }
 
+static int mlxsw_sp_fid_8021d_vni_set(struct mlxsw_sp_fid *fid, __be32 vni)
+{
+       struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+
+       return mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, vni,
+                                  true, fid->nve_flood_index,
+                                  fid->nve_flood_index_valid);
+}
+
+static void mlxsw_sp_fid_8021d_vni_clear(struct mlxsw_sp_fid *fid)
+{
+       struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+
+       mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, 0, false,
+                           fid->nve_flood_index, fid->nve_flood_index_valid);
+}
+
+static int mlxsw_sp_fid_8021d_nve_flood_index_set(struct mlxsw_sp_fid *fid,
+                                                 u32 nve_flood_index)
+{
+       struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+
+       return mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index,
+                                  fid->vni, fid->vni_valid, nve_flood_index,
+                                  true);
+}
+
+static void mlxsw_sp_fid_8021d_nve_flood_index_clear(struct mlxsw_sp_fid *fid)
+{
+       struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+
+       mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, fid->vni,
+                           fid->vni_valid, 0, false);
+}
+
 static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_8021d_ops = {
        .setup                  = mlxsw_sp_fid_8021d_setup,
        .configure              = mlxsw_sp_fid_8021d_configure,
@@ -540,6 +722,10 @@ static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_8021d_ops = {
        .flood_index            = mlxsw_sp_fid_8021d_flood_index,
        .port_vid_map           = mlxsw_sp_fid_8021d_port_vid_map,
        .port_vid_unmap         = mlxsw_sp_fid_8021d_port_vid_unmap,
+       .vni_set                = mlxsw_sp_fid_8021d_vni_set,
+       .vni_clear              = mlxsw_sp_fid_8021d_vni_clear,
+       .nve_flood_index_set    = mlxsw_sp_fid_8021d_nve_flood_index_set,
+       .nve_flood_index_clear  = mlxsw_sp_fid_8021d_nve_flood_index_clear,
 };
 
 static const struct mlxsw_sp_flood_table mlxsw_sp_fid_8021d_flood_tables[] = {
@@ -708,14 +894,12 @@ static const struct mlxsw_sp_fid_family *mlxsw_sp_fid_family_arr[] = {
        [MLXSW_SP_FID_TYPE_DUMMY]       = &mlxsw_sp_fid_dummy_family,
 };
 
-static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp,
-                                            enum mlxsw_sp_fid_type type,
-                                            const void *arg)
+static struct mlxsw_sp_fid *mlxsw_sp_fid_lookup(struct mlxsw_sp *mlxsw_sp,
+                                               enum mlxsw_sp_fid_type type,
+                                               const void *arg)
 {
        struct mlxsw_sp_fid_family *fid_family;
        struct mlxsw_sp_fid *fid;
-       u16 fid_index;
-       int err;
 
        fid_family = mlxsw_sp->fid_core->fid_family_arr[type];
        list_for_each_entry(fid, &fid_family->fids_list, list) {
@@ -725,6 +909,23 @@ static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp,
                return fid;
        }
 
+       return NULL;
+}
+
+static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp,
+                                            enum mlxsw_sp_fid_type type,
+                                            const void *arg)
+{
+       struct mlxsw_sp_fid_family *fid_family;
+       struct mlxsw_sp_fid *fid;
+       u16 fid_index;
+       int err;
+
+       fid = mlxsw_sp_fid_lookup(mlxsw_sp, type, arg);
+       if (fid)
+               return fid;
+
+       fid_family = mlxsw_sp->fid_core->fid_family_arr[type];
        fid = kzalloc(fid_family->fid_size, GFP_KERNEL);
        if (!fid)
                return ERR_PTR(-ENOMEM);
@@ -784,6 +985,13 @@ struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_get(struct mlxsw_sp *mlxsw_sp,
        return mlxsw_sp_fid_get(mlxsw_sp, MLXSW_SP_FID_TYPE_8021D, &br_ifindex);
 }
 
+struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_lookup(struct mlxsw_sp *mlxsw_sp,
+                                              int br_ifindex)
+{
+       return mlxsw_sp_fid_lookup(mlxsw_sp, MLXSW_SP_FID_TYPE_8021D,
+                                  &br_ifindex);
+}
+
 struct mlxsw_sp_fid *mlxsw_sp_fid_rfid_get(struct mlxsw_sp *mlxsw_sp,
                                           u16 rif_index)
 {
@@ -918,6 +1126,10 @@ int mlxsw_sp_fids_init(struct mlxsw_sp *mlxsw_sp)
                return -ENOMEM;
        mlxsw_sp->fid_core = fid_core;
 
+       err = rhashtable_init(&fid_core->vni_ht, &mlxsw_sp_fid_vni_ht_params);
+       if (err)
+               goto err_rhashtable_init;
+
        fid_core->port_fid_mappings = kcalloc(max_ports, sizeof(unsigned int),
                                              GFP_KERNEL);
        if (!fid_core->port_fid_mappings) {
@@ -944,6 +1156,8 @@ err_fid_ops_register:
        }
        kfree(fid_core->port_fid_mappings);
 err_alloc_port_fid_mappings:
+       rhashtable_destroy(&fid_core->vni_ht);
+err_rhashtable_init:
        kfree(fid_core);
        return err;
 }
@@ -957,5 +1171,6 @@ void mlxsw_sp_fids_fini(struct mlxsw_sp *mlxsw_sp)
                mlxsw_sp_fid_family_unregister(mlxsw_sp,
                                               fid_core->fid_family_arr[i]);
        kfree(fid_core->port_fid_mappings);
+       rhashtable_destroy(&fid_core->vni_ht);
        kfree(fid_core);
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c
new file mode 100644 (file)
index 0000000..ad06d99
--- /dev/null
@@ -0,0 +1,982 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/err.h>
+#include <linux/gfp.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+#include <net/inet_ecn.h>
+#include <net/ipv6.h>
+
+#include "reg.h"
+#include "spectrum.h"
+#include "spectrum_nve.h"
+
+/* NVE operations for the "sp1" variant, indexed by enum mlxsw_sp_nve_type.
+ * Only the VxLAN slot is populated.
+ */
+const struct mlxsw_sp_nve_ops *mlxsw_sp1_nve_ops_arr[] = {
+       [MLXSW_SP_NVE_TYPE_VXLAN]       = &mlxsw_sp1_nve_vxlan_ops,
+};
+
+/* NVE operations for the "sp2" variant, indexed by enum mlxsw_sp_nve_type.
+ * Only the VxLAN slot is populated.
+ */
+const struct mlxsw_sp_nve_ops *mlxsw_sp2_nve_ops_arr[] = {
+       [MLXSW_SP_NVE_TYPE_VXLAN]       = &mlxsw_sp2_nve_vxlan_ops,
+};
+
+/* Forward declarations: these types are referenced by pointer before their
+ * full definitions appear.
+ */
+struct mlxsw_sp_nve_mc_entry;
+struct mlxsw_sp_nve_mc_record;
+struct mlxsw_sp_nve_mc_list;
+
+/* Per-L3-protocol operations on the entries of a flood record */
+struct mlxsw_sp_nve_mc_record_ops {
+       /* TNUMT record type used when the record is packed for the device */
+       enum mlxsw_reg_tnumt_record_type type;
+       /* Store @addr in @mc_entry. Returns 0 or a negative errno */
+       int (*entry_add)(struct mlxsw_sp_nve_mc_record *mc_record,
+                        struct mlxsw_sp_nve_mc_entry *mc_entry,
+                        const union mlxsw_sp_l3addr *addr);
+       /* Counterpart of entry_add(), invoked once the entry is invalidated */
+       void (*entry_del)(const struct mlxsw_sp_nve_mc_record *mc_record,
+                         const struct mlxsw_sp_nve_mc_entry *mc_entry);
+       /* Write @mc_entry into slot @entry_index of the TNUMT payload */
+       void (*entry_set)(const struct mlxsw_sp_nve_mc_record *mc_record,
+                         const struct mlxsw_sp_nve_mc_entry *mc_entry,
+                         char *tnumt_pl, unsigned int entry_index);
+       /* Return true when @mc_entry holds @addr */
+       bool (*entry_compare)(const struct mlxsw_sp_nve_mc_record *mc_record,
+                             const struct mlxsw_sp_nve_mc_entry *mc_entry,
+                             const union mlxsw_sp_l3addr *addr);
+};
+
+/* Hash key of a flood list: lists are looked up by FID index */
+struct mlxsw_sp_nve_mc_list_key {
+       u16 fid_index;
+};
+
+/* IPv6 payload of a flood entry */
+struct mlxsw_sp_nve_mc_ipv6_entry {
+       struct in6_addr addr6;  /* Address used when comparing entries */
+       u32 addr6_kvdl_index;   /* Written to TNUMT as the entry's udip_ptr */
+};
+
+/* A single slot in a flood record. Which union member is meaningful depends
+ * on the protocol of the record that contains the entry.
+ */
+struct mlxsw_sp_nve_mc_entry {
+       union {
+               __be32 addr4;
+               struct mlxsw_sp_nve_mc_ipv6_entry ipv6_entry;
+       };
+       u8 valid:1;     /* Set while the slot is in use */
+};
+
+/* A flood record: a fixed-capacity set of entries of one L3 protocol, linked
+ * into the records list of its owning flood list.
+ */
+struct mlxsw_sp_nve_mc_record {
+       struct list_head list;          /* Node in mc_list->records_list */
+       enum mlxsw_sp_l3proto proto;
+       unsigned int num_entries;       /* Number of valid slots in entries[] */
+       struct mlxsw_sp *mlxsw_sp;
+       struct mlxsw_sp_nve_mc_list *mc_list;
+       const struct mlxsw_sp_nve_mc_record_ops *ops;
+       u32 kvdl_index;                 /* KVDL index allocated for the record */
+       /* Trailing storage sized by the allocator. Declared as a C99 flexible
+        * array member rather than a zero-length array (GNU extension), so
+        * the compiler can enforce that it stays the last member.
+        */
+       struct mlxsw_sp_nve_mc_entry entries[];
+};
+
+/* Flood list of one FID: an ordered list of records, hashed by FID index */
+struct mlxsw_sp_nve_mc_list {
+       struct list_head records_list;  /* List of mlxsw_sp_nve_mc_record */
+       struct rhash_head ht_node;      /* Node in nve->mc_list_ht */
+       struct mlxsw_sp_nve_mc_list_key key;
+};
+
+/* rhashtable layout for struct mlxsw_sp_nve_mc_list: the whole key structure
+ * is hashed, so it must not contain uninitialized bytes.
+ */
+static const struct rhashtable_params mlxsw_sp_nve_mc_list_ht_params = {
+       .key_len = sizeof(struct mlxsw_sp_nve_mc_list_key),
+       .key_offset = offsetof(struct mlxsw_sp_nve_mc_list, key),
+       .head_offset = offsetof(struct mlxsw_sp_nve_mc_list, ht_node),
+};
+
+/* IPv4 ->entry_add(): copy the IPv4 address from @addr into @mc_entry.
+ * Cannot fail; always returns 0.
+ */
+static int
+mlxsw_sp_nve_mc_record_ipv4_entry_add(struct mlxsw_sp_nve_mc_record *mc_record,
+                                     struct mlxsw_sp_nve_mc_entry *mc_entry,
+                                     const union mlxsw_sp_l3addr *addr)
+{
+       const __be32 ip = addr->addr4;
+
+       mc_entry->addr4 = ip;
+       return 0;
+}
+
+/* IPv4 ->entry_del(): intentionally empty, since the IPv4 ->entry_add() only
+ * stores the address in the entry and acquires nothing that needs releasing.
+ */
+static void
+mlxsw_sp_nve_mc_record_ipv4_entry_del(const struct mlxsw_sp_nve_mc_record *mc_record,
+                                     const struct mlxsw_sp_nve_mc_entry *mc_entry)
+{
+}
+
+/* IPv4 ->entry_set(): place the entry's address, converted to CPU byte
+ * order, in the udip field at @entry_index of the TNUMT payload.
+ */
+static void
+mlxsw_sp_nve_mc_record_ipv4_entry_set(const struct mlxsw_sp_nve_mc_record *mc_record,
+                                     const struct mlxsw_sp_nve_mc_entry *mc_entry,
+                                     char *tnumt_pl, unsigned int entry_index)
+{
+       mlxsw_reg_tnumt_udip_set(tnumt_pl, entry_index,
+                                be32_to_cpu(mc_entry->addr4));
+}
+
+/* IPv4 ->entry_compare(): true when @mc_entry stores the IPv4 address
+ * carried by @addr.
+ */
+static bool
+mlxsw_sp_nve_mc_record_ipv4_entry_compare(const struct mlxsw_sp_nve_mc_record *mc_record,
+                                         const struct mlxsw_sp_nve_mc_entry *mc_entry,
+                                         const union mlxsw_sp_l3addr *addr)
+{
+       const __be32 wanted = addr->addr4;
+
+       return wanted == mc_entry->addr4;
+}
+
+/* Entry operations used by records whose protocol is IPv4 */
+static const struct mlxsw_sp_nve_mc_record_ops
+mlxsw_sp_nve_mc_record_ipv4_ops = {
+       .type           = MLXSW_REG_TNUMT_RECORD_TYPE_IPV4,
+       .entry_add      = &mlxsw_sp_nve_mc_record_ipv4_entry_add,
+       .entry_del      = &mlxsw_sp_nve_mc_record_ipv4_entry_del,
+       .entry_set      = &mlxsw_sp_nve_mc_record_ipv4_entry_set,
+       .entry_compare  = &mlxsw_sp_nve_mc_record_ipv4_entry_compare,
+};
+
+/* IPv6 ->entry_add(): adding IPv6 entries is not implemented, so reaching
+ * this function triggers a warning and fails with -EINVAL.
+ */
+static int
+mlxsw_sp_nve_mc_record_ipv6_entry_add(struct mlxsw_sp_nve_mc_record *mc_record,
+                                     struct mlxsw_sp_nve_mc_entry *mc_entry,
+                                     const union mlxsw_sp_l3addr *addr)
+{
+       const int err = -EINVAL;
+
+       WARN_ON(1);
+       return err;
+}
+
+/* IPv6 ->entry_del(): intentionally empty. The IPv6 ->entry_add() always
+ * fails, so there is never anything to undo here.
+ */
+static void
+mlxsw_sp_nve_mc_record_ipv6_entry_del(const struct mlxsw_sp_nve_mc_record *mc_record,
+                                     const struct mlxsw_sp_nve_mc_entry *mc_entry)
+{
+}
+
+/* IPv6 ->entry_set(): place the entry's KVDL index in the udip_ptr field at
+ * @entry_index of the TNUMT payload.
+ */
+static void
+mlxsw_sp_nve_mc_record_ipv6_entry_set(const struct mlxsw_sp_nve_mc_record *mc_record,
+                                     const struct mlxsw_sp_nve_mc_entry *mc_entry,
+                                     char *tnumt_pl, unsigned int entry_index)
+{
+       mlxsw_reg_tnumt_udip_ptr_set(tnumt_pl, entry_index,
+                                    mc_entry->ipv6_entry.addr6_kvdl_index);
+}
+
+/* IPv6 ->entry_compare(): true when @mc_entry stores the IPv6 address
+ * carried by @addr.
+ */
+static bool
+mlxsw_sp_nve_mc_record_ipv6_entry_compare(const struct mlxsw_sp_nve_mc_record *mc_record,
+                                         const struct mlxsw_sp_nve_mc_entry *mc_entry,
+                                         const union mlxsw_sp_l3addr *addr)
+{
+       const struct in6_addr *stored = &mc_entry->ipv6_entry.addr6;
+       const struct in6_addr *wanted = &addr->addr6;
+
+       return ipv6_addr_equal(stored, wanted);
+}
+
+/* Entry operations used by records whose protocol is IPv6 */
+static const struct mlxsw_sp_nve_mc_record_ops
+mlxsw_sp_nve_mc_record_ipv6_ops = {
+       .type           = MLXSW_REG_TNUMT_RECORD_TYPE_IPV6,
+       .entry_add      = &mlxsw_sp_nve_mc_record_ipv6_entry_add,
+       .entry_del      = &mlxsw_sp_nve_mc_record_ipv6_entry_del,
+       .entry_set      = &mlxsw_sp_nve_mc_record_ipv6_entry_set,
+       .entry_compare  = &mlxsw_sp_nve_mc_record_ipv6_entry_compare,
+};
+
+/* Record operations indexed by enum mlxsw_sp_l3proto; consulted when a new
+ * record is created for a given protocol.
+ */
+static const struct mlxsw_sp_nve_mc_record_ops *
+mlxsw_sp_nve_mc_record_ops_arr[] = {
+       [MLXSW_SP_L3_PROTO_IPV4] = &mlxsw_sp_nve_mc_record_ipv4_ops,
+       [MLXSW_SP_L3_PROTO_IPV6] = &mlxsw_sp_nve_mc_record_ipv6_ops,
+};
+
+/* Return the flood list hashed under @key, or NULL when there is none */
+static struct mlxsw_sp_nve_mc_list *
+mlxsw_sp_nve_mc_list_find(struct mlxsw_sp *mlxsw_sp,
+                         const struct mlxsw_sp_nve_mc_list_key *key)
+{
+       return rhashtable_lookup_fast(&mlxsw_sp->nve->mc_list_ht, key,
+                                     mlxsw_sp_nve_mc_list_ht_params);
+}
+
+/* Allocate an empty flood list for @key and insert it into the hash table.
+ * Returns the new list, or an ERR_PTR() when the allocation or the
+ * insertion fails.
+ */
+static struct mlxsw_sp_nve_mc_list *
+mlxsw_sp_nve_mc_list_create(struct mlxsw_sp *mlxsw_sp,
+                           const struct mlxsw_sp_nve_mc_list_key *key)
+{
+       struct mlxsw_sp_nve_mc_list *new_list;
+       int rc;
+
+       new_list = kmalloc(sizeof(*new_list), GFP_KERNEL);
+       if (!new_list)
+               return ERR_PTR(-ENOMEM);
+
+       new_list->key = *key;
+       INIT_LIST_HEAD(&new_list->records_list);
+
+       rc = rhashtable_insert_fast(&mlxsw_sp->nve->mc_list_ht,
+                                   &new_list->ht_node,
+                                   mlxsw_sp_nve_mc_list_ht_params);
+       if (rc) {
+               kfree(new_list);
+               return ERR_PTR(rc);
+       }
+
+       return new_list;
+}
+
+/* Unhash and free @mc_list. The list is expected to hold no records; a
+ * warning is emitted otherwise.
+ */
+static void mlxsw_sp_nve_mc_list_destroy(struct mlxsw_sp *mlxsw_sp,
+                                        struct mlxsw_sp_nve_mc_list *mc_list)
+{
+       struct rhashtable *ht = &mlxsw_sp->nve->mc_list_ht;
+
+       rhashtable_remove_fast(ht, &mc_list->ht_node,
+                              mlxsw_sp_nve_mc_list_ht_params);
+       WARN_ON(!list_empty(&mc_list->records_list));
+       kfree(mc_list);
+}
+
+/* Return the flood list for @key, creating it when it does not exist yet.
+ * May return an ERR_PTR() if creation fails.
+ */
+static struct mlxsw_sp_nve_mc_list *
+mlxsw_sp_nve_mc_list_get(struct mlxsw_sp *mlxsw_sp,
+                        const struct mlxsw_sp_nve_mc_list_key *key)
+{
+       struct mlxsw_sp_nve_mc_list *existing;
+
+       existing = mlxsw_sp_nve_mc_list_find(mlxsw_sp, key);
+       if (!existing)
+               return mlxsw_sp_nve_mc_list_create(mlxsw_sp, key);
+
+       return existing;
+}
+
+/* Release @mc_list: it is destroyed once it no longer holds any record and
+ * left untouched otherwise.
+ */
+static void
+mlxsw_sp_nve_mc_list_put(struct mlxsw_sp *mlxsw_sp,
+                        struct mlxsw_sp_nve_mc_list *mc_list)
+{
+       if (list_empty(&mc_list->records_list))
+               mlxsw_sp_nve_mc_list_destroy(mlxsw_sp, mc_list);
+}
+
+/* Allocate a zeroed record with room for the maximum number of @proto
+ * entries, reserve a TNUMT KVDL entry for it and append it to @mc_list.
+ * Returns the record or an ERR_PTR() on failure.
+ */
+static struct mlxsw_sp_nve_mc_record *
+mlxsw_sp_nve_mc_record_create(struct mlxsw_sp *mlxsw_sp,
+                             struct mlxsw_sp_nve_mc_list *mc_list,
+                             enum mlxsw_sp_l3proto proto)
+{
+       unsigned int capacity = mlxsw_sp->nve->num_max_mc_entries[proto];
+       struct mlxsw_sp_nve_mc_record *rec;
+       size_t alloc_size;
+       int rc;
+
+       alloc_size = sizeof(*rec) +
+                    capacity * sizeof(struct mlxsw_sp_nve_mc_entry);
+       rec = kzalloc(alloc_size, GFP_KERNEL);
+       if (!rec)
+               return ERR_PTR(-ENOMEM);
+
+       rc = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_TNUMT, 1,
+                                &rec->kvdl_index);
+       if (rc) {
+               kfree(rec);
+               return ERR_PTR(rc);
+       }
+
+       rec->proto = proto;
+       rec->mc_list = mc_list;
+       rec->mlxsw_sp = mlxsw_sp;
+       rec->ops = mlxsw_sp_nve_mc_record_ops_arr[proto];
+       list_add_tail(&rec->list, &mc_list->records_list);
+
+       return rec;
+}
+
+/* Unlink @mc_record from its list, give back its KVDL entry and free it.
+ * A record that still has entries triggers a warning.
+ */
+static void
+mlxsw_sp_nve_mc_record_destroy(struct mlxsw_sp_nve_mc_record *mc_record)
+{
+       list_del(&mc_record->list);
+       mlxsw_sp_kvdl_free(mc_record->mlxsw_sp,
+                          MLXSW_SP_KVDL_ENTRY_TYPE_TNUMT, 1,
+                          mc_record->kvdl_index);
+       WARN_ON(mc_record->num_entries);
+       kfree(mc_record);
+}
+
+/* Find a record of @proto in @mc_list that still has a free slot, searching
+ * from the tail of the list. When none exists, create a new record.
+ */
+static struct mlxsw_sp_nve_mc_record *
+mlxsw_sp_nve_mc_record_get(struct mlxsw_sp *mlxsw_sp,
+                          struct mlxsw_sp_nve_mc_list *mc_list,
+                          enum mlxsw_sp_l3proto proto)
+{
+       const unsigned int capacity = mlxsw_sp->nve->num_max_mc_entries[proto];
+       struct mlxsw_sp_nve_mc_record *rec;
+
+       list_for_each_entry_reverse(rec, &mc_list->records_list, list) {
+               if (rec->proto != proto)
+                       continue;
+               if (rec->num_entries >= capacity)
+                       continue;
+               return rec;
+       }
+
+       return mlxsw_sp_nve_mc_record_create(mlxsw_sp, mc_list, proto);
+}
+
+/* Release @mc_record: it is destroyed once it holds no entries and left
+ * untouched otherwise.
+ */
+static void
+mlxsw_sp_nve_mc_record_put(struct mlxsw_sp_nve_mc_record *mc_record)
+{
+       if (mc_record->num_entries == 0)
+               mlxsw_sp_nve_mc_record_destroy(mc_record);
+}
+
+/* Return the first slot of @mc_record that is not marked valid, or NULL
+ * when every slot is in use.
+ */
+static struct mlxsw_sp_nve_mc_entry *
+mlxsw_sp_nve_mc_free_entry_find(struct mlxsw_sp_nve_mc_record *mc_record)
+{
+       struct mlxsw_sp_nve *nve = mc_record->mlxsw_sp->nve;
+       unsigned int capacity = nve->num_max_mc_entries[mc_record->proto];
+       unsigned int idx;
+
+       for (idx = 0; idx < capacity; idx++) {
+               struct mlxsw_sp_nve_mc_entry *slot = &mc_record->entries[idx];
+
+               if (!slot->valid)
+                       return slot;
+       }
+
+       return NULL;
+}
+
+/* Rewrite @mc_record to the device through the TNUMT register.
+ *
+ * The payload carries the record's own KVDL index, the KVDL index of the
+ * next record in the list (when there is one) and all currently valid
+ * entries, packed densely from payload slot 0.
+ *
+ * Returns the result of the register write.
+ */
+static int
+mlxsw_sp_nve_mc_record_refresh(struct mlxsw_sp_nve_mc_record *mc_record)
+{
+       enum mlxsw_reg_tnumt_record_type type = mc_record->ops->type;
+       struct mlxsw_sp_nve_mc_list *mc_list = mc_record->mc_list;
+       struct mlxsw_sp *mlxsw_sp = mc_record->mlxsw_sp;
+       char tnumt_pl[MLXSW_REG_TNUMT_LEN];
+       unsigned int num_max_entries;
+       unsigned int num_entries = 0;
+       u32 next_kvdl_index = 0;
+       bool next_valid = false;
+       int i;
+
+       /* Chain to the following record, unless this is the tail */
+       if (!list_is_last(&mc_record->list, &mc_list->records_list)) {
+               struct mlxsw_sp_nve_mc_record *next_record;
+
+               next_record = list_next_entry(mc_record, list);
+               next_kvdl_index = next_record->kvdl_index;
+               next_valid = true;
+       }
+
+       mlxsw_reg_tnumt_pack(tnumt_pl, type, MLXSW_REG_TNUMT_TUNNEL_PORT_NVE,
+                            mc_record->kvdl_index, next_valid,
+                            next_kvdl_index, mc_record->num_entries);
+
+       /* Valid entries may be sparse in entries[], but are written to
+        * consecutive payload slots
+        */
+       num_max_entries = mlxsw_sp->nve->num_max_mc_entries[mc_record->proto];
+       for (i = 0; i < num_max_entries; i++) {
+               struct mlxsw_sp_nve_mc_entry *mc_entry;
+
+               mc_entry = &mc_record->entries[i];
+               if (!mc_entry->valid)
+                       continue;
+               mc_record->ops->entry_set(mc_record, mc_entry, tnumt_pl,
+                                         num_entries++);
+       }
+
+       /* The cached count must match the number of valid slots */
+       WARN_ON(num_entries != mc_record->num_entries);
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnumt), tnumt_pl);
+}
+
+/* Tell whether @mc_record is the head of its flood list */
+static bool
+mlxsw_sp_nve_mc_record_is_first(struct mlxsw_sp_nve_mc_record *mc_record)
+{
+       struct list_head *records = &mc_record->mc_list->records_list;
+
+       return list_first_entry(records, struct mlxsw_sp_nve_mc_record,
+                               list) == mc_record;
+}
+
+/* Return the valid entry of @mc_record that matches @addr according to the
+ * record's ->entry_compare(), or NULL when no such entry exists.
+ */
+static struct mlxsw_sp_nve_mc_entry *
+mlxsw_sp_nve_mc_entry_find(struct mlxsw_sp_nve_mc_record *mc_record,
+                          union mlxsw_sp_l3addr *addr)
+{
+       struct mlxsw_sp_nve *nve = mc_record->mlxsw_sp->nve;
+       unsigned int capacity = nve->num_max_mc_entries[mc_record->proto];
+       unsigned int idx;
+
+       for (idx = 0; idx < capacity; idx++) {
+               struct mlxsw_sp_nve_mc_entry *candidate;
+
+               candidate = &mc_record->entries[idx];
+               if (candidate->valid &&
+                   mc_record->ops->entry_compare(mc_record, candidate, addr))
+                       return candidate;
+       }
+
+       return NULL;
+}
+
+/* Add @addr to a free slot of @mc_record and write the record to the device.
+ *
+ * The caller is expected to pass a record with at least one free slot; a
+ * full record triggers a warning and -EINVAL. On any failure the slot is
+ * invalidated again and the entry count restored.
+ */
+static int
+mlxsw_sp_nve_mc_record_ip_add(struct mlxsw_sp_nve_mc_record *mc_record,
+                             union mlxsw_sp_l3addr *addr)
+{
+       struct mlxsw_sp_nve_mc_entry *mc_entry = NULL;
+       int err;
+
+       mc_entry = mlxsw_sp_nve_mc_free_entry_find(mc_record);
+       if (WARN_ON(!mc_entry))
+               return -EINVAL;
+
+       err = mc_record->ops->entry_add(mc_record, mc_entry, addr);
+       if (err)
+               return err;
+       mc_record->num_entries++;
+       mc_entry->valid = true;
+
+       err = mlxsw_sp_nve_mc_record_refresh(mc_record);
+       if (err)
+               goto err_record_refresh;
+
+       /* If this is a new record and not the first one, then we need to
+        * update the next pointer of the previous record
+        */
+       if (mc_record->num_entries != 1 ||
+           mlxsw_sp_nve_mc_record_is_first(mc_record))
+               return 0;
+
+       err = mlxsw_sp_nve_mc_record_refresh(list_prev_entry(mc_record, list));
+       if (err)
+               goto err_prev_record_refresh;
+
+       return 0;
+
+       /* Both failures share the same software rollback */
+err_prev_record_refresh:
+err_record_refresh:
+       mc_entry->valid = false;
+       mc_record->num_entries--;
+       mc_record->ops->entry_del(mc_record, mc_entry);
+       return err;
+}
+
+/* Invalidate @mc_entry in @mc_record and bring the device in sync.
+ *
+ * The record itself is never freed here, even when its entry count drops to
+ * zero. Return values of the record refreshes are ignored, as this function
+ * cannot report failure.
+ */
+static void
+mlxsw_sp_nve_mc_record_entry_del(struct mlxsw_sp_nve_mc_record *mc_record,
+                                struct mlxsw_sp_nve_mc_entry *mc_entry)
+{
+       struct mlxsw_sp_nve_mc_list *mc_list = mc_record->mc_list;
+
+       mc_entry->valid = false;
+       mc_record->num_entries--;
+
+       /* When the record continues to exist we only need to invalidate
+        * the requested entry
+        */
+       if (mc_record->num_entries != 0) {
+               mlxsw_sp_nve_mc_record_refresh(mc_record);
+               mc_record->ops->entry_del(mc_record, mc_entry);
+               return;
+       }
+
+       /* If the record needs to be deleted, but it is not the first,
+        * then we need to make sure that the previous record no longer
+        * points to it. Remove deleted record from the list to reflect
+        * that and then re-add it at the end, so that it could be
+        * properly removed by the record destruction code
+        */
+       if (!mlxsw_sp_nve_mc_record_is_first(mc_record)) {
+               struct mlxsw_sp_nve_mc_record *prev_record;
+
+               prev_record = list_prev_entry(mc_record, list);
+               list_del(&mc_record->list);
+               mlxsw_sp_nve_mc_record_refresh(prev_record);
+               list_add_tail(&mc_record->list, &mc_list->records_list);
+               mc_record->ops->entry_del(mc_record, mc_entry);
+               return;
+       }
+
+       /* If the first record needs to be deleted, but the list is not
+        * singular, then the second record needs to be written in the
+        * first record's address, as this address is stored as a property
+        * of the FID
+        */
+       if (mlxsw_sp_nve_mc_record_is_first(mc_record) &&
+           !list_is_singular(&mc_list->records_list)) {
+               struct mlxsw_sp_nve_mc_record *next_record;
+
+               next_record = list_next_entry(mc_record, list);
+               /* The emptied record takes over the second record's index,
+                * which is what gets freed when it is destroyed
+                */
+               swap(mc_record->kvdl_index, next_record->kvdl_index);
+               mlxsw_sp_nve_mc_record_refresh(next_record);
+               mc_record->ops->entry_del(mc_record, mc_entry);
+               return;
+       }
+
+       /* This is the last case where the last remaining record needs to
+        * be deleted. Simply delete the entry
+        */
+       mc_record->ops->entry_del(mc_record, mc_entry);
+}
+
+/* Search the @proto records of @mc_list for an entry holding @addr.
+ * On success returns the record and stores the matching entry in
+ * *@mc_entry; returns NULL when no record holds the address.
+ */
+static struct mlxsw_sp_nve_mc_record *
+mlxsw_sp_nve_mc_record_find(struct mlxsw_sp_nve_mc_list *mc_list,
+                           enum mlxsw_sp_l3proto proto,
+                           union mlxsw_sp_l3addr *addr,
+                           struct mlxsw_sp_nve_mc_entry **mc_entry)
+{
+       struct mlxsw_sp_nve_mc_record *rec;
+
+       list_for_each_entry(rec, &mc_list->records_list, list) {
+               if (rec->proto == proto) {
+                       *mc_entry = mlxsw_sp_nve_mc_entry_find(rec, addr);
+                       if (*mc_entry)
+                               return rec;
+               }
+       }
+
+       return NULL;
+}
+
+/* Add @addr to @mc_list, using a record of @proto with spare room or a
+ * freshly created one. If the address cannot be added, the record is
+ * released again (and thus destroyed if it ended up empty).
+ */
+static int mlxsw_sp_nve_mc_list_ip_add(struct mlxsw_sp *mlxsw_sp,
+                                      struct mlxsw_sp_nve_mc_list *mc_list,
+                                      enum mlxsw_sp_l3proto proto,
+                                      union mlxsw_sp_l3addr *addr)
+{
+       struct mlxsw_sp_nve_mc_record *rec;
+       int rc;
+
+       rec = mlxsw_sp_nve_mc_record_get(mlxsw_sp, mc_list, proto);
+       if (IS_ERR(rec))
+               return PTR_ERR(rec);
+
+       rc = mlxsw_sp_nve_mc_record_ip_add(rec, addr);
+       if (rc)
+               mlxsw_sp_nve_mc_record_put(rec);
+
+       return rc;
+}
+
+/* Remove @addr from @mc_list and release the record that held it. A missing
+ * address triggers a warning and is otherwise ignored.
+ */
+static void mlxsw_sp_nve_mc_list_ip_del(struct mlxsw_sp *mlxsw_sp,
+                                       struct mlxsw_sp_nve_mc_list *mc_list,
+                                       enum mlxsw_sp_l3proto proto,
+                                       union mlxsw_sp_l3addr *addr)
+{
+       struct mlxsw_sp_nve_mc_entry *entry;
+       struct mlxsw_sp_nve_mc_record *rec;
+
+       rec = mlxsw_sp_nve_mc_record_find(mc_list, proto, addr, &entry);
+       if (!WARN_ON(!rec)) {
+               mlxsw_sp_nve_mc_record_entry_del(rec, entry);
+               mlxsw_sp_nve_mc_record_put(rec);
+       }
+}
+
+/* Point @fid at the first record of @mc_list, unless the FID already has a
+ * flood index. The first record's address is a property of the FID that is
+ * never changed afterwards, so it only has to be programmed for a new list.
+ */
+static int
+mlxsw_sp_nve_fid_flood_index_set(struct mlxsw_sp_fid *fid,
+                                struct mlxsw_sp_nve_mc_list *mc_list)
+{
+       struct mlxsw_sp_nve_mc_record *head;
+
+       if (!mlxsw_sp_fid_nve_flood_index_is_set(fid)) {
+               head = list_first_entry(&mc_list->records_list,
+                                       struct mlxsw_sp_nve_mc_record, list);
+               return mlxsw_sp_fid_nve_flood_index_set(fid, head->kvdl_index);
+       }
+
+       return 0;
+}
+
+/* Clear the flood index of @fid, but only when @mc_list is about to become
+ * empty, i.e. it consists of a single record holding a single entry.
+ * Must be called before the last entry is actually removed.
+ */
+static void
+mlxsw_sp_nve_fid_flood_index_clear(struct mlxsw_sp_fid *fid,
+                                  struct mlxsw_sp_nve_mc_list *mc_list)
+{
+       struct mlxsw_sp_nve_mc_record *mc_record;
+
+       /* The address of the first record needs to be invalidated only when
+        * the last record is about to be removed
+        */
+       if (!list_is_singular(&mc_list->records_list))
+               return;
+
+       mc_record = list_first_entry(&mc_list->records_list,
+                                    struct mlxsw_sp_nve_mc_record, list);
+       if (mc_record->num_entries != 1)
+               return;
+
+       /* Plain call: a void function must not return an expression */
+       mlxsw_sp_fid_nve_flood_index_clear(fid);
+}
+
+/* Add @addr to the flood list of @fid, creating the list if needed, and make
+ * sure the FID points at the list. Every step already taken is rolled back
+ * on failure. Returns 0 or a negative errno.
+ */
+int mlxsw_sp_nve_flood_ip_add(struct mlxsw_sp *mlxsw_sp,
+                             struct mlxsw_sp_fid *fid,
+                             enum mlxsw_sp_l3proto proto,
+                             union mlxsw_sp_l3addr *addr)
+{
+       struct mlxsw_sp_nve_mc_list_key key = {
+               .fid_index = mlxsw_sp_fid_index(fid),
+       };
+       struct mlxsw_sp_nve_mc_list *mc_list;
+       int rc;
+
+       mc_list = mlxsw_sp_nve_mc_list_get(mlxsw_sp, &key);
+       if (IS_ERR(mc_list))
+               return PTR_ERR(mc_list);
+
+       rc = mlxsw_sp_nve_mc_list_ip_add(mlxsw_sp, mc_list, proto, addr);
+       if (!rc) {
+               rc = mlxsw_sp_nve_fid_flood_index_set(fid, mc_list);
+               if (!rc)
+                       return 0;
+
+               /* The FID could not be updated: take the address back out */
+               mlxsw_sp_nve_mc_list_ip_del(mlxsw_sp, mc_list, proto, addr);
+       }
+
+       mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list);
+       return rc;
+}
+
+/* Remove @addr from the flood list of @fid. The FID's flood index is cleared
+ * first when this removal empties the list, and the list is released
+ * afterwards. A FID without a flood list triggers a warning.
+ */
+void mlxsw_sp_nve_flood_ip_del(struct mlxsw_sp *mlxsw_sp,
+                              struct mlxsw_sp_fid *fid,
+                              enum mlxsw_sp_l3proto proto,
+                              union mlxsw_sp_l3addr *addr)
+{
+       struct mlxsw_sp_nve_mc_list_key key = {
+               .fid_index = mlxsw_sp_fid_index(fid),
+       };
+       struct mlxsw_sp_nve_mc_list *mc_list;
+
+       mc_list = mlxsw_sp_nve_mc_list_find(mlxsw_sp, &key);
+       if (WARN_ON(mc_list == NULL))
+               return;
+
+       mlxsw_sp_nve_fid_flood_index_clear(fid, mc_list);
+       mlxsw_sp_nve_mc_list_ip_del(mlxsw_sp, mc_list, proto, addr);
+       mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list);
+}
+
+/* Delete every valid entry of @mc_record and then release the record, which
+ * destroys it once it is empty.
+ */
+static void
+mlxsw_sp_nve_mc_record_delete(struct mlxsw_sp_nve_mc_record *mc_record)
+{
+       struct mlxsw_sp_nve *nve = mc_record->mlxsw_sp->nve;
+       unsigned int capacity = nve->num_max_mc_entries[mc_record->proto];
+       unsigned int idx;
+
+       for (idx = 0; idx < capacity; idx++) {
+               struct mlxsw_sp_nve_mc_entry *entry = &mc_record->entries[idx];
+
+               if (entry->valid)
+                       mlxsw_sp_nve_mc_record_entry_del(mc_record, entry);
+       }
+
+       WARN_ON(mc_record->num_entries);
+       mlxsw_sp_nve_mc_record_put(mc_record);
+}
+
+/* Drop the whole flood list of @fid. Nothing is done when the FID has no
+ * flood index set; otherwise the index is cleared first and then all
+ * records of the list are deleted and the list released.
+ */
+static void mlxsw_sp_nve_flood_ip_flush(struct mlxsw_sp *mlxsw_sp,
+                                       struct mlxsw_sp_fid *fid)
+{
+       struct mlxsw_sp_nve_mc_list_key key = { 0 };
+       struct mlxsw_sp_nve_mc_record *rec, *next;
+       struct mlxsw_sp_nve_mc_list *mc_list;
+
+       if (!mlxsw_sp_fid_nve_flood_index_is_set(fid))
+               return;
+
+       mlxsw_sp_fid_nve_flood_index_clear(fid);
+
+       key.fid_index = mlxsw_sp_fid_index(fid);
+       mc_list = mlxsw_sp_nve_mc_list_find(mlxsw_sp, &key);
+       if (WARN_ON(mc_list == NULL))
+               return;
+
+       /* Deleting a record unlinks it, hence the _safe iterator */
+       list_for_each_entry_safe(rec, next, &mc_list->records_list, list) {
+               mlxsw_sp_nve_mc_record_delete(rec);
+       }
+
+       WARN_ON(!list_empty(&mc_list->records_list));
+       mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list);
+}
+
+/* Return the tunnel index stored in the NVE state. Warns when called while
+ * no NVE tunnel exists.
+ */
+u32 mlxsw_sp_nve_decap_tunnel_index_get(const struct mlxsw_sp *mlxsw_sp)
+{
+       const struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+
+       WARN_ON(nve->num_nve_tunnels == 0);
+       return nve->tunnel_index;
+}
+
+/* Tell whether the IPv4 route to @addr in table @tb_id matches the current
+ * NVE configuration: at least one tunnel exists, the underlay protocol is
+ * IPv4 and both the underlay source IP and table ID match.
+ */
+bool mlxsw_sp_nve_ipv4_route_is_decap(const struct mlxsw_sp *mlxsw_sp,
+                                     u32 tb_id, __be32 addr)
+{
+       struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+       struct mlxsw_sp_nve_config *cfg = &nve->config;
+
+       if (!nve->num_nve_tunnels)
+               return false;
+       if (cfg->ul_proto != MLXSW_SP_L3_PROTO_IPV4)
+               return false;
+
+       return cfg->ul_sip.addr4 == addr && cfg->ul_tb_id == tb_id;
+}
+
+/* Take a reference on the NVE tunnel. The first reference allocates the
+ * tunnel index and runs the type-specific ->init(); later references only
+ * bump the counter. The counter is restored on failure.
+ */
+static int mlxsw_sp_nve_tunnel_init(struct mlxsw_sp *mlxsw_sp,
+                                   struct mlxsw_sp_nve_config *config)
+{
+       struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+       int rc;
+
+       if (nve->num_nve_tunnels++ != 0)
+               return 0;
+
+       rc = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
+                                &nve->tunnel_index);
+       if (rc)
+               goto undo_tunnel_count;
+
+       rc = nve->nve_ops_arr[config->type]->init(nve, config);
+       if (rc)
+               goto free_tunnel_index;
+
+       return 0;
+
+free_tunnel_index:
+       mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
+                          nve->tunnel_index);
+undo_tunnel_count:
+       nve->num_nve_tunnels--;
+       return rc;
+}
+
+/* Drop a reference on the NVE tunnel. Dropping the last one runs the
+ * type-specific ->fini() and frees the tunnel index.
+ */
+static void mlxsw_sp_nve_tunnel_fini(struct mlxsw_sp *mlxsw_sp)
+{
+       struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+
+       if (nve->num_nve_tunnels == 1) {
+               nve->nve_ops_arr[nve->config.type]->fini(nve);
+               mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
+                                  nve->tunnel_index);
+       }
+
+       nve->num_nve_tunnels--;
+}
+
+/* Issue an SFDF "flush per NVE and FID" request for @fid_index. The result
+ * of the register write is not checked.
+ */
+static void mlxsw_sp_nve_fdb_flush_by_fid(struct mlxsw_sp *mlxsw_sp,
+                                         u16 fid_index)
+{
+       char flush_pl[MLXSW_REG_SFDF_LEN];
+
+       mlxsw_reg_sfdf_pack(flush_pl, MLXSW_REG_SFDF_FLUSH_PER_NVE_AND_FID);
+       mlxsw_reg_sfdf_fid_set(flush_pl, fid_index);
+       mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), flush_pl);
+}
+
+/* Enable NVE on @fid using the tunnel described by @params.
+ *
+ * Fails with -EOPNOTSUPP when the device cannot be offloaded or when its
+ * configuration differs from that of the tunnels already in use. Otherwise
+ * takes a tunnel reference, sets the VNI on the FID and records the
+ * configuration. Errors are reported through @extack.
+ */
+int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid,
+                           struct mlxsw_sp_nve_params *params,
+                           struct netlink_ext_ack *extack)
+{
+       struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+       const struct mlxsw_sp_nve_ops *ops;
+       struct mlxsw_sp_nve_config config;
+       int err;
+
+       ops = nve->nve_ops_arr[params->type];
+
+       if (!ops->can_offload(nve, params->dev, extack))
+               return -EOPNOTSUPP;
+
+       /* Zero the whole structure, padding included, since it is compared
+        * with memcmp() below
+        */
+       memset(&config, 0, sizeof(config));
+       ops->nve_config(nve, params->dev, &config);
+       if (nve->num_nve_tunnels &&
+           memcmp(&config, &nve->config, sizeof(config))) {
+               NL_SET_ERR_MSG_MOD(extack, "Conflicting NVE tunnels configuration");
+               return -EOPNOTSUPP;
+       }
+
+       err = mlxsw_sp_nve_tunnel_init(mlxsw_sp, &config);
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack, "Failed to initialize NVE tunnel");
+               return err;
+       }
+
+       err = mlxsw_sp_fid_vni_set(fid, params->vni);
+       if (err) {
+               NL_SET_ERR_MSG_MOD(extack, "Failed to set VNI on FID");
+               goto err_fid_vni_set;
+       }
+
+       nve->config = config;
+
+       return 0;
+
+       /* NOTE(review): nve->config has not been updated yet at this point,
+        * while mlxsw_sp_nve_tunnel_fini() selects its ops by
+        * nve->config.type. Looks harmless while only one NVE type exists -
+        * confirm.
+        */
+err_fid_vni_set:
+       mlxsw_sp_nve_tunnel_fini(mlxsw_sp);
+       return err;
+}
+
+/* Disable NVE on @fid: flush its flood list and its NVE FDB entries, clear
+ * the VNI and drop the tunnel reference, in that order.
+ */
+void mlxsw_sp_nve_fid_disable(struct mlxsw_sp *mlxsw_sp,
+                             struct mlxsw_sp_fid *fid)
+{
+       u16 index;
+
+       index = mlxsw_sp_fid_index(fid);
+
+       mlxsw_sp_nve_flood_ip_flush(mlxsw_sp, fid);
+       mlxsw_sp_nve_fdb_flush_by_fid(mlxsw_sp, index);
+       mlxsw_sp_fid_vni_clear(fid);
+       mlxsw_sp_nve_tunnel_fini(mlxsw_sp);
+}
+
+/* Write the TNQDR register for the port's local port number. Returns the
+ * result of the register write.
+ */
+int mlxsw_sp_port_nve_init(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+       char tnqdr_pl[MLXSW_REG_TNQDR_LEN];
+
+       mlxsw_reg_tnqdr_pack(tnqdr_pl, mlxsw_sp_port->local_port);
+       return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core,
+                              MLXSW_REG(tnqdr), tnqdr_pl);
+}
+
+/* Counterpart of mlxsw_sp_port_nve_init(); currently there is nothing to
+ * undo, so the body is empty.
+ */
+void mlxsw_sp_port_nve_fini(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+}
+
+/* Write the TNQCR register with its packed default contents. Returns the
+ * result of the register write.
+ */
+static int mlxsw_sp_nve_qos_init(struct mlxsw_sp *mlxsw_sp)
+{
+       char qos_pl[MLXSW_REG_TNQCR_LEN];
+
+       mlxsw_reg_tnqcr_pack(qos_pl);
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnqcr), qos_pl);
+}
+
+/* Program the encapsulation ECN mapping: for every inner ECN value, write
+ * the outer ECN value computed by INET_ECN_encapsulate() to the TNEEM
+ * register. Stops at the first failing write.
+ */
+static int mlxsw_sp_nve_ecn_encap_init(struct mlxsw_sp *mlxsw_sp)
+{
+       int inner_ecn;
+
+       for (inner_ecn = INET_ECN_NOT_ECT; inner_ecn <= INET_ECN_CE;
+            inner_ecn++) {
+               char tneem_pl[MLXSW_REG_TNEEM_LEN];
+               u8 outer_ecn;
+               int rc;
+
+               outer_ecn = INET_ECN_encapsulate(0, inner_ecn);
+               mlxsw_reg_tneem_pack(tneem_pl, inner_ecn, outer_ecn);
+               rc = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tneem),
+                                    tneem_pl);
+               if (rc)
+                       return rc;
+       }
+
+       return 0;
+}
+
+/* Program one decapsulation ECN mapping in the TNDEM register. The new inner
+ * ECN value and whether the combination is trapped are both derived from
+ * __INET_ECN_decapsulate().
+ */
+static int __mlxsw_sp_nve_ecn_decap_init(struct mlxsw_sp *mlxsw_sp,
+                                        u8 inner_ecn, u8 outer_ecn)
+{
+       char tndem_pl[MLXSW_REG_TNDEM_LEN];
+       u8 new_inner_ecn = inner_ecn;
+       bool set_ce = false;
+       bool trap_en;
+
+       trap_en = !!__INET_ECN_decapsulate(outer_ecn, inner_ecn, &set_ce);
+       if (set_ce)
+               new_inner_ecn = INET_ECN_CE;
+
+       mlxsw_reg_tndem_pack(tndem_pl, outer_ecn, inner_ecn, new_inner_ecn,
+                            trap_en, trap_en ? MLXSW_TRAP_ID_DECAP_ECN0 : 0);
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tndem), tndem_pl);
+}
+
+/* Program the decapsulation ECN mapping for every combination of inner and
+ * outer ECN values. Stops at the first failure.
+ */
+static int mlxsw_sp_nve_ecn_decap_init(struct mlxsw_sp *mlxsw_sp)
+{
+       int inner, outer, rc;
+
+       for (inner = INET_ECN_NOT_ECT; inner <= INET_ECN_CE; inner++) {
+               for (outer = INET_ECN_NOT_ECT; outer <= INET_ECN_CE; outer++) {
+                       rc = __mlxsw_sp_nve_ecn_decap_init(mlxsw_sp, inner,
+                                                          outer);
+                       if (rc)
+                               return rc;
+               }
+       }
+
+       return 0;
+}
+
+/* Program the ECN mappings, encapsulation first and then decapsulation */
+static int mlxsw_sp_nve_ecn_init(struct mlxsw_sp *mlxsw_sp)
+{
+       int rc = mlxsw_sp_nve_ecn_encap_init(mlxsw_sp);
+
+       if (!rc)
+               rc = mlxsw_sp_nve_ecn_decap_init(mlxsw_sp);
+
+       return rc;
+}
+
+/* Cache the maximum number of flood entries per record for IPv4 and IPv6
+ * from the device resources. Returns -EIO when either resource is not
+ * valid.
+ */
+static int mlxsw_sp_nve_resources_query(struct mlxsw_sp *mlxsw_sp)
+{
+       unsigned int *limits = mlxsw_sp->nve->num_max_mc_entries;
+
+       if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV4))
+               return -EIO;
+       if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV6))
+               return -EIO;
+
+       limits[MLXSW_SP_L3_PROTO_IPV4] =
+               MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV4);
+       limits[MLXSW_SP_L3_PROTO_IPV6] =
+               MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV6);
+
+       return 0;
+}
+
+/* Allocate and initialize the NVE state of @mlxsw_sp: the flood list hash
+ * table, the QoS and ECN register configuration and the cached resource
+ * limits. On failure everything is undone and mlxsw_sp->nve is reset to
+ * NULL. Returns 0 or a negative errno.
+ */
+int mlxsw_sp_nve_init(struct mlxsw_sp *mlxsw_sp)
+{
+       struct mlxsw_sp_nve *nve;
+       int err;
+
+       nve = kzalloc(sizeof(*mlxsw_sp->nve), GFP_KERNEL);
+       if (!nve)
+               return -ENOMEM;
+       /* Published early: the helpers below reach it via mlxsw_sp->nve */
+       mlxsw_sp->nve = nve;
+       nve->mlxsw_sp = mlxsw_sp;
+       nve->nve_ops_arr = mlxsw_sp->nve_ops_arr;
+
+       err = rhashtable_init(&nve->mc_list_ht,
+                             &mlxsw_sp_nve_mc_list_ht_params);
+       if (err)
+               goto err_rhashtable_init;
+
+       err = mlxsw_sp_nve_qos_init(mlxsw_sp);
+       if (err)
+               goto err_nve_qos_init;
+
+       err = mlxsw_sp_nve_ecn_init(mlxsw_sp);
+       if (err)
+               goto err_nve_ecn_init;
+
+       err = mlxsw_sp_nve_resources_query(mlxsw_sp);
+       if (err)
+               goto err_nve_resources_query;
+
+       return 0;
+
+       /* The register writes have no undo step, so these labels all share
+        * the hash table teardown
+        */
+err_nve_resources_query:
+err_nve_ecn_init:
+err_nve_qos_init:
+       rhashtable_destroy(&nve->mc_list_ht);
+err_rhashtable_init:
+       mlxsw_sp->nve = NULL;
+       kfree(nve);
+       return err;
+}
+
+/* Tear down the NVE state of @mlxsw_sp. All tunnels are expected to be gone
+ * by now; a warning is emitted otherwise.
+ */
+void mlxsw_sp_nve_fini(struct mlxsw_sp *mlxsw_sp)
+{
+       struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+
+       WARN_ON(nve->num_nve_tunnels);
+       rhashtable_destroy(&nve->mc_list_ht);
+       /* Free through the saved pointer: clearing mlxsw_sp->nve first and
+        * then passing it to kfree() would free NULL and leak the state
+        */
+       mlxsw_sp->nve = NULL;
+       kfree(nve);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h
new file mode 100644 (file)
index 0000000..4cc3297
--- /dev/null
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_SPECTRUM_NVE_H
+#define _MLXSW_SPECTRUM_NVE_H
+
+#include <linux/netlink.h>
+#include <linux/rhashtable.h>
+
+#include "spectrum.h"
+
+/* Encapsulation-independent description of an NVE tunnel configuration.
+ *
+ * @type:        NVE encapsulation type (e.g. VxLAN).
+ * @ttl:         TTL value taken from the tunnel device configuration.
+ * @learning_en: non-zero when learning is enabled on the tunnel device.
+ * @udp_dport:   UDP destination port, in network byte order.
+ * @flowlabel:   flow label taken from the tunnel device configuration.
+ * @ul_tb_id:    kernel routing table ID of the underlay.
+ * @ul_proto:    L3 protocol of the underlay.
+ * @ul_sip:      underlay source IP address.
+ */
+struct mlxsw_sp_nve_config {
+       enum mlxsw_sp_nve_type type;
+       u8 ttl;
+       u8 learning_en:1;
+       __be16 udp_dport;
+       __be32 flowlabel;
+       u32 ul_tb_id;
+       enum mlxsw_sp_l3proto ul_proto;
+       union mlxsw_sp_l3addr ul_sip;
+};
+
+/* Per-device NVE state.
+ *
+ * @config:             NVE configuration stored in the context.
+ * @mc_list_ht:         hash table of multicast lists.
+ * @mlxsw_sp:           back-pointer to the owning device instance.
+ * @nve_ops_arr:        per-encapsulation operations, copied from the
+ *                      device instance at init time.
+ * @num_nve_tunnels:    number of NVE tunnels accounted for in this
+ *                      context.
+ * @num_max_mc_entries: per-L3-protocol maximum number of multicast
+ *                      entries, queried from the core resources.
+ * @tunnel_index:       tunnel index handed to the decap programming.
+ */
+struct mlxsw_sp_nve {
+       struct mlxsw_sp_nve_config config;
+       struct rhashtable mc_list_ht;
+       struct mlxsw_sp *mlxsw_sp;
+       const struct mlxsw_sp_nve_ops **nve_ops_arr;
+       unsigned int num_nve_tunnels;   /* Protected by RTNL */
+       unsigned int num_max_mc_entries[MLXSW_SP_L3_PROTO_MAX];
+       u32 tunnel_index;
+};
+
+/* Operations implemented per NVE encapsulation type and ASIC.
+ *
+ * @type:        encapsulation type these operations handle.
+ * @can_offload: returns true if the tunnel device @dev can be offloaded;
+ *               may report the rejection reason through @extack.
+ * @nve_config:  fill @config from the tunnel device @dev.
+ * @init:        apply @config; returns 0 or a negative errno.
+ * @fini:        undo what @init did.
+ */
+struct mlxsw_sp_nve_ops {
+       enum mlxsw_sp_nve_type type;
+       bool (*can_offload)(const struct mlxsw_sp_nve *nve,
+                           const struct net_device *dev,
+                           struct netlink_ext_ack *extack);
+       void (*nve_config)(const struct mlxsw_sp_nve *nve,
+                          const struct net_device *dev,
+                          struct mlxsw_sp_nve_config *config);
+       int (*init)(struct mlxsw_sp_nve *nve,
+                   const struct mlxsw_sp_nve_config *config);
+       void (*fini)(struct mlxsw_sp_nve *nve);
+};
+
+extern const struct mlxsw_sp_nve_ops mlxsw_sp1_nve_vxlan_ops;
+extern const struct mlxsw_sp_nve_ops mlxsw_sp2_nve_vxlan_ops;
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c
new file mode 100644 (file)
index 0000000..d21c7be
--- /dev/null
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/netdevice.h>
+#include <linux/netlink.h>
+#include <linux/random.h>
+#include <net/vxlan.h>
+
+#include "reg.h"
+#include "spectrum_nve.h"
+
+/* Eth (18B) | IPv6 (40B) | UDP (8B) | VxLAN (8B) | Eth (14B) | IPv6 (40B)
+ *
+ * In the worst case - where we have a VLAN tag on the outer Ethernet
+ * header and IPv6 in overlay and underlay - we need to parse 128 bytes
+ */
+#define MLXSW_SP_NVE_VXLAN_PARSING_DEPTH 128
+#define MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH 96
+
+#define MLXSW_SP_NVE_VXLAN_SUPPORTED_FLAGS     VXLAN_F_UDP_ZERO_CSUM_TX
+
+/* Decide whether the configuration of the VxLAN device @dev can be
+ * offloaded.
+ *
+ * Each check below rejects one unsupported setting, reports the reason
+ * through @extack and returns false. True is returned only when every
+ * check passes. @nve is not used by these checks.
+ */
+static bool mlxsw_sp1_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve,
+                                           const struct net_device *dev,
+                                           struct netlink_ext_ack *extack)
+{
+       struct vxlan_dev *vxlan = netdev_priv(dev);
+       struct vxlan_config *cfg = &vxlan->cfg;
+
+       if (cfg->saddr.sa.sa_family != AF_INET) {
+               NL_SET_ERR_MSG_MOD(extack, "VxLAN: Only IPv4 underlay is supported");
+               return false;
+       }
+
+       if (vxlan_addr_multicast(&cfg->remote_ip)) {
+               NL_SET_ERR_MSG_MOD(extack, "VxLAN: Multicast destination IP is not supported");
+               return false;
+       }
+
+       if (vxlan_addr_any(&cfg->saddr)) {
+               NL_SET_ERR_MSG_MOD(extack, "VxLAN: Source address must be specified");
+               return false;
+       }
+
+       if (cfg->remote_ifindex) {
+               NL_SET_ERR_MSG_MOD(extack, "VxLAN: Local interface is not supported");
+               return false;
+       }
+
+       if (cfg->port_min || cfg->port_max) {
+               NL_SET_ERR_MSG_MOD(extack, "VxLAN: Only default UDP source port range is supported");
+               return false;
+       }
+
+       /* Per the message below, a TOS value of 1 denotes "inherit" */
+       if (cfg->tos != 1) {
+               NL_SET_ERR_MSG_MOD(extack, "VxLAN: TOS must be configured to inherit");
+               return false;
+       }
+
+       if (cfg->flags & VXLAN_F_TTL_INHERIT) {
+               NL_SET_ERR_MSG_MOD(extack, "VxLAN: TTL must not be configured to inherit");
+               return false;
+       }
+
+       if (cfg->flags & VXLAN_F_LEARN) {
+               NL_SET_ERR_MSG_MOD(extack, "VxLAN: Learning is not supported");
+               return false;
+       }
+
+       if (!(cfg->flags & VXLAN_F_UDP_ZERO_CSUM_TX)) {
+               NL_SET_ERR_MSG_MOD(extack, "VxLAN: UDP checksum is not supported");
+               return false;
+       }
+
+       /* Any flag outside the supported set is rejected generically; the
+        * flag-specific checks above run first to give a precise message.
+        */
+       if (cfg->flags & ~MLXSW_SP_NVE_VXLAN_SUPPORTED_FLAGS) {
+               NL_SET_ERR_MSG_MOD(extack, "VxLAN: Unsupported flag");
+               return false;
+       }
+
+       if (cfg->ttl == 0) {
+               NL_SET_ERR_MSG_MOD(extack, "VxLAN: TTL must not be configured to 0");
+               return false;
+       }
+
+       if (cfg->label != 0) {
+               NL_SET_ERR_MSG_MOD(extack, "VxLAN: Flow label must be configured to 0");
+               return false;
+       }
+
+       return true;
+}
+
+/* Translate the configuration of the VxLAN device @dev into the generic
+ * NVE configuration @config. The underlay is always described as IPv4
+ * in the main routing table. @nve is unused.
+ */
+static void mlxsw_sp_nve_vxlan_config(const struct mlxsw_sp_nve *nve,
+                                     const struct net_device *dev,
+                                     struct mlxsw_sp_nve_config *config)
+{
+       struct vxlan_dev *vxlan_priv = netdev_priv(dev);
+       const struct vxlan_config *vcfg = &vxlan_priv->cfg;
+
+       /* Encapsulation parameters */
+       config->type = MLXSW_SP_NVE_TYPE_VXLAN;
+       config->udp_dport = vcfg->dst_port;
+       config->ttl = vcfg->ttl;
+       config->flowlabel = vcfg->label;
+       config->learning_en = !!(vcfg->flags & VXLAN_F_LEARN);
+
+       /* Underlay parameters */
+       config->ul_proto = MLXSW_SP_L3_PROTO_IPV4;
+       config->ul_tb_id = RT_TABLE_MAIN;
+       config->ul_sip.addr4 = vcfg->saddr.sin.sin_addr.s_addr;
+}
+
+/* Write the MPRS register with @parsing_depth and the UDP destination
+ * port @udp_dport (given in network byte order, converted to CPU byte
+ * order for the register). Returns the result of the register write.
+ */
+static int mlxsw_sp_nve_parsing_set(struct mlxsw_sp *mlxsw_sp,
+                                   unsigned int parsing_depth,
+                                   __be16 udp_dport)
+{
+       u16 dport = be16_to_cpu(udp_dport);
+       char payload[MLXSW_REG_MPRS_LEN];
+
+       mlxsw_reg_mprs_pack(payload, parsing_depth, dport);
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mprs), payload);
+}
+
+/* Program the TNGCR register with the VxLAN tunnel settings in @config:
+ * TTL, a randomly chosen UDP source port prefix, learning enable, the
+ * underlay virtual router and the underlay IPv4 source address.
+ *
+ * Returns 0 on success, or a negative errno from the table-to-virtual-
+ * router lookup or from the register write.
+ */
+static int
+mlxsw_sp1_nve_vxlan_config_set(struct mlxsw_sp *mlxsw_sp,
+                              const struct mlxsw_sp_nve_config *config)
+{
+       char tngcr_pl[MLXSW_REG_TNGCR_LEN];
+       u16 ul_vr_id;
+       u8 udp_sport;
+       int err;
+
+       /* Resolve the underlay table to its virtual router before touching
+        * the register payload, so a failed lookup writes nothing.
+        */
+       err = mlxsw_sp_router_tb_id_vr_id(mlxsw_sp, config->ul_tb_id,
+                                         &ul_vr_id);
+       if (err)
+               return err;
+
+       mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, true,
+                            config->ttl);
+       /* VxLAN driver's default UDP source port range is 32768 (0x8000)
+        * to 60999 (0xee47). Set the upper 8 bits of the UDP source port
+        * to a random number between 0x80 and 0xee
+        */
+       get_random_bytes(&udp_sport, sizeof(udp_sport));
+       udp_sport = (udp_sport % (0xee - 0x80 + 1)) + 0x80;
+       mlxsw_reg_tngcr_nve_udp_sport_prefix_set(tngcr_pl, udp_sport);
+       mlxsw_reg_tngcr_learn_enable_set(tngcr_pl, config->learning_en);
+       mlxsw_reg_tngcr_underlay_virtual_router_set(tngcr_pl, ul_vr_id);
+       /* The register field takes the address in CPU byte order */
+       mlxsw_reg_tngcr_usipv4_set(tngcr_pl, be32_to_cpu(config->ul_sip.addr4));
+
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl);
+}
+
+/* Rewrite the TNGCR register with the packing used for teardown (flag
+ * false, TTL 0). The result of the register write is not checked, so a
+ * failure here is not reported to the caller.
+ */
+static void mlxsw_sp1_nve_vxlan_config_clear(struct mlxsw_sp *mlxsw_sp)
+{
+       char tngcr_pl[MLXSW_REG_TNGCR_LEN];
+
+       mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, false, 0);
+
+       mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl);
+}
+
+/* Write an RTDP register entry of type NVE for @tunnel_index. Returns
+ * the result of the register write.
+ */
+static int mlxsw_sp1_nve_vxlan_rtdp_set(struct mlxsw_sp *mlxsw_sp,
+                                       unsigned int tunnel_index)
+{
+       char payload[MLXSW_REG_RTDP_LEN];
+       int err;
+
+       mlxsw_reg_rtdp_pack(payload, MLXSW_REG_RTDP_TYPE_NVE, tunnel_index);
+       err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), payload);
+
+       return err;
+}
+
+/* Bring up VxLAN offload for the settings in @config.
+ *
+ * Steps, in order: raise the parsing depth and set the UDP destination
+ * port, program the tunnel configuration, program the decap entry for
+ * nve->tunnel_index, and promote the matching local route to NVE decap.
+ *
+ * Returns 0 on success or a negative errno; on failure the steps already
+ * performed are undone in reverse order.
+ */
+static int mlxsw_sp1_nve_vxlan_init(struct mlxsw_sp_nve *nve,
+                                   const struct mlxsw_sp_nve_config *config)
+{
+       struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp;
+       int err;
+
+       err = mlxsw_sp_nve_parsing_set(mlxsw_sp,
+                                      MLXSW_SP_NVE_VXLAN_PARSING_DEPTH,
+                                      config->udp_dport);
+       if (err)
+               return err;
+
+       err = mlxsw_sp1_nve_vxlan_config_set(mlxsw_sp, config);
+       if (err)
+               goto err_config_set;
+
+       err = mlxsw_sp1_nve_vxlan_rtdp_set(mlxsw_sp, nve->tunnel_index);
+       if (err)
+               goto err_rtdp_set;
+
+       err = mlxsw_sp_router_nve_promote_decap(mlxsw_sp, config->ul_tb_id,
+                                               config->ul_proto,
+                                               &config->ul_sip,
+                                               nve->tunnel_index);
+       if (err)
+               goto err_promote_decap;
+
+       return 0;
+
+       /* The RTDP write has no dedicated undo step, so both labels share
+        * the configuration clear.
+        */
+err_promote_decap:
+err_rtdp_set:
+       mlxsw_sp1_nve_vxlan_config_clear(mlxsw_sp);
+err_config_set:
+       /* Restore the default parsing depth; its result is not checked */
+       mlxsw_sp_nve_parsing_set(mlxsw_sp, MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH,
+                                config->udp_dport);
+       return err;
+}
+
+/* Tear down VxLAN offload using the configuration stored in @nve: demote
+ * the decap route, clear the tunnel configuration and restore the default
+ * parsing depth.
+ */
+static void mlxsw_sp1_nve_vxlan_fini(struct mlxsw_sp_nve *nve)
+{
+       struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp;
+       u32 tb_id = nve->config.ul_tb_id;
+       enum mlxsw_sp_l3proto proto = nve->config.ul_proto;
+       __be16 dport = nve->config.udp_dport;
+
+       mlxsw_sp_router_nve_demote_decap(mlxsw_sp, tb_id, proto,
+                                        &nve->config.ul_sip);
+       mlxsw_sp1_nve_vxlan_config_clear(mlxsw_sp);
+       mlxsw_sp_nve_parsing_set(mlxsw_sp, MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH,
+                                dport);
+}
+
+/* VxLAN operations for the "sp1" variant; all callbacks are functional */
+const struct mlxsw_sp_nve_ops mlxsw_sp1_nve_vxlan_ops = {
+       .type           = MLXSW_SP_NVE_TYPE_VXLAN,
+       .can_offload    = mlxsw_sp1_nve_vxlan_can_offload,
+       .nve_config     = mlxsw_sp_nve_vxlan_config,
+       .init           = mlxsw_sp1_nve_vxlan_init,
+       .fini           = mlxsw_sp1_nve_vxlan_fini,
+};
+
+/* Stub: always refuses offload for the "sp2" variant. No reason is
+ * reported through @extack.
+ */
+static bool mlxsw_sp2_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve,
+                                           const struct net_device *dev,
+                                           struct netlink_ext_ack *extack)
+{
+       return false;
+}
+
+/* Stub: initialisation is not supported for the "sp2" variant */
+static int mlxsw_sp2_nve_vxlan_init(struct mlxsw_sp_nve *nve,
+                                   const struct mlxsw_sp_nve_config *config)
+{
+       return -EOPNOTSUPP;
+}
+
+/* Stub: nothing to undo, since the matching init always fails */
+static void mlxsw_sp2_nve_vxlan_fini(struct mlxsw_sp_nve *nve)
+{
+}
+
+/* VxLAN operations for the "sp2" variant: only the configuration
+ * translation is shared with sp1; offload itself is stubbed out.
+ */
+const struct mlxsw_sp_nve_ops mlxsw_sp2_nve_vxlan_ops = {
+       .type           = MLXSW_SP_NVE_TYPE_VXLAN,
+       .can_offload    = mlxsw_sp2_nve_vxlan_can_offload,
+       .nve_config     = mlxsw_sp_nve_vxlan_config,
+       .init           = mlxsw_sp2_nve_vxlan_init,
+       .fini           = mlxsw_sp2_nve_vxlan_fini,
+};
index 2ab9cf25a08ae19788d28ffddaa8698ba2213152..9e9bb57134f2c868c63c69adc239b396244d444b 100644 (file)
@@ -366,6 +366,7 @@ enum mlxsw_sp_fib_entry_type {
         * encapsulating entries.)
         */
        MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
+       MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP,
 };
 
 struct mlxsw_sp_nexthop_group;
@@ -741,6 +742,19 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
        return NULL;
 }
 
+/* Translate the kernel routing table ID @tb_id into the ID of the virtual
+ * router found for it, stored in @vr_id.
+ *
+ * Returns 0 on success, or -ESRCH (leaving @vr_id untouched) when no
+ * virtual router is found for the table.
+ */
+int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
+                               u16 *vr_id)
+{
+       struct mlxsw_sp_vr *found = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
+
+       if (found) {
+               *vr_id = found->id;
+               return 0;
+       }
+
+       return -ESRCH;
+}
+
 static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
                                            enum mlxsw_sp_l3proto proto)
 {
@@ -1128,6 +1142,52 @@ mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
                mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
 }
 
+/* Look up the host route (/32) for @addr in the FIB of table @tb_id and
+ * return the first entry of its node if that entry is of type @type.
+ *
+ * Returns NULL when no virtual router is found for the table, when the
+ * node is missing or has no entries, when the first entry has another
+ * type, or when @proto is not IPv4 (which also triggers a WARN).
+ */
+static struct mlxsw_sp_fib_entry *
+mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
+                                    enum mlxsw_sp_l3proto proto,
+                                    const union mlxsw_sp_l3addr *addr,
+                                    enum mlxsw_sp_fib_entry_type type)
+{
+       struct mlxsw_sp_fib_entry *fib_entry;
+       struct mlxsw_sp_fib_node *fib_node;
+       unsigned char addr_prefix_len;
+       struct mlxsw_sp_fib *fib;
+       struct mlxsw_sp_vr *vr;
+       const void *addrp;
+       size_t addr_len;
+       u32 addr4;
+
+       vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
+       if (!vr)
+               return NULL;
+       fib = mlxsw_sp_vr_fib(vr, proto);
+
+       switch (proto) {
+       case MLXSW_SP_L3_PROTO_IPV4:
+               /* The lookup key is built from the CPU byte order address */
+               addr4 = be32_to_cpu(addr->addr4);
+               addrp = &addr4;
+               addr_len = 4;
+               addr_prefix_len = 32;
+               break;
+       case MLXSW_SP_L3_PROTO_IPV6: /* fall through */
+       default:
+               WARN_ON(1);
+               return NULL;
+       }
+
+       fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len,
+                                           addr_prefix_len);
+       if (!fib_node || list_empty(&fib_node->entry_list))
+               return NULL;
+
+       /* Only the first entry of the node is considered */
+       fib_entry = list_first_entry(&fib_node->entry_list,
+                                    struct mlxsw_sp_fib_entry, list);
+       if (fib_entry->type != type)
+               return NULL;
+
+       return fib_entry;
+}
+
 /* Given an IPIP entry, find the corresponding decap route. */
 static struct mlxsw_sp_fib_entry *
 mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
@@ -1765,6 +1825,56 @@ mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
        return 0;
 }
 
+/* Turn the trap-type host route for the underlay source IP @ul_sip into
+ * an NVE decap entry pointing at @tunnel_index.
+ *
+ * Returns 0 when the route was promoted or when no matching trap entry
+ * exists, or a negative errno if updating the entry failed. On failure
+ * the entry type is set back to trap and the entry is updated again.
+ */
+int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
+                                     enum mlxsw_sp_l3proto ul_proto,
+                                     const union mlxsw_sp_l3addr *ul_sip,
+                                     u32 tunnel_index)
+{
+       enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
+       struct mlxsw_sp_fib_entry *fib_entry;
+       int err;
+
+       /* It is valid to create a tunnel with a local IP and only later
+        * assign this IP address to a local interface
+        */
+       fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
+                                                        ul_proto, ul_sip,
+                                                        type);
+       if (!fib_entry)
+               return 0;
+
+       fib_entry->decap.tunnel_index = tunnel_index;
+       fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
+
+       err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
+       if (err)
+               goto err_fib_entry_update;
+
+       return 0;
+
+err_fib_entry_update:
+       /* Best-effort rollback: the result of this update is not checked
+        * and decap.tunnel_index is left as written above.
+        */
+       fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
+       mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
+       return err;
+}
+
+/* Turn the NVE decap host route for the underlay source IP @ul_sip back
+ * into a trap entry. Does nothing if no such decap entry is found; the
+ * result of the entry update is not checked.
+ */
+void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
+                                     enum mlxsw_sp_l3proto ul_proto,
+                                     const union mlxsw_sp_l3addr *ul_sip)
+{
+       struct mlxsw_sp_fib_entry *decap_entry;
+
+       decap_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
+                                       ul_proto, ul_sip,
+                                       MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP);
+       if (decap_entry) {
+               decap_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
+               mlxsw_sp_fib_entry_update(mlxsw_sp, decap_entry);
+       }
+}
+
 struct mlxsw_sp_neigh_key {
        struct neighbour *n;
 };
@@ -3815,6 +3925,7 @@ mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
        case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
                return !!nh_group->nh_rif;
        case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
+       case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
                return true;
        default:
                return false;
@@ -3848,7 +3959,8 @@ mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
        int i;
 
        if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
-           fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) {
+           fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP ||
+           fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP) {
                nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
                return;
        }
@@ -4072,6 +4184,18 @@ mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
                                      fib_entry->decap.tunnel_index);
 }
 
+/* Apply operation @op to the RALUE register for an NVE decap FIB entry:
+ * pack the entry, attach the IP2ME tunnel action with the entry's tunnel
+ * index and write the register. Returns the result of the write.
+ */
+static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp,
+                                          struct mlxsw_sp_fib_entry *fib_entry,
+                                          enum mlxsw_reg_ralue_op op)
+{
+       char ralue_pl[MLXSW_REG_RALUE_LEN];
+
+       mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
+       mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
+                                          fib_entry->decap.tunnel_index);
+       return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
+}
+
 static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
                                   struct mlxsw_sp_fib_entry *fib_entry,
                                   enum mlxsw_reg_ralue_op op)
@@ -4086,6 +4210,8 @@ static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
        case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
                return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
                                                        fib_entry, op);
+       case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
+               return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op);
        }
        return -EINVAL;
 }
@@ -4121,6 +4247,7 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
                             struct mlxsw_sp_fib_entry *fib_entry)
 {
        union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
+       u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
        struct net_device *dev = fen_info->fi->fib_dev;
        struct mlxsw_sp_ipip_entry *ipip_entry;
        struct fib_info *fi = fen_info->fi;
@@ -4135,6 +4262,15 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
                                                             fib_entry,
                                                             ipip_entry);
                }
+               if (mlxsw_sp_nve_ipv4_route_is_decap(mlxsw_sp, tb_id,
+                                                    dip.addr4)) {
+                       u32 t_index;
+
+                       t_index = mlxsw_sp_nve_decap_tunnel_index_get(mlxsw_sp);
+                       fib_entry->decap.tunnel_index = t_index;
+                       fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
+                       return 0;
+               }
                /* fall through */
        case RTN_BROADCAST:
                fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
index fa16ad2c6a50b530b86d437bd464187ea8ab8979..bc60d7a8b49d764b4066c50bc808963c52c27950 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/netlink.h>
 #include <net/switchdev.h>
+#include <net/vxlan.h>
 
 #include "spectrum_span.h"
 #include "spectrum_switchdev.h"
@@ -83,9 +84,19 @@ struct mlxsw_sp_bridge_ops {
        void (*port_leave)(struct mlxsw_sp_bridge_device *bridge_device,
                           struct mlxsw_sp_bridge_port *bridge_port,
                           struct mlxsw_sp_port *mlxsw_sp_port);
+       int (*vxlan_join)(struct mlxsw_sp_bridge_device *bridge_device,
+                         const struct net_device *vxlan_dev,
+                         struct netlink_ext_ack *extack);
+       void (*vxlan_leave)(struct mlxsw_sp_bridge_device *bridge_device,
+                           const struct net_device *vxlan_dev);
        struct mlxsw_sp_fid *
                (*fid_get)(struct mlxsw_sp_bridge_device *bridge_device,
                           u16 vid);
+       struct mlxsw_sp_fid *
+               (*fid_lookup)(struct mlxsw_sp_bridge_device *bridge_device,
+                             u16 vid);
+       u16 (*fid_vid)(struct mlxsw_sp_bridge_device *bridge_device,
+                      const struct mlxsw_sp_fid *fid);
 };
 
 static int
@@ -1236,6 +1247,51 @@ static enum mlxsw_reg_sfd_op mlxsw_sp_sfd_op(bool adding)
                        MLXSW_REG_SFD_OP_WRITE_REMOVE;
 }
 
+/* Add or remove (per @adding) a unicast tunnel FDB record for @mac in
+ * @fid, pointing at the underlay address @addr. @dynamic selects the
+ * record policy.
+ *
+ * Returns 0 on success, -EOPNOTSUPP (with a WARN) when @proto is not
+ * IPv4, -ENOMEM when the register payload cannot be allocated, the error
+ * of the register write, or -EBUSY when the record count read back after
+ * the write differs from the one packed.
+ */
+static int mlxsw_sp_port_fdb_tunnel_uc_op(struct mlxsw_sp *mlxsw_sp,
+                                         const char *mac, u16 fid,
+                                         enum mlxsw_sp_l3proto proto,
+                                         const union mlxsw_sp_l3addr *addr,
+                                         bool adding, bool dynamic)
+{
+       enum mlxsw_reg_sfd_uc_tunnel_protocol sfd_proto;
+       char *sfd_pl;
+       u8 num_rec;
+       u32 uip;
+       int err;
+
+       /* Validate the protocol before allocating anything */
+       switch (proto) {
+       case MLXSW_SP_L3_PROTO_IPV4:
+               uip = be32_to_cpu(addr->addr4);
+               sfd_proto = MLXSW_REG_SFD_UC_TUNNEL_PROTOCOL_IPV4;
+               break;
+       case MLXSW_SP_L3_PROTO_IPV6: /* fall through */
+       default:
+               WARN_ON(1);
+               return -EOPNOTSUPP;
+       }
+
+       sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL);
+       if (!sfd_pl)
+               return -ENOMEM;
+
+       mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0);
+       mlxsw_reg_sfd_uc_tunnel_pack(sfd_pl, 0,
+                                    mlxsw_sp_sfd_rec_policy(dynamic), mac, fid,
+                                    MLXSW_REG_SFD_REC_ACTION_NOP, uip,
+                                    sfd_proto);
+       /* Remember the packed record count to compare after the write */
+       num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl);
+       err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
+       if (err)
+               goto out;
+
+       /* NOTE(review): presumably the write updates the payload with the
+        * number of records actually processed - confirm against the SFD
+        * register description.
+        */
+       if (num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl))
+               err = -EBUSY;
+
+out:
+       kfree(sfd_pl);
+       return err;
+}
+
 static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port,
                                     const char *mac, u16 fid, bool adding,
                                     enum mlxsw_reg_sfd_rec_action action,
@@ -1949,6 +2005,21 @@ mlxsw_sp_bridge_8021q_port_leave(struct mlxsw_sp_bridge_device *bridge_device,
        mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1);
 }
 
+/* 802.1Q bridge variant of vxlan_join: not expected to be called; warns
+ * and fails with -EINVAL.
+ */
+static int
+mlxsw_sp_bridge_8021q_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device,
+                                const struct net_device *vxlan_dev,
+                                struct netlink_ext_ack *extack)
+{
+       WARN_ON(1);
+       return -EINVAL;
+}
+
+/* 802.1Q bridge variant of vxlan_leave: intentionally a no-op, since the
+ * matching join always fails.
+ */
+static void
+mlxsw_sp_bridge_8021q_vxlan_leave(struct mlxsw_sp_bridge_device *bridge_device,
+                                 const struct net_device *vxlan_dev)
+{
+}
+
 static struct mlxsw_sp_fid *
 mlxsw_sp_bridge_8021q_fid_get(struct mlxsw_sp_bridge_device *bridge_device,
                              u16 vid)
@@ -1958,10 +2029,29 @@ mlxsw_sp_bridge_8021q_fid_get(struct mlxsw_sp_bridge_device *bridge_device,
        return mlxsw_sp_fid_8021q_get(mlxsw_sp, vid);
 }
 
+/* 802.1Q bridge variant of fid_lookup: not expected to be called; warns
+ * and returns NULL.
+ */
+static struct mlxsw_sp_fid *
+mlxsw_sp_bridge_8021q_fid_lookup(struct mlxsw_sp_bridge_device *bridge_device,
+                                u16 vid)
+{
+       WARN_ON(1);
+       return NULL;
+}
+
+/* 802.1Q bridge variant of fid_vid: return the VLAN ID associated with
+ * the 802.1Q FID @fid. @bridge_device is unused.
+ */
+static u16
+mlxsw_sp_bridge_8021q_fid_vid(struct mlxsw_sp_bridge_device *bridge_device,
+                             const struct mlxsw_sp_fid *fid)
+{
+       return mlxsw_sp_fid_8021q_vid(fid);
+}
+
 static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021q_ops = {
        .port_join      = mlxsw_sp_bridge_8021q_port_join,
        .port_leave     = mlxsw_sp_bridge_8021q_port_leave,
+       .vxlan_join     = mlxsw_sp_bridge_8021q_vxlan_join,
+       .vxlan_leave    = mlxsw_sp_bridge_8021q_vxlan_leave,
        .fid_get        = mlxsw_sp_bridge_8021q_fid_get,
+       .fid_lookup     = mlxsw_sp_bridge_8021q_fid_lookup,
+       .fid_vid        = mlxsw_sp_bridge_8021q_fid_vid,
 };
 
 static bool
@@ -2025,19 +2115,126 @@ mlxsw_sp_bridge_8021d_port_leave(struct mlxsw_sp_bridge_device *bridge_device,
        mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan);
 }
 
+/* Enable NVE on the 802.1D FID of @bridge_device using the VNI of the
+ * VxLAN device @vxlan_dev.
+ *
+ * Returns 0 on success, -EINVAL when the bridge has no FID or the FID
+ * already has a VNI, or the error from enabling NVE on the FID (which
+ * may report details through @extack).
+ */
+static int
+mlxsw_sp_bridge_8021d_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device,
+                                const struct net_device *vxlan_dev,
+                                struct netlink_ext_ack *extack)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev);
+       struct vxlan_dev *vxlan = netdev_priv(vxlan_dev);
+       struct mlxsw_sp_nve_params params = {
+               .type = MLXSW_SP_NVE_TYPE_VXLAN,
+               .vni = vxlan->cfg.vni,
+               .dev = vxlan_dev,
+       };
+       struct mlxsw_sp_fid *fid;
+       int err;
+
+       fid = mlxsw_sp_fid_8021d_lookup(mlxsw_sp, bridge_device->dev->ifindex);
+       if (!fid)
+               return -EINVAL;
+
+       /* The lookup took a reference on the FID, so every exit from here
+        * on must go through the put below.
+        */
+       if (mlxsw_sp_fid_vni_is_set(fid)) {
+               err = -EINVAL;
+               goto out;
+       }
+
+       err = mlxsw_sp_nve_fid_enable(mlxsw_sp, fid, &params, extack);
+
+out:
+       /* The tunnel port does not hold a reference on the FID. Only
+        * local ports and the router port
+        */
+       mlxsw_sp_fid_put(fid);
+       return err;
+}
+
+/* Disable NVE on the 802.1D FID of @bridge_device, if a VNI is set on
+ * it. Warns and returns if the bridge has no FID. @vxlan_dev is unused.
+ */
+static void
+mlxsw_sp_bridge_8021d_vxlan_leave(struct mlxsw_sp_bridge_device *bridge_device,
+                                 const struct net_device *vxlan_dev)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev);
+       int br_ifindex = bridge_device->dev->ifindex;
+       struct mlxsw_sp_fid *br_fid;
+
+       br_fid = mlxsw_sp_fid_8021d_lookup(mlxsw_sp, br_ifindex);
+       if (WARN_ON(!br_fid))
+               return;
+
+       /* If the VxLAN device is down, then the FID does not have a VNI */
+       if (mlxsw_sp_fid_vni_is_set(br_fid))
+               mlxsw_sp_nve_fid_disable(mlxsw_sp, br_fid);
+
+       /* Drop the reference taken by the lookup */
+       mlxsw_sp_fid_put(br_fid);
+}
+
 static struct mlxsw_sp_fid *
 mlxsw_sp_bridge_8021d_fid_get(struct mlxsw_sp_bridge_device *bridge_device,
                              u16 vid)
 {
        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev);
+       struct net_device *vxlan_dev;
+       struct mlxsw_sp_fid *fid;
+       int err;
+
+       fid = mlxsw_sp_fid_8021d_get(mlxsw_sp, bridge_device->dev->ifindex);
+       if (IS_ERR(fid))
+               return fid;
+
+       if (mlxsw_sp_fid_vni_is_set(fid))
+               return fid;
+
+       vxlan_dev = mlxsw_sp_bridge_vxlan_dev_find(bridge_device->dev);
+       if (!vxlan_dev)
+               return fid;
+
+       if (!netif_running(vxlan_dev))
+               return fid;
+
+       err = mlxsw_sp_bridge_8021d_vxlan_join(bridge_device, vxlan_dev, NULL);
+       if (err)
+               goto err_vxlan_join;
+
+       return fid;
+
+err_vxlan_join:
+       mlxsw_sp_fid_put(fid);
+       return ERR_PTR(err);
+}
+
+static struct mlxsw_sp_fid *
+mlxsw_sp_bridge_8021d_fid_lookup(struct mlxsw_sp_bridge_device *bridge_device,
+                                u16 vid)
+{
+       struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev);
 
-       return mlxsw_sp_fid_8021d_get(mlxsw_sp, bridge_device->dev->ifindex);
+       /* The only valid VLAN for a VLAN-unaware bridge is 0 */
+       if (vid)
+               return NULL;
+
+       return mlxsw_sp_fid_8021d_lookup(mlxsw_sp, bridge_device->dev->ifindex);
+}
+
+static u16
+mlxsw_sp_bridge_8021d_fid_vid(struct mlxsw_sp_bridge_device *bridge_device,
+                             const struct mlxsw_sp_fid *fid)
+{
+       return 0;
 }
 
 static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021d_ops = {
        .port_join      = mlxsw_sp_bridge_8021d_port_join,
        .port_leave     = mlxsw_sp_bridge_8021d_port_leave,
+       .vxlan_join     = mlxsw_sp_bridge_8021d_vxlan_join,
+       .vxlan_leave    = mlxsw_sp_bridge_8021d_vxlan_leave,
        .fid_get        = mlxsw_sp_bridge_8021d_fid_get,
+       .fid_lookup     = mlxsw_sp_bridge_8021d_fid_lookup,
+       .fid_vid        = mlxsw_sp_bridge_8021d_fid_vid,
 };
 
 int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port,
@@ -2087,15 +2284,43 @@ void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port,
        mlxsw_sp_bridge_port_put(mlxsw_sp->bridge, bridge_port);
 }
 
+/* Dispatch a VxLAN join to the bridge-type specific operation of the
+ * bridge device tracked for @br_dev.
+ *
+ * Returns the result of that operation, or -EINVAL (with a WARN) when
+ * @br_dev is not a tracked bridge.
+ */
+int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp,
+                              const struct net_device *br_dev,
+                              const struct net_device *vxlan_dev,
+                              struct netlink_ext_ack *extack)
+{
+       struct mlxsw_sp_bridge_device *br;
+
+       br = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+       if (WARN_ON(!br))
+               return -EINVAL;
+
+       return br->ops->vxlan_join(br, vxlan_dev, extack);
+}
+
+/* Dispatch a VxLAN leave to the bridge-type specific operation of the
+ * bridge device tracked for @br_dev. Warns and does nothing when @br_dev
+ * is not a tracked bridge.
+ */
+void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp,
+                                const struct net_device *br_dev,
+                                const struct net_device *vxlan_dev)
+{
+       struct mlxsw_sp_bridge_device *br;
+
+       br = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+       if (!WARN_ON(!br))
+               br->ops->vxlan_leave(br, vxlan_dev);
+}
+
 static void
 mlxsw_sp_fdb_call_notifiers(enum switchdev_notifier_type type,
                            const char *mac, u16 vid,
-                           struct net_device *dev)
+                           struct net_device *dev, bool offloaded)
 {
        struct switchdev_notifier_fdb_info info;
 
        info.addr = mac;
        info.vid = vid;
+       info.offloaded = offloaded;
        call_switchdev_notifiers(type, dev, &info.info);
 }
 
@@ -2147,7 +2372,7 @@ do_fdb_op:
        if (!do_notification)
                return;
        type = adding ? SWITCHDEV_FDB_ADD_TO_BRIDGE : SWITCHDEV_FDB_DEL_TO_BRIDGE;
-       mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev);
+       mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev, adding);
 
        return;
 
@@ -2207,7 +2432,7 @@ do_fdb_op:
        if (!do_notification)
                return;
        type = adding ? SWITCHDEV_FDB_ADD_TO_BRIDGE : SWITCHDEV_FDB_DEL_TO_BRIDGE;
-       mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev);
+       mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev, adding);
 
        return;
 
@@ -2283,11 +2508,126 @@ out:
 
 struct mlxsw_sp_switchdev_event_work {
        struct work_struct work;
-       struct switchdev_notifier_fdb_info fdb_info;
+       union {
+               struct switchdev_notifier_fdb_info fdb_info;
+               struct switchdev_notifier_vxlan_fdb_info vxlan_fdb_info;
+       };
        struct net_device *dev;
        unsigned long event;
 };
 
+static void
+mlxsw_sp_switchdev_vxlan_addr_convert(const union vxlan_addr *vxlan_addr,
+                                     enum mlxsw_sp_l3proto *proto,
+                                     union mlxsw_sp_l3addr *addr)
+{
+       if (vxlan_addr->sa.sa_family == AF_INET) {
+               addr->addr4 = vxlan_addr->sin.sin_addr.s_addr;
+               *proto = MLXSW_SP_L3_PROTO_IPV4;
+       } else {
+               addr->addr6 = vxlan_addr->sin6.sin6_addr;
+               *proto = MLXSW_SP_L3_PROTO_IPV6;
+       }
+}
+
+static void
+mlxsw_sp_switchdev_bridge_vxlan_fdb_event(struct mlxsw_sp *mlxsw_sp,
+                                         struct mlxsw_sp_switchdev_event_work *
+                                         switchdev_work,
+                                         struct mlxsw_sp_fid *fid, __be32 vni)
+{
+       struct switchdev_notifier_vxlan_fdb_info vxlan_fdb_info;
+       struct switchdev_notifier_fdb_info *fdb_info;
+       struct net_device *dev = switchdev_work->dev;
+       enum mlxsw_sp_l3proto proto;
+       union mlxsw_sp_l3addr addr;
+       int err;
+
+       fdb_info = &switchdev_work->fdb_info;
+       err = vxlan_fdb_find_uc(dev, fdb_info->addr, vni, &vxlan_fdb_info);
+       if (err)
+               return;
+
+       mlxsw_sp_switchdev_vxlan_addr_convert(&vxlan_fdb_info.remote_ip,
+                                             &proto, &addr);
+
+       switch (switchdev_work->event) {
+       case SWITCHDEV_FDB_ADD_TO_DEVICE:
+               err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp,
+                                                    vxlan_fdb_info.eth_addr,
+                                                    mlxsw_sp_fid_index(fid),
+                                                    proto, &addr, true, false);
+               if (err)
+                       return;
+               vxlan_fdb_info.offloaded = true;
+               call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev,
+                                        &vxlan_fdb_info.info);
+               mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED,
+                                           vxlan_fdb_info.eth_addr,
+                                           fdb_info->vid, dev, true);
+               break;
+       case SWITCHDEV_FDB_DEL_TO_DEVICE:
+               err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp,
+                                                    vxlan_fdb_info.eth_addr,
+                                                    mlxsw_sp_fid_index(fid),
+                                                    proto, &addr, false,
+                                                    false);
+               vxlan_fdb_info.offloaded = false;
+               call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev,
+                                        &vxlan_fdb_info.info);
+               break;
+       }
+}
+
+static void
+mlxsw_sp_switchdev_bridge_nve_fdb_event(struct mlxsw_sp_switchdev_event_work *
+                                       switchdev_work)
+{
+       struct mlxsw_sp_bridge_device *bridge_device;
+       struct net_device *dev = switchdev_work->dev;
+       struct net_device *br_dev;
+       struct mlxsw_sp *mlxsw_sp;
+       struct mlxsw_sp_fid *fid;
+       __be32 vni;
+       int err;
+
+       if (switchdev_work->event != SWITCHDEV_FDB_ADD_TO_DEVICE &&
+           switchdev_work->event != SWITCHDEV_FDB_DEL_TO_DEVICE)
+               return;
+
+       if (!switchdev_work->fdb_info.added_by_user)
+               return;
+
+       if (!netif_running(dev))
+               return;
+       br_dev = netdev_master_upper_dev_get(dev);
+       if (!br_dev)
+               return;
+       if (!netif_is_bridge_master(br_dev))
+               return;
+       mlxsw_sp = mlxsw_sp_lower_get(br_dev);
+       if (!mlxsw_sp)
+               return;
+       bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+       if (!bridge_device)
+               return;
+
+       fid = bridge_device->ops->fid_lookup(bridge_device,
+                                            switchdev_work->fdb_info.vid);
+       if (!fid)
+               return;
+
+       err = mlxsw_sp_fid_vni(fid, &vni);
+       if (err)
+               goto out;
+
+       mlxsw_sp_switchdev_bridge_vxlan_fdb_event(mlxsw_sp, switchdev_work, fid,
+                                                 vni);
+
+out:
+       mlxsw_sp_fid_put(fid);
+}
+
 static void mlxsw_sp_switchdev_bridge_fdb_event_work(struct work_struct *work)
 {
        struct mlxsw_sp_switchdev_event_work *switchdev_work =
@@ -2298,6 +2638,11 @@ static void mlxsw_sp_switchdev_bridge_fdb_event_work(struct work_struct *work)
        int err;
 
        rtnl_lock();
+       if (netif_is_vxlan(dev)) {
+               mlxsw_sp_switchdev_bridge_nve_fdb_event(switchdev_work);
+               goto out;
+       }
+
        mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(dev);
        if (!mlxsw_sp_port)
                goto out;
@@ -2312,7 +2657,7 @@ static void mlxsw_sp_switchdev_bridge_fdb_event_work(struct work_struct *work)
                        break;
                mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED,
                                            fdb_info->addr,
-                                           fdb_info->vid, dev);
+                                           fdb_info->vid, dev, true);
                break;
        case SWITCHDEV_FDB_DEL_TO_DEVICE:
                fdb_info = &switchdev_work->fdb_info;
@@ -2337,6 +2682,189 @@ out:
        dev_put(dev);
 }
 
+static void
+mlxsw_sp_switchdev_vxlan_fdb_add(struct mlxsw_sp *mlxsw_sp,
+                                struct mlxsw_sp_switchdev_event_work *
+                                switchdev_work)
+{
+       struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info;
+       struct mlxsw_sp_bridge_device *bridge_device;
+       struct net_device *dev = switchdev_work->dev;
+       u8 all_zeros_mac[ETH_ALEN] = { 0 };
+       enum mlxsw_sp_l3proto proto;
+       union mlxsw_sp_l3addr addr;
+       struct net_device *br_dev;
+       struct mlxsw_sp_fid *fid;
+       u16 vid;
+       int err;
+
+       vxlan_fdb_info = &switchdev_work->vxlan_fdb_info;
+       br_dev = netdev_master_upper_dev_get(dev);
+
+       bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+       if (!bridge_device)
+               return;
+
+       fid = mlxsw_sp_fid_lookup_by_vni(mlxsw_sp, vxlan_fdb_info->vni);
+       if (!fid)
+               return;
+
+       mlxsw_sp_switchdev_vxlan_addr_convert(&vxlan_fdb_info->remote_ip,
+                                             &proto, &addr);
+
+       if (ether_addr_equal(vxlan_fdb_info->eth_addr, all_zeros_mac)) {
+               err = mlxsw_sp_nve_flood_ip_add(mlxsw_sp, fid, proto, &addr);
+               if (err) {
+                       mlxsw_sp_fid_put(fid);
+                       return;
+               }
+               vxlan_fdb_info->offloaded = true;
+               call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev,
+                                        &vxlan_fdb_info->info);
+               mlxsw_sp_fid_put(fid);
+               return;
+       }
+
+       /* The device has a single FDB table, whereas Linux has two - one
+        * in the bridge driver and another in the VxLAN driver. We only
+        * program an entry to the device if the MAC points to the VxLAN
+        * device in the bridge's FDB table
+        */
+       vid = bridge_device->ops->fid_vid(bridge_device, fid);
+       if (br_fdb_find_port(br_dev, vxlan_fdb_info->eth_addr, vid) != dev)
+               goto err_br_fdb_find;
+
+       err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, vxlan_fdb_info->eth_addr,
+                                            mlxsw_sp_fid_index(fid), proto,
+                                            &addr, true, false);
+       if (err)
+               goto err_fdb_tunnel_uc_op;
+       vxlan_fdb_info->offloaded = true;
+       call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev,
+                                &vxlan_fdb_info->info);
+       mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED,
+                                   vxlan_fdb_info->eth_addr, vid, dev, true);
+
+       mlxsw_sp_fid_put(fid);
+
+       return;
+
+err_fdb_tunnel_uc_op:
+err_br_fdb_find:
+       mlxsw_sp_fid_put(fid);
+}
+
+static void
+mlxsw_sp_switchdev_vxlan_fdb_del(struct mlxsw_sp *mlxsw_sp,
+                                struct mlxsw_sp_switchdev_event_work *
+                                switchdev_work)
+{
+       struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info;
+       struct mlxsw_sp_bridge_device *bridge_device;
+       struct net_device *dev = switchdev_work->dev;
+       struct net_device *br_dev = netdev_master_upper_dev_get(dev);
+       u8 all_zeros_mac[ETH_ALEN] = { 0 };
+       enum mlxsw_sp_l3proto proto;
+       union mlxsw_sp_l3addr addr;
+       struct mlxsw_sp_fid *fid;
+       u16 vid;
+
+       vxlan_fdb_info = &switchdev_work->vxlan_fdb_info;
+
+       bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+       if (!bridge_device)
+               return;
+
+       fid = mlxsw_sp_fid_lookup_by_vni(mlxsw_sp, vxlan_fdb_info->vni);
+       if (!fid)
+               return;
+
+       mlxsw_sp_switchdev_vxlan_addr_convert(&vxlan_fdb_info->remote_ip,
+                                             &proto, &addr);
+
+       if (ether_addr_equal(vxlan_fdb_info->eth_addr, all_zeros_mac)) {
+               mlxsw_sp_nve_flood_ip_del(mlxsw_sp, fid, proto, &addr);
+               mlxsw_sp_fid_put(fid);
+               return;
+       }
+
+       mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, vxlan_fdb_info->eth_addr,
+                                      mlxsw_sp_fid_index(fid), proto, &addr,
+                                      false, false);
+       vid = bridge_device->ops->fid_vid(bridge_device, fid);
+       mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED,
+                                   vxlan_fdb_info->eth_addr, vid, dev, false);
+
+       mlxsw_sp_fid_put(fid);
+}
+
+static void mlxsw_sp_switchdev_vxlan_fdb_event_work(struct work_struct *work)
+{
+       struct mlxsw_sp_switchdev_event_work *switchdev_work =
+               container_of(work, struct mlxsw_sp_switchdev_event_work, work);
+       struct net_device *dev = switchdev_work->dev;
+       struct mlxsw_sp *mlxsw_sp;
+       struct net_device *br_dev;
+
+       rtnl_lock();
+
+       if (!netif_running(dev))
+               goto out;
+       br_dev = netdev_master_upper_dev_get(dev);
+       if (!br_dev)
+               goto out;
+       if (!netif_is_bridge_master(br_dev))
+               goto out;
+       mlxsw_sp = mlxsw_sp_lower_get(br_dev);
+       if (!mlxsw_sp)
+               goto out;
+
+       switch (switchdev_work->event) {
+       case SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE:
+               mlxsw_sp_switchdev_vxlan_fdb_add(mlxsw_sp, switchdev_work);
+               break;
+       case SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE:
+               mlxsw_sp_switchdev_vxlan_fdb_del(mlxsw_sp, switchdev_work);
+               break;
+       }
+
+out:
+       rtnl_unlock();
+       kfree(switchdev_work);
+       dev_put(dev);
+}
+
+static int
+mlxsw_sp_switchdev_vxlan_work_prepare(struct mlxsw_sp_switchdev_event_work *
+                                     switchdev_work,
+                                     struct switchdev_notifier_info *info)
+{
+       struct vxlan_dev *vxlan = netdev_priv(switchdev_work->dev);
+       struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info;
+       struct vxlan_config *cfg = &vxlan->cfg;
+
+       vxlan_fdb_info = container_of(info,
+                                     struct switchdev_notifier_vxlan_fdb_info,
+                                     info);
+
+       if (vxlan_fdb_info->remote_port != cfg->dst_port)
+               return -EOPNOTSUPP;
+       if (vxlan_fdb_info->remote_vni != cfg->vni)
+               return -EOPNOTSUPP;
+       if (vxlan_fdb_info->vni != cfg->vni)
+               return -EOPNOTSUPP;
+       if (vxlan_fdb_info->remote_ifindex)
+               return -EOPNOTSUPP;
+       if (is_multicast_ether_addr(vxlan_fdb_info->eth_addr))
+               return -EOPNOTSUPP;
+       if (vxlan_addr_multicast(&vxlan_fdb_info->remote_ip))
+               return -EOPNOTSUPP;
+
+       switchdev_work->vxlan_fdb_info = *vxlan_fdb_info;
+
+       return 0;
+}
+
 /* Called under rcu_read_lock() */
 static int mlxsw_sp_switchdev_event(struct notifier_block *unused,
                                    unsigned long event, void *ptr)
@@ -2346,6 +2874,7 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused,
        struct switchdev_notifier_fdb_info *fdb_info;
        struct switchdev_notifier_info *info = ptr;
        struct net_device *br_dev;
+       int err;
 
        /* Tunnel devices are not our uppers, so check their master instead */
        br_dev = netdev_master_upper_dev_get_rcu(dev);
@@ -2386,6 +2915,16 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused,
                 */
                dev_hold(dev);
                break;
+       case SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE: /* fall through */
+       case SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE:
+               INIT_WORK(&switchdev_work->work,
+                         mlxsw_sp_switchdev_vxlan_fdb_event_work);
+               err = mlxsw_sp_switchdev_vxlan_work_prepare(switchdev_work,
+                                                           info);
+               if (err)
+                       goto err_vxlan_work_prepare;
+               dev_hold(dev);
+               break;
        default:
                kfree(switchdev_work);
                return NOTIFY_DONE;
@@ -2395,6 +2934,7 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused,
 
        return NOTIFY_DONE;
 
+err_vxlan_work_prepare:
 err_addr_alloc:
        kfree(switchdev_work);
        return NOTIFY_BAD;
index 30c926c4bc477ecfaf7f2adf223fe78fa10daa45..8e5bec04d1f975deb609e3d50269f2096e86cbe6 100644 (file)
@@ -4,6 +4,7 @@
 #include <linux/etherdevice.h>
 #include <linux/inetdevice.h>
 #include <net/netevent.h>
+#include <net/vxlan.h>
 #include <linux/idr.h>
 #include <net/dst_metadata.h>
 #include <net/arp.h>
@@ -187,7 +188,7 @@ static bool nfp_tun_is_netdev_to_offload(struct net_device *netdev)
                return false;
        if (!strcmp(netdev->rtnl_link_ops->kind, "openvswitch"))
                return true;
-       if (!strcmp(netdev->rtnl_link_ops->kind, "vxlan"))
+       if (netif_is_vxlan(netdev))
                return true;
 
        return false;
index aeafdb9ac015f9e44f63684bdee7f2f5fab3af76..8721c0506af30b232f78ca7e508dbe60910a7798 100644 (file)
@@ -2728,6 +2728,7 @@ rocker_fdb_offload_notify(struct rocker_port *rocker_port,
 
        info.addr = recv_info->addr;
        info.vid = recv_info->vid;
+       info.offloaded = true;
        call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED,
                                 rocker_port->dev, &info.info);
 }
index 018406c4d944118c629ea72cd32d05062bc90602..1d74f90d6f5d341597c72ecb959f2df824c1669e 100644 (file)
@@ -103,22 +103,6 @@ bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
                return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
 }
 
-static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
-{
-       if (ipa->sa.sa_family == AF_INET6)
-               return ipv6_addr_any(&ipa->sin6.sin6_addr);
-       else
-               return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
-}
-
-static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
-{
-       if (ipa->sa.sa_family == AF_INET6)
-               return ipv6_addr_is_multicast(&ipa->sin6.sin6_addr);
-       else
-               return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
-}
-
 static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
 {
        if (nla_len(nla) >= sizeof(struct in6_addr)) {
@@ -151,16 +135,6 @@ bool vxlan_addr_equal(const union vxlan_addr *a, const union vxlan_addr *b)
        return a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr;
 }
 
-static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
-{
-       return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
-}
-
-static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
-{
-       return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
-}
-
 static int vxlan_nla_get_addr(union vxlan_addr *ip, struct nlattr *nla)
 {
        if (nla_len(nla) >= sizeof(struct in6_addr)) {
@@ -298,6 +272,8 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
        ndm->ndm_state = fdb->state;
        ndm->ndm_ifindex = vxlan->dev->ifindex;
        ndm->ndm_flags = fdb->flags;
+       if (rdst->offloaded)
+               ndm->ndm_flags |= NTF_OFFLOADED;
        ndm->ndm_type = RTN_UNICAST;
 
        if (!net_eq(dev_net(vxlan->dev), vxlan->net) &&
@@ -353,8 +329,8 @@ static inline size_t vxlan_nlmsg_size(void)
                + nla_total_size(sizeof(struct nda_cacheinfo));
 }
 
-static void vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
-                            struct vxlan_rdst *rd, int type)
+static void __vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
+                              struct vxlan_rdst *rd, int type)
 {
        struct net *net = dev_net(vxlan->dev);
        struct sk_buff *skb;
@@ -379,6 +355,49 @@ errout:
                rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
 }
 
+static void vxlan_fdb_switchdev_call_notifiers(struct vxlan_dev *vxlan,
+                                              struct vxlan_fdb *fdb,
+                                              struct vxlan_rdst *rd,
+                                              bool adding)
+{
+       struct switchdev_notifier_vxlan_fdb_info info;
+       enum switchdev_notifier_type notifier_type;
+
+       if (WARN_ON(!rd))
+               return;
+
+       notifier_type = adding ? SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE
+                              : SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE;
+
+       info = (struct switchdev_notifier_vxlan_fdb_info){
+               .remote_ip = rd->remote_ip,
+               .remote_port = rd->remote_port,
+               .remote_vni = rd->remote_vni,
+               .remote_ifindex = rd->remote_ifindex,
+               .vni = fdb->vni,
+               .offloaded = rd->offloaded,
+       };
+       memcpy(info.eth_addr, fdb->eth_addr, ETH_ALEN);
+
+       call_switchdev_notifiers(notifier_type, vxlan->dev,
+                                &info.info);
+}
+
+static void vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
+                            struct vxlan_rdst *rd, int type)
+{
+       switch (type) {
+       case RTM_NEWNEIGH:
+               vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd, true);
+               break;
+       case RTM_DELNEIGH:
+               vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd, false);
+               break;
+       }
+
+       __vxlan_fdb_notify(vxlan, fdb, rd, type);
+}
+
 static void vxlan_ip_miss(struct net_device *dev, union vxlan_addr *ipa)
 {
        struct vxlan_dev *vxlan = netdev_priv(dev);
@@ -488,6 +507,47 @@ static struct vxlan_rdst *vxlan_fdb_find_rdst(struct vxlan_fdb *f,
        return NULL;
 }
 
+int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni,
+                     struct switchdev_notifier_vxlan_fdb_info *fdb_info)
+{
+       struct vxlan_dev *vxlan = netdev_priv(dev);
+       u8 eth_addr[ETH_ALEN + 2] = { 0 };
+       struct vxlan_rdst *rdst;
+       struct vxlan_fdb *f;
+       int rc = 0;
+
+       if (is_multicast_ether_addr(mac) ||
+           is_zero_ether_addr(mac))
+               return -EINVAL;
+
+       ether_addr_copy(eth_addr, mac);
+
+       rcu_read_lock();
+
+       f = __vxlan_find_mac(vxlan, eth_addr, vni);
+       if (!f) {
+               rc = -ENOENT;
+               goto out;
+       }
+
+       rdst = first_remote_rcu(f);
+
+       memset(fdb_info, 0, sizeof(*fdb_info));
+       fdb_info->info.dev = dev;
+       fdb_info->remote_ip = rdst->remote_ip;
+       fdb_info->remote_port = rdst->remote_port;
+       fdb_info->remote_vni = rdst->remote_vni;
+       fdb_info->remote_ifindex = rdst->remote_ifindex;
+       fdb_info->vni = vni;
+       fdb_info->offloaded = rdst->offloaded;
+       ether_addr_copy(fdb_info->eth_addr, mac);
+
+out:
+       rcu_read_unlock();
+       return rc;
+}
+EXPORT_SYMBOL_GPL(vxlan_fdb_find_uc);
+
 /* Replace destination of unicast mac */
 static int vxlan_fdb_replace(struct vxlan_fdb *f,
                             union vxlan_addr *ip, __be16 port, __be32 vni,
@@ -533,6 +593,7 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,
 
        rd->remote_ip = *ip;
        rd->remote_port = port;
+       rd->offloaded = false;
        rd->remote_vni = vni;
        rd->remote_ifindex = ifindex;
 
@@ -782,12 +843,15 @@ static void vxlan_fdb_free(struct rcu_head *head)
 static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
                              bool do_notify)
 {
+       struct vxlan_rdst *rd;
+
        netdev_dbg(vxlan->dev,
                    "delete %pM\n", f->eth_addr);
 
        --vxlan->addrcnt;
        if (do_notify)
-               vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_DELNEIGH);
+               list_for_each_entry(rd, &f->remotes, list)
+                       vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH);
 
        hlist_del_rcu(&f->hlist);
        call_rcu(&f->rcu, vxlan_fdb_free);
@@ -3761,6 +3825,51 @@ static struct notifier_block vxlan_notifier_block __read_mostly = {
        .notifier_call = vxlan_netdevice_event,
 };
 
+static void
+vxlan_fdb_offloaded_set(struct net_device *dev,
+                       struct switchdev_notifier_vxlan_fdb_info *fdb_info)
+{
+       struct vxlan_dev *vxlan = netdev_priv(dev);
+       struct vxlan_rdst *rdst;
+       struct vxlan_fdb *f;
+
+       spin_lock_bh(&vxlan->hash_lock);
+
+       f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni);
+       if (!f)
+               goto out;
+
+       rdst = vxlan_fdb_find_rdst(f, &fdb_info->remote_ip,
+                                  fdb_info->remote_port,
+                                  fdb_info->remote_vni,
+                                  fdb_info->remote_ifindex);
+       if (!rdst)
+               goto out;
+
+       rdst->offloaded = fdb_info->offloaded;
+
+out:
+       spin_unlock_bh(&vxlan->hash_lock);
+}
+
+static int vxlan_switchdev_event(struct notifier_block *unused,
+                                unsigned long event, void *ptr)
+{
+       struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
+
+       switch (event) {
+       case SWITCHDEV_VXLAN_FDB_OFFLOADED:
+               vxlan_fdb_offloaded_set(dev, ptr);
+               break;
+       }
+
+       return 0;
+}
+
+static struct notifier_block vxlan_switchdev_notifier_block __read_mostly = {
+       .notifier_call = vxlan_switchdev_event,
+};
+
 static __net_init int vxlan_init_net(struct net *net)
 {
        struct vxlan_net *vn = net_generic(net, vxlan_net_id);
@@ -3834,11 +3943,17 @@ static int __init vxlan_init_module(void)
        if (rc)
                goto out2;
 
-       rc = rtnl_link_register(&vxlan_link_ops);
+       rc = register_switchdev_notifier(&vxlan_switchdev_notifier_block);
        if (rc)
                goto out3;
 
+       rc = rtnl_link_register(&vxlan_link_ops);
+       if (rc)
+               goto out4;
+
        return 0;
+out4:
+       unregister_switchdev_notifier(&vxlan_switchdev_notifier_block);
 out3:
        unregister_netdevice_notifier(&vxlan_notifier_block);
 out2:
@@ -3851,6 +3966,7 @@ late_initcall(vxlan_init_module);
 static void __exit vxlan_cleanup_module(void)
 {
        rtnl_link_unregister(&vxlan_link_ops);
+       unregister_switchdev_notifier(&vxlan_switchdev_notifier_block);
        unregister_netdevice_notifier(&vxlan_notifier_block);
        unregister_pernet_subsys(&vxlan_net_ops);
        /* rcu_barrier() is called by netns */
index 482a1b705362c19545a305aba8c53638c2f13037..c8e2bebd8d934a5560927509ecb81e5536e2f8e8 100644 (file)
@@ -183,8 +183,7 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb)
  *          1 if something is broken and should be logged (!!! above)
  *          2 if packet should be dropped
  */
-static inline int INET_ECN_decapsulate(struct sk_buff *skb,
-                                      __u8 outer, __u8 inner)
+static inline int __INET_ECN_decapsulate(__u8 outer, __u8 inner, bool *set_ce)
 {
        if (INET_ECN_is_not_ect(inner)) {
                switch (outer & INET_ECN_MASK) {
@@ -198,10 +197,21 @@ static inline int INET_ECN_decapsulate(struct sk_buff *skb,
                }
        }
 
-       if (INET_ECN_is_ce(outer))
+       *set_ce = INET_ECN_is_ce(outer);
+       return 0;
+}
+
+static inline int INET_ECN_decapsulate(struct sk_buff *skb,
+                                      __u8 outer, __u8 inner)
+{
+       bool set_ce = false;
+       int rc;
+
+       rc = __INET_ECN_decapsulate(outer, inner, &set_ce);
+       if (!rc && set_ce)
                INET_ECN_set_ce(skb);
 
-       return 0;
+       return rc;
 }
 
 static inline int IP_ECN_decapsulate(const struct iphdr *oiph,
index d574ce63bf220d1069d2d2a5fa7177496984d92f..881ecb1555bf1a40cd873ba21f16065112afade5 100644 (file)
@@ -145,6 +145,10 @@ enum switchdev_notifier_type {
        SWITCHDEV_FDB_ADD_TO_DEVICE,
        SWITCHDEV_FDB_DEL_TO_DEVICE,
        SWITCHDEV_FDB_OFFLOADED,
+
+       SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE,
+       SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE,
+       SWITCHDEV_VXLAN_FDB_OFFLOADED,
 };
 
 struct switchdev_notifier_info {
@@ -155,7 +159,8 @@ struct switchdev_notifier_fdb_info {
        struct switchdev_notifier_info info; /* must be first */
        const unsigned char *addr;
        u16 vid;
-       bool added_by_user;
+       u8 added_by_user:1,
+          offloaded:1;
 };
 
 static inline struct net_device *
index 7ef15179f26379be9e03eb4d54e19c58f0000c66..03431c148e1677bd3cb140996e8d8a14af9d8a42 100644 (file)
@@ -5,6 +5,8 @@
 #include <linux/if_vlan.h>
 #include <net/udp_tunnel.h>
 #include <net/dst_metadata.h>
+#include <net/rtnetlink.h>
+#include <net/switchdev.h>
 
 /* VXLAN protocol (RFC 7348) header:
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@@ -190,6 +192,7 @@ union vxlan_addr {
 struct vxlan_rdst {
        union vxlan_addr         remote_ip;
        __be16                   remote_port;
+       u8                       offloaded:1;
        __be32                   remote_vni;
        u32                      remote_ifindex;
        struct list_head         list;
@@ -370,4 +373,65 @@ static inline unsigned short vxlan_get_sk_family(struct vxlan_sock *vs)
        return vs->sock->sk->sk_family;
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+
+static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
+{
+       if (ipa->sa.sa_family == AF_INET6)
+               return ipv6_addr_any(&ipa->sin6.sin6_addr);
+       else
+               return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
+}
+
+static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
+{
+       if (ipa->sa.sa_family == AF_INET6)
+               return ipv6_addr_is_multicast(&ipa->sin6.sin6_addr);
+       else
+               return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
+}
+
+#else /* !IS_ENABLED(CONFIG_IPV6) */
+
+static inline bool vxlan_addr_any(const union vxlan_addr *ipa)
+{
+       return ipa->sin.sin_addr.s_addr == htonl(INADDR_ANY);
+}
+
+static inline bool vxlan_addr_multicast(const union vxlan_addr *ipa)
+{
+       return IN_MULTICAST(ntohl(ipa->sin.sin_addr.s_addr));
+}
+
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+
+static inline bool netif_is_vxlan(const struct net_device *dev)
+{
+       return dev->rtnl_link_ops &&
+              !strcmp(dev->rtnl_link_ops->kind, "vxlan");
+}
+
+struct switchdev_notifier_vxlan_fdb_info {
+       struct switchdev_notifier_info info; /* must be first */
+       union vxlan_addr remote_ip;
+       __be16 remote_port;
+       __be32 remote_vni;
+       u32 remote_ifindex;
+       u8 eth_addr[ETH_ALEN];
+       __be32 vni;
+       bool offloaded;
+};
+
+#if IS_ENABLED(CONFIG_VXLAN)
+int vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni,
+                     struct switchdev_notifier_vxlan_fdb_info *fdb_info);
+#else
+static inline int
+vxlan_fdb_find_uc(struct net_device *dev, const u8 *mac, __be32 vni,
+                 struct switchdev_notifier_vxlan_fdb_info *fdb_info)
+{
+       return -ENOENT;
+}
+#endif
+
 #endif
index e411e40333e2e9816142536b4e6f639961f425cd..360ad66c21e95794f1eaa59d2aee599f56f3f39a 100644 (file)
@@ -151,7 +151,7 @@ static int br_switchdev_event(struct notifier_block *unused,
                        break;
                }
                br_fdb_offloaded_set(br, p, fdb_info->addr,
-                                    fdb_info->vid);
+                                    fdb_info->vid, true);
                break;
        case SWITCHDEV_FDB_DEL_TO_BRIDGE:
                fdb_info = ptr;
@@ -163,7 +163,7 @@ static int br_switchdev_event(struct notifier_block *unused,
        case SWITCHDEV_FDB_OFFLOADED:
                fdb_info = ptr;
                br_fdb_offloaded_set(br, p, fdb_info->addr,
-                                    fdb_info->vid);
+                                    fdb_info->vid, fdb_info->offloaded);
                break;
        }
 
index 74331690a390a52ee090f45133b9b1bb24070400..e56ba3912a905b3617db0f470c32d45652d7359c 100644 (file)
@@ -1152,7 +1152,7 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
 }
 
 void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
-                         const unsigned char *addr, u16 vid)
+                         const unsigned char *addr, u16 vid, bool offloaded)
 {
        struct net_bridge_fdb_entry *fdb;
 
@@ -1160,7 +1160,7 @@ void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
 
        fdb = br_fdb_find(br, addr, vid);
        if (fdb)
-               fdb->offloaded = 1;
+               fdb->offloaded = offloaded;
 
        spin_unlock_bh(&br->hash_lock);
 }
index 10ee39fdca5cd010c8d7638a7d95de407b0b3480..2920e06a540329ffb8f1286c65bc5acc6f53afad 100644 (file)
@@ -574,7 +574,7 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
                              const unsigned char *addr, u16 vid,
                              bool swdev_notify);
 void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
-                         const unsigned char *addr, u16 vid);
+                         const unsigned char *addr, u16 vid, bool offloaded);
 
 /* br_forward.c */
 enum br_pkt_type {
index d77f807420c486c4d0cfecd946c51b6bac64264a..b993df7706759676d47d56855d53c23a243b0527 100644 (file)
@@ -103,7 +103,7 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
 static void
 br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac,
                                u16 vid, struct net_device *dev,
-                               bool added_by_user)
+                               bool added_by_user, bool offloaded)
 {
        struct switchdev_notifier_fdb_info info;
        unsigned long notifier_type;
@@ -111,6 +111,7 @@ br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac,
        info.addr = mac;
        info.vid = vid;
        info.added_by_user = added_by_user;
+       info.offloaded = offloaded;
        notifier_type = adding ? SWITCHDEV_FDB_ADD_TO_DEVICE : SWITCHDEV_FDB_DEL_TO_DEVICE;
        call_switchdev_notifiers(notifier_type, dev, &info.info);
 }
@@ -126,13 +127,15 @@ br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
                br_switchdev_fdb_call_notifiers(false, fdb->key.addr.addr,
                                                fdb->key.vlan_id,
                                                fdb->dst->dev,
-                                               fdb->added_by_user);
+                                               fdb->added_by_user,
+                                               fdb->offloaded);
                break;
        case RTM_NEWNEIGH:
                br_switchdev_fdb_call_notifiers(true, fdb->key.addr.addr,
                                                fdb->key.vlan_id,
                                                fdb->dst->dev,
-                                               fdb->added_by_user);
+                                               fdb->added_by_user,
+                                               fdb->offloaded);
                break;
        }
 }
index 3f840b6eea692097895449f6a50bfc2b59724d67..5428ef5290190971e168a70148672cc267b37692 100644 (file)
@@ -1478,6 +1478,7 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
                        netdev_dbg(dev, "fdb add failed err=%d\n", err);
                        break;
                }
+               fdb_info->offloaded = true;
                call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev,
                                         &fdb_info->info);
                break;