Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index c500ea77aaa05014588d7e7b5089539e545e618b..02bf20f51349024820c3081577e18a4c36923847 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -86,16 +86,16 @@ static void free_rx_fd(struct dpaa2_eth_priv *priv,
        for (i = 1; i < DPAA2_ETH_MAX_SG_ENTRIES; i++) {
                addr = dpaa2_sg_get_addr(&sgt[i]);
                sg_vaddr = dpaa2_iova_to_virt(priv->iommu_domain, addr);
-               dma_unmap_single(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
-                                DMA_BIDIRECTIONAL);
+               dma_unmap_page(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
+                              DMA_BIDIRECTIONAL);
 
-               skb_free_frag(sg_vaddr);
+               free_pages((unsigned long)sg_vaddr, 0);
                if (dpaa2_sg_is_final(&sgt[i]))
                        break;
        }
 
 free_buf:
-       skb_free_frag(vaddr);
+       free_pages((unsigned long)vaddr, 0);
 }
 
 /* Build a linear skb based on a single-buffer frame descriptor */
@@ -109,7 +109,7 @@ static struct sk_buff *build_linear_skb(struct dpaa2_eth_channel *ch,
 
        ch->buf_count--;
 
-       skb = build_skb(fd_vaddr, DPAA2_ETH_SKB_SIZE);
+       skb = build_skb(fd_vaddr, DPAA2_ETH_RX_BUF_RAW_SIZE);
        if (unlikely(!skb))
                return NULL;
 
@@ -144,19 +144,19 @@ static struct sk_buff *build_frag_skb(struct dpaa2_eth_priv *priv,
                /* Get the address and length from the S/G entry */
                sg_addr = dpaa2_sg_get_addr(sge);
                sg_vaddr = dpaa2_iova_to_virt(priv->iommu_domain, sg_addr);
-               dma_unmap_single(dev, sg_addr, DPAA2_ETH_RX_BUF_SIZE,
-                                DMA_BIDIRECTIONAL);
+               dma_unmap_page(dev, sg_addr, DPAA2_ETH_RX_BUF_SIZE,
+                              DMA_BIDIRECTIONAL);
 
                sg_length = dpaa2_sg_get_len(sge);
 
                if (i == 0) {
                        /* We build the skb around the first data buffer */
-                       skb = build_skb(sg_vaddr, DPAA2_ETH_SKB_SIZE);
+                       skb = build_skb(sg_vaddr, DPAA2_ETH_RX_BUF_RAW_SIZE);
                        if (unlikely(!skb)) {
                                /* Free the first SG entry now, since we already
                                 * unmapped it and obtained the virtual address
                                 */
-                               skb_free_frag(sg_vaddr);
+                               free_pages((unsigned long)sg_vaddr, 0);
 
                                /* We still need to subtract the buffers used
                                 * by this FD from our software counter
@@ -211,9 +211,9 @@ static void free_bufs(struct dpaa2_eth_priv *priv, u64 *buf_array, int count)
 
        for (i = 0; i < count; i++) {
                vaddr = dpaa2_iova_to_virt(priv->iommu_domain, buf_array[i]);
-               dma_unmap_single(dev, buf_array[i], DPAA2_ETH_RX_BUF_SIZE,
-                                DMA_BIDIRECTIONAL);
-               skb_free_frag(vaddr);
+               dma_unmap_page(dev, buf_array[i], DPAA2_ETH_RX_BUF_SIZE,
+                              DMA_BIDIRECTIONAL);
+               free_pages((unsigned long)vaddr, 0);
        }
 }
 
@@ -264,9 +264,7 @@ static int xdp_enqueue(struct dpaa2_eth_priv *priv, struct dpaa2_fd *fd,
 
        fq = &priv->fq[queue_id];
        for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
-               err = dpaa2_io_service_enqueue_qd(fq->channel->dpio,
-                                                 priv->tx_qdid, 0,
-                                                 fq->tx_qdbin, fd);
+               err = priv->enqueue(priv, fq, fd, 0);
                if (err != -EBUSY)
                        break;
        }
@@ -298,6 +296,7 @@ static u32 run_xdp(struct dpaa2_eth_priv *priv,
        xdp.data_end = xdp.data + dpaa2_fd_get_len(fd);
        xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
        xdp_set_data_meta_invalid(&xdp);
+       xdp.rxq = &ch->xdp_rxq;
 
        xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp);
 
@@ -330,8 +329,20 @@ static u32 run_xdp(struct dpaa2_eth_priv *priv,
                xdp_release_buf(priv, ch, addr);
                ch->stats.xdp_drop++;
                break;
+       case XDP_REDIRECT:
+               dma_unmap_page(priv->net_dev->dev.parent, addr,
+                              DPAA2_ETH_RX_BUF_SIZE, DMA_BIDIRECTIONAL);
+               ch->buf_count--;
+               xdp.data_hard_start = vaddr;
+               err = xdp_do_redirect(priv->net_dev, &xdp, xdp_prog);
+               if (unlikely(err))
+                       ch->stats.xdp_drop++;
+               else
+                       ch->stats.xdp_redirect++;
+               break;
        }
 
+       ch->xdp.res |= xdp_act;
 out:
        rcu_read_unlock();
        return xdp_act;
@@ -378,16 +389,16 @@ static void dpaa2_eth_rx(struct dpaa2_eth_priv *priv,
                        return;
                }
 
-               dma_unmap_single(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
-                                DMA_BIDIRECTIONAL);
+               dma_unmap_page(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
+                              DMA_BIDIRECTIONAL);
                skb = build_linear_skb(ch, fd, vaddr);
        } else if (fd_format == dpaa2_fd_sg) {
                WARN_ON(priv->xdp_prog);
 
-               dma_unmap_single(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
-                                DMA_BIDIRECTIONAL);
+               dma_unmap_page(dev, addr, DPAA2_ETH_RX_BUF_SIZE,
+                              DMA_BIDIRECTIONAL);
                skb = build_frag_skb(priv, ch, buf_data);
-               skb_free_frag(vaddr);
+               free_pages((unsigned long)vaddr, 0);
                percpu_extras->rx_sg_frames++;
                percpu_extras->rx_sg_bytes += dpaa2_fd_get_len(fd);
        } else {
@@ -573,10 +584,11 @@ static int build_sg_fd(struct dpaa2_eth_priv *priv,
         * all of them on Tx Conf.
         */
        swa = (struct dpaa2_eth_swa *)sgt_buf;
-       swa->skb = skb;
-       swa->scl = scl;
-       swa->num_sg = num_sg;
-       swa->sgt_size = sgt_buf_size;
+       swa->type = DPAA2_ETH_SWA_SG;
+       swa->sg.skb = skb;
+       swa->sg.scl = scl;
+       swa->sg.num_sg = num_sg;
+       swa->sg.sgt_size = sgt_buf_size;
 
        /* Separately map the SGT buffer */
        addr = dma_map_single(dev, sgt_buf, sgt_buf_size, DMA_BIDIRECTIONAL);
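
Note: the swa->type, swa->single.* and swa->sg.* accesses above (and swa->xdp.* further down) rely on a tagged-union software annotation declared in dpaa2-eth.h, which this diff does not show. A minimal sketch of what that declaration presumably looks like, inferred only from the fields this patch touches; the real layout must still fit in the DPAA2_ETH_SWA_SIZE bytes reserved through buf_layout.private_data_size:

        /* Presumed declaration in dpaa2-eth.h, not part of this diff */
        enum dpaa2_eth_swa_type {
                DPAA2_ETH_SWA_SINGLE,
                DPAA2_ETH_SWA_SG,
                DPAA2_ETH_SWA_XDP,
        };

        /* Software annotation written at the start of the Tx buffer */
        struct dpaa2_eth_swa {
                enum dpaa2_eth_swa_type type;
                union {
                        struct {
                                struct sk_buff *skb;
                        } single;
                        struct {
                                struct sk_buff *skb;
                                struct scatterlist *scl;
                                int num_sg;
                                int sgt_size;
                        } sg;
                        struct {
                                int dma_size;
                                struct xdp_frame *xdpf;
                        } xdp;
                };
        };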
@@ -611,7 +623,7 @@ static int build_single_fd(struct dpaa2_eth_priv *priv,
 {
        struct device *dev = priv->net_dev->dev.parent;
        u8 *buffer_start, *aligned_start;
-       struct sk_buff **skbh;
+       struct dpaa2_eth_swa *swa;
        dma_addr_t addr;
 
        buffer_start = skb->data - dpaa2_eth_needed_headroom(priv, skb);
@@ -628,8 +640,9 @@ static int build_single_fd(struct dpaa2_eth_priv *priv,
         * (in the private data area) such that we can release it
         * on Tx confirm
         */
-       skbh = (struct sk_buff **)buffer_start;
-       *skbh = skb;
+       swa = (struct dpaa2_eth_swa *)buffer_start;
+       swa->type = DPAA2_ETH_SWA_SINGLE;
+       swa->single.skb = skb;
 
        addr = dma_map_single(dev, buffer_start,
                              skb_tail_pointer(skb) - buffer_start,
@@ -657,47 +670,65 @@ static int build_single_fd(struct dpaa2_eth_priv *priv,
  * dpaa2_eth_tx().
  */
 static void free_tx_fd(const struct dpaa2_eth_priv *priv,
-                      const struct dpaa2_fd *fd)
+                      struct dpaa2_eth_fq *fq,
+                      const struct dpaa2_fd *fd, bool in_napi)
 {
        struct device *dev = priv->net_dev->dev.parent;
        dma_addr_t fd_addr;
-       struct sk_buff **skbh, *skb;
+       struct sk_buff *skb = NULL;
        unsigned char *buffer_start;
        struct dpaa2_eth_swa *swa;
        u8 fd_format = dpaa2_fd_get_format(fd);
+       u32 fd_len = dpaa2_fd_get_len(fd);
 
        fd_addr = dpaa2_fd_get_addr(fd);
-       skbh = dpaa2_iova_to_virt(priv->iommu_domain, fd_addr);
+       buffer_start = dpaa2_iova_to_virt(priv->iommu_domain, fd_addr);
+       swa = (struct dpaa2_eth_swa *)buffer_start;
 
        if (fd_format == dpaa2_fd_single) {
-               skb = *skbh;
-               buffer_start = (unsigned char *)skbh;
-               /* Accessing the skb buffer is safe before dma unmap, because
-                * we didn't map the actual skb shell.
-                */
-               dma_unmap_single(dev, fd_addr,
-                                skb_tail_pointer(skb) - buffer_start,
-                                DMA_BIDIRECTIONAL);
+               if (swa->type == DPAA2_ETH_SWA_SINGLE) {
+                       skb = swa->single.skb;
+                       /* Accessing the skb buffer is safe before dma unmap,
+                        * because we didn't map the actual skb shell.
+                        */
+                       dma_unmap_single(dev, fd_addr,
+                                        skb_tail_pointer(skb) - buffer_start,
+                                        DMA_BIDIRECTIONAL);
+               } else {
+                       WARN_ONCE(swa->type != DPAA2_ETH_SWA_XDP, "Wrong SWA type");
+                       dma_unmap_single(dev, fd_addr, swa->xdp.dma_size,
+                                        DMA_BIDIRECTIONAL);
+               }
        } else if (fd_format == dpaa2_fd_sg) {
-               swa = (struct dpaa2_eth_swa *)skbh;
-               skb = swa->skb;
+               skb = swa->sg.skb;
 
                /* Unmap the scatterlist */
-               dma_unmap_sg(dev, swa->scl, swa->num_sg, DMA_BIDIRECTIONAL);
-               kfree(swa->scl);
+               dma_unmap_sg(dev, swa->sg.scl, swa->sg.num_sg,
+                            DMA_BIDIRECTIONAL);
+               kfree(swa->sg.scl);
 
                /* Unmap the SGT buffer */
-               dma_unmap_single(dev, fd_addr, swa->sgt_size,
+               dma_unmap_single(dev, fd_addr, swa->sg.sgt_size,
                                 DMA_BIDIRECTIONAL);
        } else {
                netdev_dbg(priv->net_dev, "Invalid FD format\n");
                return;
        }
 
+       if (swa->type != DPAA2_ETH_SWA_XDP && in_napi) {
+               fq->dq_frames++;
+               fq->dq_bytes += fd_len;
+       }
+
+       if (swa->type == DPAA2_ETH_SWA_XDP) {
+               xdp_return_frame(swa->xdp.xdpf);
+               return;
+       }
+
        /* Get the timestamp value */
        if (priv->tx_tstamp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
                struct skb_shared_hwtstamps shhwtstamps;
-               __le64 *ts = dpaa2_get_ts(skbh, true);
+               __le64 *ts = dpaa2_get_ts(buffer_start, true);
                u64 ns;
 
                memset(&shhwtstamps, 0, sizeof(shhwtstamps));
@@ -709,10 +740,10 @@ static void free_tx_fd(const struct dpaa2_eth_priv *priv,
 
        /* Free SGT buffer allocated on tx */
        if (fd_format != dpaa2_fd_single)
-               skb_free_frag(skbh);
+               skb_free_frag(buffer_start);
 
        /* Move on with skb release */
-       dev_kfree_skb(skb);
+       napi_consume_skb(skb, in_napi);
 }
 
 static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev)
@@ -785,9 +816,7 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev)
        queue_mapping = skb_get_queue_mapping(skb);
        fq = &priv->fq[queue_mapping];
        for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
-               err = dpaa2_io_service_enqueue_qd(fq->channel->dpio,
-                                                 priv->tx_qdid, 0,
-                                                 fq->tx_qdbin, &fd);
+               err = priv->enqueue(priv, fq, &fd, 0);
                if (err != -EBUSY)
                        break;
        }
@@ -795,7 +824,7 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev)
        if (unlikely(err < 0)) {
                percpu_stats->tx_errors++;
                /* Clean up everything, including freeing the skb */
-               free_tx_fd(priv, &fd);
+               free_tx_fd(priv, fq, &fd, false);
        } else {
                fd_len = dpaa2_fd_get_len(&fd);
                percpu_stats->tx_packets++;
@@ -832,12 +861,9 @@ static void dpaa2_eth_tx_conf(struct dpaa2_eth_priv *priv,
        percpu_extras->tx_conf_frames++;
        percpu_extras->tx_conf_bytes += fd_len;
 
-       fq->dq_frames++;
-       fq->dq_bytes += fd_len;
-
        /* Check frame errors in the FD field */
        fd_errors = dpaa2_fd_get_ctrl(fd) & DPAA2_FD_TX_ERR_MASK;
-       free_tx_fd(priv, fd);
+       free_tx_fd(priv, fq, fd, true);
 
        if (likely(!fd_errors))
                return;
@@ -903,7 +929,7 @@ static int add_bufs(struct dpaa2_eth_priv *priv,
 {
        struct device *dev = priv->net_dev->dev.parent;
        u64 buf_array[DPAA2_ETH_BUFS_PER_CMD];
-       void *buf;
+       struct page *page;
        dma_addr_t addr;
        int i, err;
 
@@ -911,14 +937,16 @@ static int add_bufs(struct dpaa2_eth_priv *priv,
                /* Allocate buffer visible to WRIOP + skb shared info +
                 * alignment padding
                 */
-               buf = napi_alloc_frag(dpaa2_eth_buf_raw_size(priv));
-               if (unlikely(!buf))
+               /* allocate one page for each Rx buffer. WRIOP sees
+                * the entire page except for a tailroom reserved for
+                * skb shared info
+                */
+               page = dev_alloc_pages(0);
+               if (!page)
                        goto err_alloc;
 
-               buf = PTR_ALIGN(buf, priv->rx_buf_align);
-
-               addr = dma_map_single(dev, buf, DPAA2_ETH_RX_BUF_SIZE,
-                                     DMA_BIDIRECTIONAL);
+               addr = dma_map_page(dev, page, 0, DPAA2_ETH_RX_BUF_SIZE,
+                                   DMA_BIDIRECTIONAL);
                if (unlikely(dma_mapping_error(dev, addr)))
                        goto err_map;
 
@@ -926,7 +954,7 @@ static int add_bufs(struct dpaa2_eth_priv *priv,
 
                /* tracing point */
                trace_dpaa2_eth_buf_seed(priv->net_dev,
-                                        buf, dpaa2_eth_buf_raw_size(priv),
+                                        page, DPAA2_ETH_RX_BUF_RAW_SIZE,
                                         addr, DPAA2_ETH_RX_BUF_SIZE,
                                         bpid);
        }
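
Note: DPAA2_ETH_RX_BUF_RAW_SIZE and DPAA2_ETH_RX_BUF_SIZE used in the trace point above come from dpaa2-eth.h, outside this diff. A hedged sketch of the presumed relationship, following the comment that WRIOP sees the whole order-0 page except for a tailroom reserved for the skb shared info:

        /* Assumed definitions (dpaa2-eth.h), not part of this diff */
        #define DPAA2_ETH_RX_BUF_RAW_SIZE       PAGE_SIZE
        #define DPAA2_ETH_RX_BUF_TAILROOM \
                SKB_DATA_ALIGN(sizeof(struct skb_shared_info))
        #define DPAA2_ETH_RX_BUF_SIZE \
                (DPAA2_ETH_RX_BUF_RAW_SIZE - DPAA2_ETH_RX_BUF_TAILROOM)

This is consistent with build_linear_skb()/build_frag_skb() passing DPAA2_ETH_RX_BUF_RAW_SIZE as the frag size to build_skb() while only DPAA2_ETH_RX_BUF_SIZE bytes are DMA-mapped for hardware.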
@@ -948,7 +976,7 @@ release_bufs:
        return i;
 
 err_map:
-       skb_free_frag(buf);
+       __free_pages(page, 0);
 err_alloc:
        /* If we managed to allocate at least some buffers,
         * release them to hardware
@@ -1083,6 +1111,7 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget)
        int err;
 
        ch = container_of(napi, struct dpaa2_eth_channel, napi);
+       ch->xdp.res = 0;
        priv = ch->priv;
 
        do {
@@ -1128,7 +1157,7 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget)
        work_done = max(rx_cleaned, 1);
 
 out:
-       if (txc_fq) {
+       if (txc_fq && txc_fq->dq_frames) {
                nq = netdev_get_tx_queue(priv->net_dev, txc_fq->flowid);
                netdev_tx_completed_queue(nq, txc_fq->dq_frames,
                                          txc_fq->dq_bytes);
@@ -1136,6 +1165,9 @@ out:
                txc_fq->dq_bytes = 0;
        }
 
+       if (ch->xdp.res & XDP_REDIRECT)
+               xdp_do_flush_map();
+
        return work_done;
 }
 
@@ -1243,34 +1275,36 @@ enable_err:
        return err;
 }
 
-/* The DPIO store must be empty when we call this,
- * at the end of every NAPI cycle.
- */
-static u32 drain_channel(struct dpaa2_eth_channel *ch)
+/* Total number of in-flight frames on ingress queues */
+static u32 ingress_fq_count(struct dpaa2_eth_priv *priv)
 {
-       u32 drained = 0, total = 0;
+       struct dpaa2_eth_fq *fq;
+       u32 fcnt = 0, bcnt = 0, total = 0;
+       int i, err;
 
-       do {
-               pull_channel(ch);
-               drained = consume_frames(ch, NULL);
-               total += drained;
-       } while (drained);
+       for (i = 0; i < priv->num_fqs; i++) {
+               fq = &priv->fq[i];
+               err = dpaa2_io_query_fq_count(NULL, fq->fqid, &fcnt, &bcnt);
+               if (err) {
+                       netdev_warn(priv->net_dev, "query_fq_count failed");
+                       break;
+               }
+               total += fcnt;
+       }
 
        return total;
 }
 
-static u32 drain_ingress_frames(struct dpaa2_eth_priv *priv)
+static void wait_for_fq_empty(struct dpaa2_eth_priv *priv)
 {
-       struct dpaa2_eth_channel *ch;
-       int i;
-       u32 drained = 0;
-
-       for (i = 0; i < priv->num_channels; i++) {
-               ch = priv->channel[i];
-               drained += drain_channel(ch);
-       }
+       int retries = 10;
+       u32 pending;
 
-       return drained;
+       do {
+               pending = ingress_fq_count(priv);
+               if (pending)
+                       msleep(100);
+       } while (pending && --retries);
 }
 
 static int dpaa2_eth_stop(struct net_device *net_dev)
@@ -1278,14 +1312,22 @@ static int dpaa2_eth_stop(struct net_device *net_dev)
        struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
        int dpni_enabled = 0;
        int retries = 10;
-       u32 drained;
 
        netif_tx_stop_all_queues(net_dev);
        netif_carrier_off(net_dev);
 
-       /* Loop while dpni_disable() attempts to drain the egress FQs
-        * and confirm them back to us.
+       /* On dpni_disable(), the MC firmware will:
+        * - stop MAC Rx and wait for all Rx frames to be enqueued to software
+        * - cut off WRIOP dequeues from egress FQs and wait until transmission
+        * of all in flight Tx frames is finished (and corresponding Tx conf
+        * frames are enqueued back to software)
+        *
+        * Before calling dpni_disable(), we wait for all Tx frames to arrive
+        * on WRIOP. After it finishes, wait until all remaining frames on Rx
+        * and Tx conf queues are consumed on NAPI poll.
         */
+       msleep(500);
+
        do {
                dpni_disable(priv->mc_io, 0, priv->mc_token);
                dpni_is_enabled(priv->mc_io, 0, priv->mc_token, &dpni_enabled);
@@ -1300,19 +1342,9 @@ static int dpaa2_eth_stop(struct net_device *net_dev)
                 */
        }
 
-       /* Wait for NAPI to complete on every core and disable it.
-        * In particular, this will also prevent NAPI from being rescheduled if
-        * a new CDAN is serviced, effectively discarding the CDAN. We therefore
-        * don't even need to disarm the channels, except perhaps for the case
-        * of a huge coalescing value.
-        */
+       wait_for_fq_empty(priv);
        disable_ch_napi(priv);
 
-       /* Manually drain the Rx and TxConf queues */
-       drained = drain_ingress_frames(priv);
-       if (drained)
-               netdev_dbg(net_dev, "Drained %d frames.\n", drained);
-
        /* Empty the buffer pool */
        drain_pool(priv);
 
@@ -1730,6 +1762,105 @@ static int dpaa2_eth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
        return 0;
 }
 
+static int dpaa2_eth_xdp_xmit_frame(struct net_device *net_dev,
+                                   struct xdp_frame *xdpf)
+{
+       struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+       struct device *dev = net_dev->dev.parent;
+       struct rtnl_link_stats64 *percpu_stats;
+       struct dpaa2_eth_drv_stats *percpu_extras;
+       unsigned int needed_headroom;
+       struct dpaa2_eth_swa *swa;
+       struct dpaa2_eth_fq *fq;
+       struct dpaa2_fd fd;
+       void *buffer_start, *aligned_start;
+       dma_addr_t addr;
+       int err, i;
+
+       /* We require a minimum headroom to be able to transmit the frame.
+        * Otherwise return an error and let the original net_device handle it
+        */
+       needed_headroom = dpaa2_eth_needed_headroom(priv, NULL);
+       if (xdpf->headroom < needed_headroom)
+               return -EINVAL;
+
+       percpu_stats = this_cpu_ptr(priv->percpu_stats);
+       percpu_extras = this_cpu_ptr(priv->percpu_extras);
+
+       /* Setup the FD fields */
+       memset(&fd, 0, sizeof(fd));
+
+       /* Align FD address, if possible */
+       buffer_start = xdpf->data - needed_headroom;
+       aligned_start = PTR_ALIGN(buffer_start - DPAA2_ETH_TX_BUF_ALIGN,
+                                 DPAA2_ETH_TX_BUF_ALIGN);
+       if (aligned_start >= xdpf->data - xdpf->headroom)
+               buffer_start = aligned_start;
+
+       swa = (struct dpaa2_eth_swa *)buffer_start;
+       /* fill in necessary fields here */
+       swa->type = DPAA2_ETH_SWA_XDP;
+       swa->xdp.dma_size = xdpf->data + xdpf->len - buffer_start;
+       swa->xdp.xdpf = xdpf;
+
+       addr = dma_map_single(dev, buffer_start,
+                             swa->xdp.dma_size,
+                             DMA_BIDIRECTIONAL);
+       if (unlikely(dma_mapping_error(dev, addr))) {
+               percpu_stats->tx_dropped++;
+               return -ENOMEM;
+       }
+
+       dpaa2_fd_set_addr(&fd, addr);
+       dpaa2_fd_set_offset(&fd, xdpf->data - buffer_start);
+       dpaa2_fd_set_len(&fd, xdpf->len);
+       dpaa2_fd_set_format(&fd, dpaa2_fd_single);
+       dpaa2_fd_set_ctrl(&fd, FD_CTRL_PTA);
+
+       fq = &priv->fq[smp_processor_id()];
+       for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
+               err = priv->enqueue(priv, fq, &fd, 0);
+               if (err != -EBUSY)
+                       break;
+       }
+       percpu_extras->tx_portal_busy += i;
+       if (unlikely(err < 0)) {
+               percpu_stats->tx_errors++;
+               /* let the Rx device handle the cleanup */
+               return err;
+       }
+
+       percpu_stats->tx_packets++;
+       percpu_stats->tx_bytes += dpaa2_fd_get_len(&fd);
+
+       return 0;
+}
+
+static int dpaa2_eth_xdp_xmit(struct net_device *net_dev, int n,
+                             struct xdp_frame **frames, u32 flags)
+{
+       int drops = 0;
+       int i, err;
+
+       if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+               return -EINVAL;
+
+       if (!netif_running(net_dev))
+               return -ENETDOWN;
+
+       for (i = 0; i < n; i++) {
+               struct xdp_frame *xdpf = frames[i];
+
+               err = dpaa2_eth_xdp_xmit_frame(net_dev, xdpf);
+               if (err) {
+                       xdp_return_frame_rx_napi(xdpf);
+                       drops++;
+               }
+       }
+
+       return n - drops;
+}
+
 static const struct net_device_ops dpaa2_eth_ops = {
        .ndo_open = dpaa2_eth_open,
        .ndo_start_xmit = dpaa2_eth_tx,
@@ -1741,6 +1872,7 @@ static const struct net_device_ops dpaa2_eth_ops = {
        .ndo_do_ioctl = dpaa2_eth_ioctl,
        .ndo_change_mtu = dpaa2_eth_change_mtu,
        .ndo_bpf = dpaa2_eth_xdp,
+       .ndo_xdp_xmit = dpaa2_eth_xdp_xmit,
 };
 
 static void cdan_cb(struct dpaa2_io_notification_ctx *ctx)
@@ -2135,6 +2267,7 @@ static int set_buffer_layout(struct dpaa2_eth_priv *priv)
 {
        struct device *dev = priv->net_dev->dev.parent;
        struct dpni_buffer_layout buf_layout = {0};
+       u16 rx_buf_align;
        int err;
 
        /* We need to check for WRIOP version 1.0.0, but depending on the MC
@@ -2143,9 +2276,9 @@ static int set_buffer_layout(struct dpaa2_eth_priv *priv)
         */
        if (priv->dpni_attrs.wriop_version == DPAA2_WRIOP_VERSION(0, 0, 0) ||
            priv->dpni_attrs.wriop_version == DPAA2_WRIOP_VERSION(1, 0, 0))
-               priv->rx_buf_align = DPAA2_ETH_RX_BUF_ALIGN_REV1;
+               rx_buf_align = DPAA2_ETH_RX_BUF_ALIGN_REV1;
        else
-               priv->rx_buf_align = DPAA2_ETH_RX_BUF_ALIGN;
+               rx_buf_align = DPAA2_ETH_RX_BUF_ALIGN;
 
        /* tx buffer */
        buf_layout.private_data_size = DPAA2_ETH_SWA_SIZE;
@@ -2185,7 +2318,7 @@ static int set_buffer_layout(struct dpaa2_eth_priv *priv)
        /* rx buffer */
        buf_layout.pass_frame_status = true;
        buf_layout.pass_parser_result = true;
-       buf_layout.data_align = priv->rx_buf_align;
+       buf_layout.data_align = rx_buf_align;
        buf_layout.data_head_room = dpaa2_eth_rx_head_room(priv);
        buf_layout.private_data_size = 0;
        buf_layout.options = DPNI_BUF_LAYOUT_OPT_PARSER_RESULT |
@@ -2203,6 +2336,36 @@ static int set_buffer_layout(struct dpaa2_eth_priv *priv)
        return 0;
 }
 
+#define DPNI_ENQUEUE_FQID_VER_MAJOR    7
+#define DPNI_ENQUEUE_FQID_VER_MINOR    9
+
+static inline int dpaa2_eth_enqueue_qd(struct dpaa2_eth_priv *priv,
+                                      struct dpaa2_eth_fq *fq,
+                                      struct dpaa2_fd *fd, u8 prio)
+{
+       return dpaa2_io_service_enqueue_qd(fq->channel->dpio,
+                                          priv->tx_qdid, prio,
+                                          fq->tx_qdbin, fd);
+}
+
+static inline int dpaa2_eth_enqueue_fq(struct dpaa2_eth_priv *priv,
+                                      struct dpaa2_eth_fq *fq,
+                                      struct dpaa2_fd *fd,
+                                      u8 prio __always_unused)
+{
+       return dpaa2_io_service_enqueue_fq(fq->channel->dpio,
+                                          fq->tx_fqid, fd);
+}
+
+static void set_enqueue_mode(struct dpaa2_eth_priv *priv)
+{
+       if (dpaa2_eth_cmp_dpni_ver(priv, DPNI_ENQUEUE_FQID_VER_MAJOR,
+                                  DPNI_ENQUEUE_FQID_VER_MINOR) < 0)
+               priv->enqueue = dpaa2_eth_enqueue_qd;
+       else
+               priv->enqueue = dpaa2_eth_enqueue_fq;
+}
+
 /* Configure the DPNI object this interface is associated with */
 static int setup_dpni(struct fsl_mc_device *ls_dev)
 {
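
Note: the priv->enqueue callback installed by set_enqueue_mode() (and already used in xdp_enqueue() and dpaa2_eth_tx() above) is declared in dpaa2-eth.h, not in this file. A sketch of the presumed member, matching the signatures of the two helpers:

        /* Presumed member of struct dpaa2_eth_priv (dpaa2-eth.h), not in this diff */
        int (*enqueue)(struct dpaa2_eth_priv *priv,
                       struct dpaa2_eth_fq *fq,
                       struct dpaa2_fd *fd, u8 prio);

The version check selects the QDID-based helper for DPNI API versions older than 7.9 and the direct frame-queue enqueue (via fq->tx_fqid, captured in setup_tx_flow() below) otherwise.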
@@ -2256,6 +2419,8 @@ static int setup_dpni(struct fsl_mc_device *ls_dev)
        if (err)
                goto close;
 
+       set_enqueue_mode(priv);
+
        priv->cls_rules = devm_kzalloc(dev, sizeof(struct dpaa2_eth_cls_rule) *
                                       dpaa2_eth_fs_count(priv), GFP_KERNEL);
        if (!priv->cls_rules)
@@ -2321,6 +2486,21 @@ static int setup_rx_flow(struct dpaa2_eth_priv *priv,
                return err;
        }
 
+       /* xdp_rxq setup */
+       err = xdp_rxq_info_reg(&fq->channel->xdp_rxq, priv->net_dev,
+                              fq->flowid);
+       if (err) {
+               dev_err(dev, "xdp_rxq_info_reg failed\n");
+               return err;
+       }
+
+       err = xdp_rxq_info_reg_mem_model(&fq->channel->xdp_rxq,
+                                        MEM_TYPE_PAGE_ORDER0, NULL);
+       if (err) {
+               dev_err(dev, "xdp_rxq_info_reg_mem_model failed\n");
+               return err;
+       }
+
        return 0;
 }
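
Note: xdp_rxq_info_reg() operates on fq->channel->xdp_rxq, and run_xdp() earlier points xdp.rxq at the same structure; the member itself (together with ch->xdp.res used in the NAPI poll) lives in the channel structure in dpaa2-eth.h, outside this diff. A sketch of the presumed additions, abridged to what this patch touches:

        /* Presumed additions in dpaa2-eth.h, layout abridged, not in this diff */
        struct dpaa2_eth_ch_xdp {
                struct bpf_prog *prog;
                unsigned int res;       /* OR of XDP verdicts seen in one NAPI poll */
        };

        struct dpaa2_eth_channel {
                struct napi_struct napi;
                struct dpaa2_eth_ch_xdp xdp;
                struct xdp_rxq_info xdp_rxq;
        };

Registering MEM_TYPE_PAGE_ORDER0 matches the order-0 pages handed to hardware in add_bufs(), so frames redirected away from this interface can be returned to the page allocator by the XDP core.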
 
@@ -2340,6 +2520,7 @@ static int setup_tx_flow(struct dpaa2_eth_priv *priv,
        }
 
        fq->tx_qdbin = qid.qdbin;
+       fq->tx_fqid = qid.fqid;
 
        err = dpni_get_queue(priv->mc_io, 0, priv->mc_token,
                             DPNI_QUEUE_TX_CONFIRM, 0, fq->flowid,
@@ -3084,6 +3265,10 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev)
                goto err_netdev_reg;
        }
 
+#ifdef CONFIG_DEBUG_FS
+       dpaa2_dbg_add(priv);
+#endif
+
        dev_info(dev, "Probed interface %s\n", net_dev->name);
        return 0;
 
@@ -3127,6 +3312,9 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev)
        net_dev = dev_get_drvdata(dev);
        priv = netdev_priv(net_dev);
 
+#ifdef CONFIG_DEBUG_FS
+       dpaa2_dbg_remove(priv);
+#endif
        unregister_netdev(net_dev);
 
        if (priv->do_link_poll)
@@ -3171,4 +3359,25 @@ static struct fsl_mc_driver dpaa2_eth_driver = {
        .match_id_table = dpaa2_eth_match_id_table
 };
 
-module_fsl_mc_driver(dpaa2_eth_driver);
+static int __init dpaa2_eth_driver_init(void)
+{
+       int err;
+
+       dpaa2_eth_dbg_init();
+       err = fsl_mc_driver_register(&dpaa2_eth_driver);
+       if (err) {
+               dpaa2_eth_dbg_exit();
+               return err;
+       }
+
+       return 0;
+}
+
+static void __exit dpaa2_eth_driver_exit(void)
+{
+       dpaa2_eth_dbg_exit();
+       fsl_mc_driver_unregister(&dpaa2_eth_driver);
+}
+
+module_init(dpaa2_eth_driver_init);
+module_exit(dpaa2_eth_driver_exit);