[RFC,v3,1/6] net/af_xdp: new PMD driver

Message ID 20180816144321.17719-2-qi.z.zhang@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers
Series [RFC,v3,1/6] net/af_xdp: new PMD driver |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation fail Compilation issues

Commit Message

Qi Zhang Aug. 16, 2018, 2:43 p.m. UTC
  Add a new PMD driver for AF_XDP, which is a proposed
faster version of the AF_PACKET interface in Linux.
https://fosdem.org/2018/schedule/event/af_xdp/
https://lwn.net/Articles/745934/

This patch enables the vanilla version.
Packet data is copied between the xdp socket's memory buffer and
the rx queue's mbuf mempool; allocation of the xdp socket's memory
buffers is simply managed by a FIFO ring.
Further improvements will be covered in following patches.

Signed-off-by: Qi Zhang <qi.z.zhang@intel.com>
---
 config/common_base                            |    5 +
 config/common_linuxapp                        |    1 +
 drivers/net/Makefile                          |    1 +
 drivers/net/af_xdp/Makefile                   |   30 +
 drivers/net/af_xdp/meson.build                |    7 +
 drivers/net/af_xdp/rte_eth_af_xdp.c           | 1247 +++++++++++++++++++++++++
 drivers/net/af_xdp/rte_pmd_af_xdp_version.map |    4 +
 mk/rte.app.mk                                 |    1 +
 8 files changed, 1296 insertions(+)
 create mode 100644 drivers/net/af_xdp/Makefile
 create mode 100644 drivers/net/af_xdp/meson.build
 create mode 100644 drivers/net/af_xdp/rte_eth_af_xdp.c
 create mode 100644 drivers/net/af_xdp/rte_pmd_af_xdp_version.map
  

Patch

diff --git a/config/common_base b/config/common_base
index 4bcbaf923..81aa81754 100644
--- a/config/common_base
+++ b/config/common_base
@@ -383,6 +383,11 @@  CONFIG_RTE_LIBRTE_VMXNET3_DEBUG_TX_FREE=n
 CONFIG_RTE_LIBRTE_PMD_AF_PACKET=n
 
 #
+# Compile software PMD backed by AF_XDP sockets (Linux only)
+#
+CONFIG_RTE_LIBRTE_PMD_AF_XDP=n
+
+#
 # Compile link bonding PMD library
 #
 CONFIG_RTE_LIBRTE_PMD_BOND=y
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 9c5ea9d89..5fa1cfb87 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -18,6 +18,7 @@  CONFIG_RTE_LIBRTE_PMD_VHOST=y
 CONFIG_RTE_LIBRTE_IFC_PMD=y
 CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
 CONFIG_RTE_LIBRTE_PMD_SOFTNIC=y
+CONFIG_RTE_LIBRTE_PMD_AF_XDP=y
 CONFIG_RTE_LIBRTE_PMD_TAP=y
 CONFIG_RTE_LIBRTE_AVP_PMD=y
 CONFIG_RTE_LIBRTE_VDEV_NETVSC_PMD=y
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 664398de9..7cff65c45 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -9,6 +9,7 @@  ifeq ($(CONFIG_RTE_LIBRTE_THUNDERX_NICVF_PMD),d)
 endif
 
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_AF_PACKET) += af_packet
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_AF_XDP) += af_xdp
 DIRS-$(CONFIG_RTE_LIBRTE_ARK_PMD) += ark
 DIRS-$(CONFIG_RTE_LIBRTE_AVF_PMD) += avf
 DIRS-$(CONFIG_RTE_LIBRTE_AVP_PMD) += avp
diff --git a/drivers/net/af_xdp/Makefile b/drivers/net/af_xdp/Makefile
new file mode 100644
index 000000000..8dee0144a
--- /dev/null
+++ b/drivers/net/af_xdp/Makefile
@@ -0,0 +1,30 @@ 
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_af_xdp.a
+
+EXPORT_MAP := rte_pmd_af_xdp_version.map
+
+LIBABIVER := 1
+
+
+CFLAGS += -O3
+# FIXME: the include path below is developer-local and must be removed before merging
+CFLAGS += -I/home/qzhan15/bpf/usr/include
+
+CFLAGS += $(WERROR_FLAGS)
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_bus_vdev
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_AF_XDP) += rte_eth_af_xdp.c
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/af_xdp/meson.build b/drivers/net/af_xdp/meson.build
new file mode 100644
index 000000000..4b6652685
--- /dev/null
+++ b/drivers/net/af_xdp/meson.build
@@ -0,0 +1,7 @@ 
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+if host_machine.system() != 'linux'
+	build = false
+endif
+sources = files('rte_eth_af_xdp.c')
diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
new file mode 100644
index 000000000..12252014d
--- /dev/null
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -0,0 +1,1247 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation.
+ */
+
+#include <rte_mbuf.h>
+#include <rte_ethdev_driver.h>
+#include <rte_ethdev_vdev.h>
+#include <rte_malloc.h>
+#include <rte_kvargs.h>
+#include <rte_bus_vdev.h>
+
+#include <linux/if_ether.h>
+#include <linux/if_xdp.h>
+#include <linux/if_link.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <poll.h>
+#include <bpf/bpf.h>
+
+/* Fallback values used when the system headers do not define the AF_XDP constants. */
+#ifndef SOL_XDP
+#define SOL_XDP 283
+#endif
+
+#ifndef AF_XDP
+#define AF_XDP 44
+#endif
+
+#ifndef PF_XDP
+#define PF_XDP AF_XDP
+#endif
+
+/* Names of the vdev arguments accepted by this driver. */
+#define ETH_AF_XDP_IFACE_ARG			"iface"
+#define ETH_AF_XDP_QUEUE_IDX_ARG		"queue"
+#define ETH_AF_XDP_XSK_MAP_ID_ARG		"xsk_map_id"
+#define ETH_AF_XDP_XSK_MAP_KEY_START_ARG	"xsk_map_key_start"
+#define ETH_AF_XDP_XSK_MAP_KEY_COUNT_ARG	"xsk_map_key_count"
+
+/* umem geometry (frame size, frame count, headroom) and ring sizes. */
+#define ETH_AF_XDP_FRAME_SIZE		2048
+#define ETH_AF_XDP_NUM_BUFFERS		4096
+#define ETH_AF_XDP_DATA_HEADROOM	0
+#define ETH_AF_XDP_DFLT_NUM_DESCS	1024
+#define ETH_AF_XDP_FQ_NUM_DESCS		1024
+#define ETH_AF_XDP_CQ_NUM_DESCS		1024
+#define ETH_AF_XDP_DFLT_QUEUE_IDX	0
+
+/* Upper bound on the number of packets handled by one rx/tx burst call. */
+#define ETH_AF_XDP_RX_BATCH_SIZE	16
+#define ETH_AF_XDP_TX_BATCH_SIZE	16
+
+/* Size of the static rx/tx queue arrays in struct pmd_internals. */
+#define ETH_AF_XDP_MAX_QUEUE_PAIRS	16
+
+/* User-space view of a umem fill or completion ring mapped from the socket. */
+struct xdp_umem_uqueue {
+	uint32_t cached_prod; /* local copy of the producer index */
+	uint32_t cached_cons; /* local copy of the consumer index */
+	uint32_t mask; /* ring size - 1, used to wrap indices */
+	uint32_t size; /* number of ring entries */
+	uint32_t *producer; /* producer index inside the mapping */
+	uint32_t *consumer; /* consumer index inside the mapping */
+	uint64_t *ring; /* array of frame addresses inside the mapping */
+	void *map; /* base address returned by mmap() */
+};
+
+/* Packet buffer area registered with an xdp socket, plus its rings. */
+struct xdp_umem {
+	char *frames; /* start of the frame memory; frame addresses are offsets into it */
+	struct xdp_umem_uqueue fq; /* fill ring */
+	struct xdp_umem_uqueue cq; /* completion ring */
+	struct rte_ring *buf_ring; /* be used to manage the buffer */
+	int fd; /* socket the umem was registered on */
+};
+
+/* User-space view of an rx or tx descriptor ring mapped from the socket. */
+struct xdp_uqueue {
+	uint32_t cached_prod; /* local copy of the producer index */
+	uint32_t cached_cons; /* local copy of the consumer index */
+	uint32_t mask; /* ring size - 1, used to wrap indices */
+	uint32_t size; /* number of ring entries */
+	uint32_t *producer; /* producer index inside the mapping */
+	uint32_t *consumer; /* consumer index inside the mapping */
+	struct  xdp_desc *ring; /* descriptor array inside the mapping */
+	void *map; /* base address returned by mmap() */
+};
+
+/*
+ * Number of descriptors ready to be consumed from q, capped at ndescs.
+ * The shared producer index is re-read only when the cached view is empty.
+ */
+static inline uint32_t xq_nb_avail(struct xdp_uqueue *q, uint32_t ndescs)
+{
+	uint32_t pending = q->cached_prod - q->cached_cons;
+
+	if (!pending) {
+		q->cached_prod = *q->producer;
+		pending = q->cached_prod - q->cached_cons;
+	}
+
+	if (pending > ndescs)
+		return ndescs;
+	return pending;
+}
+
+/*
+ * Number of free slots in q. The shared consumer index is re-read only
+ * when the cached view cannot satisfy a request for ndescs slots.
+ */
+static inline uint32_t xq_nb_free(struct xdp_uqueue *q, uint32_t ndescs)
+{
+	uint32_t room = q->cached_cons - q->cached_prod;
+
+	if (room < ndescs) {
+		/* cached view is too small: reload the consumer index */
+		q->cached_cons = *q->consumer + q->size;
+		room = q->cached_cons - q->cached_prod;
+	}
+
+	return room;
+}
+
+/*
+ * Number of entries ready to be consumed from a umem ring, capped at nb.
+ * The shared producer index is re-read only when the cached view is empty.
+ */
+static inline uint32_t umem_nb_avail(struct xdp_umem_uqueue *q, uint32_t nb)
+{
+	uint32_t pending = q->cached_prod - q->cached_cons;
+
+	if (!pending) {
+		q->cached_prod = *q->producer;
+		pending = q->cached_prod - q->cached_cons;
+	}
+
+	if (pending > nb)
+		return nb;
+	return pending;
+}
+
+/*
+ * Number of free slots in a umem ring. The shared consumer index is
+ * re-read only when the cached view cannot satisfy a request for nb slots.
+ */
+static inline uint32_t umem_nb_free(struct xdp_umem_uqueue *q, uint32_t nb)
+{
+	uint32_t room = q->cached_cons - q->cached_prod;
+
+	if (room < nb) {
+		/* cached view is too small: reload the consumer index */
+		q->cached_cons = *q->consumer + q->size;
+		room = q->cached_cons - q->cached_prod;
+	}
+
+	return room;
+}
+
+/*
+ * Post the addresses of nb descriptors to the fill ring.
+ * Returns 0 on success, -ENOSPC when the ring has fewer than nb free slots.
+ */
+static inline int umem_fill_to_kernel_ex(struct xdp_umem_uqueue *fq,
+					 struct xdp_desc *d,
+					 size_t nb)
+{
+	uint32_t n;
+
+	if (umem_nb_free(fq, nb) < nb)
+		return -ENOSPC;
+
+	for (n = 0; n < nb; n++) {
+		fq->ring[fq->cached_prod & fq->mask] = d[n].addr;
+		fq->cached_prod++;
+	}
+
+	/* entries must be written before the new producer index is published */
+	rte_smp_wmb();
+	*fq->producer = fq->cached_prod;
+
+	return 0;
+}
+
+/*
+ * Post nb frame addresses to the fill ring.
+ * Returns 0 on success, -ENOSPC when the ring has fewer than nb free slots.
+ */
+static inline int umem_fill_to_kernel(struct xdp_umem_uqueue *fq,
+				      uint64_t *d,
+				      size_t nb)
+{
+	uint32_t n;
+
+	if (umem_nb_free(fq, nb) < nb)
+		return -ENOSPC;
+
+	for (n = 0; n < nb; n++) {
+		fq->ring[fq->cached_prod & fq->mask] = d[n];
+		fq->cached_prod++;
+	}
+
+	/* entries must be written before the new producer index is published */
+	rte_smp_wmb();
+	*fq->producer = fq->cached_prod;
+
+	return 0;
+}
+
+/*
+ * Collect up to nb frame addresses from the completion ring into d.
+ * Returns the number of addresses copied.
+ */
+static inline size_t umem_complete_from_kernel(struct xdp_umem_uqueue *cq,
+					       uint64_t *d, size_t nb)
+{
+	uint32_t avail = umem_nb_avail(cq, nb);
+	uint32_t n;
+
+	rte_smp_rmb();
+
+	if (avail == 0)
+		return 0;
+
+	for (n = 0; n < avail; n++) {
+		d[n] = cq->ring[cq->cached_cons & cq->mask];
+		cq->cached_cons++;
+	}
+
+	/* finish reading the entries before handing the slots back */
+	rte_smp_wmb();
+	*cq->consumer = cq->cached_cons;
+
+	return avail;
+}
+
+/*
+ * Append ndescs descriptors (address and length only) to a descriptor ring.
+ * Returns 0 on success, -ENOSPC when fewer than ndescs slots are free.
+ */
+static inline int xq_enq(struct xdp_uqueue *uq,
+			 const struct xdp_desc *descs,
+			 unsigned int ndescs)
+{
+	struct xdp_desc *slots = uq->ring;
+	unsigned int n;
+
+	if (xq_nb_free(uq, ndescs) < ndescs)
+		return -ENOSPC;
+
+	for (n = 0; n < ndescs; n++) {
+		struct xdp_desc *slot = &slots[uq->cached_prod & uq->mask];
+
+		slot->addr = descs[n].addr;
+		slot->len = descs[n].len;
+		uq->cached_prod++;
+	}
+
+	/* descriptors must be written before the new producer index is published */
+	rte_smp_wmb();
+	*uq->producer = uq->cached_prod;
+
+	return 0;
+}
+
+/*
+ * Copy up to ndescs descriptors out of a descriptor ring into descs.
+ * Returns the number of descriptors copied.
+ */
+static inline int xq_deq(struct xdp_uqueue *uq,
+			 struct xdp_desc *descs,
+			 int ndescs)
+{
+	struct xdp_desc *slots = uq->ring;
+	int avail = xq_nb_avail(uq, ndescs);
+	int n;
+
+	rte_smp_rmb();
+
+	for (n = 0; n < avail; n++) {
+		unsigned int slot = uq->cached_cons++ & uq->mask;
+
+		descs[n] = slots[slot];
+	}
+
+	if (avail > 0) {
+		/* finish reading the descriptors before handing the slots back */
+		rte_smp_wmb();
+		*uq->consumer = uq->cached_cons;
+	}
+
+	return avail;
+}
+
+/* Per-queue receive state; also owns the xdp socket used by the paired tx queue. */
+struct pkt_rx_queue {
+	int xsk_fd; /* xdp socket; 0 is treated as "no socket" by queue_reset() */
+	uint16_t queue_idx;
+	struct xdp_uqueue rx; /* rx descriptor ring */
+	struct xdp_umem *umem; /* frame memory used by this queue pair */
+	struct rte_mempool *mb_pool; /* mbufs handed to the application come from here */
+
+	/* software counters updated by the rx burst function */
+	unsigned long rx_pkts;
+	unsigned long rx_bytes;
+	unsigned long rx_dropped;
+
+	struct pkt_tx_queue *pair; /* tx queue with the same index */
+};
+
+/* Per-queue transmit state; socket and umem are reached through the paired rx queue. */
+struct pkt_tx_queue {
+	uint16_t queue_idx;
+	struct xdp_uqueue tx; /* tx descriptor ring */
+
+	/* software counters updated by the tx burst function */
+	unsigned long tx_pkts;
+	unsigned long err_pkts;
+	unsigned long tx_bytes;
+
+	struct pkt_rx_queue *pair; /* rx queue with the same index */
+};
+
+/* Private data of one af_xdp port. */
+struct pmd_internals {
+	int if_index; /* index of the backing network interface */
+	char if_name[IFNAMSIZ]; /* name of the backing network interface */
+	uint16_t queue_idx; /* value of the "queue" argument, used as sxdp_queue_id on bind */
+	struct ether_addr eth_addr; /* MAC address read from the interface */
+	struct xdp_umem *umem_share; /* umem created by the first queue and reused by the others */
+	int umem_share_count; /* number of queues currently using umem_share */
+	struct rte_mempool *mb_pool_share;
+	int xsk_map_id; /* id of the BPF map the sockets are inserted into */
+	int xsk_map_key_start; /* map key used for queue 0 */
+	int xsk_map_key_count; /* reported as the maximum number of rx/tx queues */
+
+	struct pkt_rx_queue rx_queues[ETH_AF_XDP_MAX_QUEUE_PAIRS];
+	struct pkt_tx_queue tx_queues[ETH_AF_XDP_MAX_QUEUE_PAIRS];
+};
+
+/* NULL-terminated list of argument keys handed to rte_kvargs_parse() at probe time. */
+static const char * const valid_arguments[] = {
+	ETH_AF_XDP_IFACE_ARG,
+	ETH_AF_XDP_QUEUE_IDX_ARG,
+	ETH_AF_XDP_XSK_MAP_ID_ARG,
+	ETH_AF_XDP_XSK_MAP_KEY_START_ARG,
+	ETH_AF_XDP_XSK_MAP_KEY_COUNT_ARG,
+	NULL
+};
+
+/* Initial link state copied into every new port; the link starts down. */
+static struct rte_eth_link pmd_link = {
+	.link_speed = ETH_SPEED_NUM_10G,
+	.link_duplex = ETH_LINK_FULL_DUPLEX,
+	.link_status = ETH_LINK_DOWN,
+	.link_autoneg = ETH_LINK_AUTONEG
+};
+
+/* Translate a umem frame address (an offset) into a pointer into the frame memory. */
+static char *get_pkt_data(struct xdp_umem *umem, uint64_t addr)
+{
+	return umem->frames + addr;
+}
+
+/*
+ * Rx burst callback.
+ *
+ * Tops up the fill ring when at least half of it is free, then copies up
+ * to ETH_AF_XDP_RX_BATCH_SIZE received frames into freshly allocated mbufs
+ * and recycles the frames through the buffer ring.
+ *
+ * Returns the number of mbufs stored in bufs. Frames for which no mbuf
+ * could be allocated are counted in rx_dropped.
+ */
+static uint16_t
+eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct xdp_desc descs[ETH_AF_XDP_RX_BATCH_SIZE];
+	void *addrs[ETH_AF_XDP_RX_BATCH_SIZE];
+	struct pkt_rx_queue *rxq = queue;
+	struct xdp_uqueue *uq = &rxq->rx;
+	struct xdp_umem_uqueue *fq = &rxq->umem->fq;
+	uint32_t free_thresh = fq->size >> 1;
+	struct rte_mbuf *mbuf;
+	unsigned long dropped = 0;
+	unsigned long rx_bytes = 0;
+	uint16_t count = 0;
+	int rcvd, i;
+
+	nb_pkts = nb_pkts < ETH_AF_XDP_RX_BATCH_SIZE ?
+		  nb_pkts : ETH_AF_XDP_RX_BATCH_SIZE;
+
+	if (umem_nb_free(fq, free_thresh) >= free_thresh) {
+		int n = rte_ring_dequeue_bulk(rxq->umem->buf_ring,
+					      addrs,
+					      ETH_AF_XDP_RX_BATCH_SIZE,
+					      NULL);
+		/*
+		 * No spare frames is not an error for the burst: the return
+		 * type is unsigned, so a negative errno would be read as a
+		 * huge packet count. Skip the refill and still drain the rx
+		 * ring below, which is what returns frames to the buffer ring.
+		 */
+		if (n != 0 &&
+		    umem_fill_to_kernel(fq, (uint64_t *)&addrs[0],
+					ETH_AF_XDP_RX_BATCH_SIZE)) {
+			rte_ring_enqueue_bulk(rxq->umem->buf_ring,
+					      addrs,
+					      ETH_AF_XDP_RX_BATCH_SIZE,
+					      NULL);
+		}
+	}
+
+	/* read data */
+	rcvd = xq_deq(uq, descs, nb_pkts);
+	if (rcvd == 0)
+		return 0;
+
+	for (i = 0; i < rcvd; i++) {
+		char *pkt;
+		uint64_t addr = descs[i].addr;
+
+		mbuf = rte_pktmbuf_alloc(rxq->mb_pool);
+		if (mbuf) {
+			/* only touch the mbuf once the allocation is known good */
+			rte_pktmbuf_pkt_len(mbuf) = descs[i].len;
+			rte_pktmbuf_data_len(mbuf) = descs[i].len;
+			pkt = get_pkt_data(rxq->umem, addr);
+			memcpy(rte_pktmbuf_mtod(mbuf, void *),
+			       pkt, descs[i].len);
+			rx_bytes += descs[i].len;
+			bufs[count++] = mbuf;
+		} else {
+			dropped++;
+		}
+		/* the frame is recycled whether or not the packet was delivered */
+		addrs[i] = (void *)(uintptr_t)addr;
+	}
+
+	rte_ring_enqueue_bulk(rxq->umem->buf_ring, addrs, rcvd, NULL);
+
+	rxq->rx_pkts += (rcvd - dropped);
+	rxq->rx_bytes += rx_bytes;
+	rxq->rx_dropped += dropped;
+
+	return count;
+}
+
+/*
+ * Ask the kernel to transmit what is queued on the tx ring. While the
+ * kernel reports EAGAIN, completed frames are pulled from the completion
+ * ring and returned to the buffer ring; EBUSY is simply retried.
+ */
+static void kick_tx(struct pkt_tx_queue *txq)
+{
+	void *addrs[ETH_AF_XDP_TX_BATCH_SIZE];
+	struct rte_ring *buf_ring = txq->pair->umem->buf_ring;
+	struct xdp_umem_uqueue *cq = &txq->pair->umem->cq;
+	int fd = txq->pair->xsk_fd;
+	int n;
+
+	while (sendto(fd, NULL, 0, MSG_DONTWAIT, NULL, 0) < 0) {
+		if (errno == EBUSY)
+			continue;
+
+		/* something unexpected */
+		if (errno != EAGAIN)
+			break;
+
+		/* pull from the completion queue to make more space */
+		n = umem_complete_from_kernel(cq,
+				(uint64_t *)&addrs[0],
+				ETH_AF_XDP_TX_BATCH_SIZE);
+		if (n > 0)
+			rte_ring_enqueue_bulk(buf_ring,
+				addrs, n, NULL);
+	}
+}
+
+/*
+ * Tx burst callback.
+ *
+ * Recycles completed frames, takes one umem frame per packet from the
+ * buffer ring, copies each packet that fits into a frame and queues the
+ * resulting descriptors on the tx ring. Every mbuf of the (batch-limited)
+ * burst is freed, whether it was sent or not.
+ *
+ * Returns the number of mbufs consumed; packets that were not queued are
+ * counted in err_pkts. Returns 0 when no frames are available.
+ */
+static uint16_t
+eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct pkt_tx_queue *txq = queue;
+	struct xdp_uqueue *uq = &txq->tx;
+	struct xdp_umem_uqueue *cq = &txq->pair->umem->cq;
+	struct rte_mbuf *mbuf;
+	struct xdp_desc descs[ETH_AF_XDP_TX_BATCH_SIZE];
+	void *addrs[ETH_AF_XDP_TX_BATCH_SIZE];
+	uint16_t i, valid;
+	unsigned long tx_bytes = 0;
+
+	nb_pkts = nb_pkts < ETH_AF_XDP_TX_BATCH_SIZE ?
+		  nb_pkts : ETH_AF_XDP_TX_BATCH_SIZE;
+
+	int n = umem_complete_from_kernel(cq, (uint64_t *)&addrs[0],
+					  ETH_AF_XDP_TX_BATCH_SIZE);
+	if (n > 0)
+		rte_ring_enqueue_bulk(txq->pair->umem->buf_ring,
+				addrs, n, NULL);
+
+	nb_pkts = rte_ring_dequeue_bulk(txq->pair->umem->buf_ring, addrs,
+					nb_pkts, NULL);
+	if (!nb_pkts)
+		return 0;
+
+	valid = 0;
+	for (i = 0; i < nb_pkts; i++) {
+		char *pkt;
+		unsigned int buf_len =
+			ETH_AF_XDP_FRAME_SIZE - ETH_AF_XDP_DATA_HEADROOM;
+		mbuf = bufs[i];
+		if (mbuf->pkt_len <= buf_len) {
+			descs[valid].addr = (uint64_t)(uintptr_t)addrs[valid];
+			descs[valid].len = mbuf->pkt_len;
+			descs[valid].options = 0;
+			pkt = get_pkt_data(txq->pair->umem, descs[valid].addr);
+			/*
+			 * Use the descriptor just filled in: once an oversized
+			 * packet has been skipped, i and valid differ and
+			 * descs[i] is not initialized.
+			 */
+			memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
+			       descs[valid].len);
+			valid++;
+			tx_bytes += mbuf->pkt_len;
+		}
+		rte_pktmbuf_free(mbuf);
+	}
+
+	if (xq_enq(uq, descs, valid)) {
+		valid = 0;
+		tx_bytes = 0;
+	} else {
+		kick_tx(txq);
+	}
+
+	/* frames that were not handed to the kernel go back to the buffer ring */
+	if (valid < nb_pkts)
+		rte_ring_enqueue_bulk(txq->pair->umem->buf_ring, &addrs[valid],
+				      nb_pkts - valid, NULL);
+
+	txq->err_pkts += (nb_pkts - valid);
+	txq->tx_pkts += valid;
+	txq->tx_bytes += tx_bytes;
+
+	return nb_pkts;
+}
+
+/*
+ * Pre-populate up to half of the fill ring with frames taken from the
+ * buffer ring. Stops early when the buffer ring runs empty or the fill
+ * ring is full.
+ */
+static void
+fill_rx_desc(struct xdp_umem *umem)
+{
+	struct xdp_umem_uqueue *fq = &umem->fq;
+	void *p = NULL;
+	uint32_t i;
+
+	for (i = 0; i < fq->size / 2; i++) {
+		/* an empty buffer ring leaves p stale; never post that address */
+		if (rte_ring_dequeue(umem->buf_ring, &p) != 0)
+			break;
+		if (umem_fill_to_kernel(fq, (uint64_t *)&p, 1)) {
+			rte_ring_enqueue(umem->buf_ring, p);
+			break;
+		}
+	}
+}
+
+/* Port start callback: only marks the reported link as up. */
+static int
+eth_dev_start(struct rte_eth_dev *dev)
+{
+	struct rte_eth_link *link = &dev->data->dev_link;
+
+	link->link_status = ETH_LINK_UP;
+	return 0;
+}
+
+/* Port stop callback: only marks the reported link as down. */
+static void
+eth_dev_stop(struct rte_eth_dev *dev)
+{
+	struct rte_eth_link *link = &dev->data->dev_link;
+
+	link->link_status = ETH_LINK_DOWN;
+}
+
+/*
+ * Configure callback. Each rx queue shares its xdp socket with the tx
+ * queue of the same index, so both counts must match.
+ * Returns 0 on success, -EINVAL when the counts differ.
+ */
+static int
+eth_dev_configure(struct rte_eth_dev *dev)
+{
+	/* rx/tx must be paired */
+	if (dev->data->nb_rx_queues != dev->data->nb_tx_queues)
+		return -EINVAL;
+
+	return 0;
+}
+
+/* Report device limits and defaults; queue limits come from xsk_map_key_count. */
+static void
+eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
+{
+	struct pmd_internals *priv = dev->data->dev_private;
+	const int nb_queues = priv->xsk_map_key_count;
+
+	dev_info->if_index = priv->if_index;
+	dev_info->max_mac_addrs = 1;
+	dev_info->max_rx_pktlen = (uint32_t)ETH_FRAME_LEN;
+	dev_info->min_rx_bufsize = 0;
+
+	dev_info->max_rx_queues = nb_queues;
+	dev_info->max_tx_queues = nb_queues;
+
+	dev_info->default_rxportconf.nb_queues = 1;
+	dev_info->default_rxportconf.ring_size = ETH_AF_XDP_DFLT_NUM_DESCS;
+
+	dev_info->default_txportconf.nb_queues = 1;
+	dev_info->default_txportconf.ring_size = ETH_AF_XDP_DFLT_NUM_DESCS;
+}
+
+/*
+ * Accumulate the per-queue software counters into stats and add the
+ * kernel's rx_dropped count of each socket to imissed. A queue whose
+ * socket statistics cannot be read contributes only its software
+ * counters. Always returns 0.
+ */
+static int
+eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+	struct xdp_statistics xdp_stats;
+	struct pkt_rx_queue *rxq;
+	struct pkt_tx_queue *txq;
+	socklen_t optlen;
+	int i;
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		rxq = &internals->rx_queues[i];
+		txq = &internals->tx_queues[i];
+
+		stats->q_ipackets[i] = rxq->rx_pkts;
+		stats->q_ibytes[i] = rxq->rx_bytes;
+
+		stats->q_opackets[i] = txq->tx_pkts;
+		stats->q_errors[i] = txq->err_pkts;
+		stats->q_obytes[i] = txq->tx_bytes;
+
+		stats->ipackets += stats->q_ipackets[i];
+		stats->ibytes += stats->q_ibytes[i];
+		stats->imissed += rxq->rx_dropped;
+
+		/*
+		 * getsockopt() rewrites optlen, so reset it for every call,
+		 * and only read xdp_stats when the call filled it in.
+		 */
+		optlen = sizeof(xdp_stats);
+		if (getsockopt(rxq->xsk_fd, SOL_XDP, XDP_STATISTICS,
+			       &xdp_stats, &optlen) == 0)
+			stats->imissed += xdp_stats.rx_dropped;
+
+		stats->opackets += stats->q_opackets[i];
+		stats->oerrors += stats->q_errors[i];
+		stats->obytes += stats->q_obytes[i];
+	}
+
+	return 0;
+}
+
+/* Zero the software counters of every rx/tx queue slot. */
+static void
+eth_stats_reset(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *priv = dev->data->dev_private;
+	int q;
+
+	for (q = 0; q < ETH_AF_XDP_MAX_QUEUE_PAIRS; q++) {
+		struct pkt_rx_queue *rxq = &priv->rx_queues[q];
+		struct pkt_tx_queue *txq = &priv->tx_queues[q];
+
+		rxq->rx_pkts = 0;
+		rxq->rx_bytes = 0;
+		rxq->rx_dropped = 0;
+
+		txq->tx_pkts = 0;
+		txq->err_pkts = 0;
+		txq->tx_bytes = 0;
+	}
+}
+
+/* Close callback: intentionally empty, nothing is released here. */
+static void
+eth_dev_close(struct rte_eth_dev *dev __rte_unused)
+{
+}
+
+/* Queue release callback shared by rx and tx: intentionally empty. */
+static void
+eth_queue_release(void *q __rte_unused)
+{
+}
+
+/* Link update callback: does not query anything and always returns 0. */
+static int
+eth_link_update(struct rte_eth_dev *dev __rte_unused,
+		int wait_to_complete __rte_unused)
+{
+	return 0;
+}
+
+/*
+ * Release a umem: its frame memory, its buffer ring and the descriptor
+ * itself. The fill/completion ring mappings are not unmapped here.
+ */
+static void xdp_umem_destroy(struct xdp_umem *umem)
+{
+	struct rte_ring *ring = umem->buf_ring;
+
+	/* free() accepts NULL, so the frame pointer needs no guard */
+	free(umem->frames);
+
+	if (ring != NULL)
+		rte_ring_free(ring);
+
+	free(umem);
+}
+
+/*
+ * Create a umem for the xdp socket sfd: allocate the frame memory and the
+ * buffer ring that tracks free frames, register the memory with the
+ * socket, and create and map the fill and completion rings.
+ *
+ * Returns the new umem, or NULL on failure with everything allocated here
+ * released again.
+ */
+static struct xdp_umem *xdp_umem_configure(int sfd)
+{
+	int fq_size = ETH_AF_XDP_FQ_NUM_DESCS;
+	int cq_size = ETH_AF_XDP_CQ_NUM_DESCS;
+	struct xdp_mmap_offsets off;
+	struct xdp_umem_reg mr = {0};
+	struct xdp_umem *umem;
+	char ring_name[0x100];
+	socklen_t optlen;
+	size_t fq_len;
+	size_t cq_len;
+	void *bufs = NULL;
+	uint64_t i;
+
+	umem = calloc(1, sizeof(*umem));
+	if (!umem)
+		return NULL;
+
+	snprintf(ring_name, sizeof(ring_name), "%s_%d", "af_xdp_ring", sfd);
+	umem->buf_ring = rte_ring_create(ring_name,
+					 ETH_AF_XDP_NUM_BUFFERS,
+					 SOCKET_ID_ANY,
+					 0x0);
+	if (!umem->buf_ring) {
+		RTE_LOG(ERR, PMD,
+			"Failed to create rte_ring\n");
+		goto err;
+	}
+
+	/*
+	 * NOTE(review): a ring created without RING_F_EXACT_SZ presumably
+	 * holds one entry less than requested, so the last enqueue below
+	 * would fail silently - confirm against the rte_ring documentation.
+	 */
+	for (i = 0; i < ETH_AF_XDP_NUM_BUFFERS; i++)
+		rte_ring_enqueue(umem->buf_ring,
+				 (void *)(uintptr_t)(i * ETH_AF_XDP_FRAME_SIZE +
+					  ETH_AF_XDP_DATA_HEADROOM));
+
+	if (posix_memalign(&bufs, getpagesize(), /* PAGE_SIZE aligned */
+		ETH_AF_XDP_NUM_BUFFERS * ETH_AF_XDP_FRAME_SIZE)) {
+		RTE_LOG(ERR, PMD,
+			"Failed to allocate memory pool.\n");
+		goto err;
+	}
+	/* hand ownership to umem right away so the error path frees it */
+	umem->frames = bufs;
+
+	mr.addr = (uint64_t)(uintptr_t)bufs;
+	mr.len = ETH_AF_XDP_NUM_BUFFERS * ETH_AF_XDP_FRAME_SIZE;
+	mr.chunk_size = ETH_AF_XDP_FRAME_SIZE;
+	mr.headroom = ETH_AF_XDP_DATA_HEADROOM;
+
+	if (setsockopt(sfd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr))) {
+		RTE_LOG(ERR, PMD,
+			"Failed to register memory pool.\n");
+		goto err;
+	}
+
+	if (setsockopt(sfd, SOL_XDP, XDP_UMEM_FILL_RING, &fq_size,
+		       sizeof(int))) {
+		RTE_LOG(ERR, PMD,
+			"Failed to setup fill ring.\n");
+		goto err;
+	}
+
+	if (setsockopt(sfd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &cq_size,
+		       sizeof(int))) {
+		RTE_LOG(ERR, PMD,
+			"Failed to setup complete ring.\n");
+		goto err;
+	}
+
+	optlen = sizeof(off);
+	if (getsockopt(sfd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen)) {
+		RTE_LOG(ERR, PMD,
+			"Failed to get map fr/cr offset.\n");
+		goto err;
+	}
+
+	fq_len = off.fr.desc + fq_size * sizeof(uint64_t);
+	umem->fq.map = mmap(0, fq_len,
+			    PROT_READ | PROT_WRITE,
+			    MAP_SHARED | MAP_POPULATE, sfd,
+			    XDP_UMEM_PGOFF_FILL_RING);
+
+	/* mmap() reports failure through its return value, stored in .map */
+	if (umem->fq.map == MAP_FAILED) {
+		umem->fq.map = NULL;
+		RTE_LOG(ERR, PMD,
+			"Failed to allocate memory for fq.\n");
+		goto err;
+	}
+
+	umem->fq.mask = fq_size - 1;
+	umem->fq.size = fq_size;
+	umem->fq.producer =
+		(uint32_t *)((uint64_t)umem->fq.map + off.fr.producer);
+	umem->fq.consumer =
+		(uint32_t *)((uint64_t)umem->fq.map + off.fr.consumer);
+	umem->fq.ring = (uint64_t *)((uint64_t)umem->fq.map + off.fr.desc);
+	umem->fq.cached_cons = fq_size;
+
+	cq_len = off.cr.desc + cq_size * sizeof(uint64_t);
+	umem->cq.map = mmap(0, cq_len,
+			    PROT_READ | PROT_WRITE,
+			    MAP_SHARED | MAP_POPULATE, sfd,
+			    XDP_UMEM_PGOFF_COMPLETION_RING);
+
+	if (umem->cq.map == MAP_FAILED) {
+		umem->cq.map = NULL;
+		RTE_LOG(ERR, PMD,
+			"Failed to allocate memory for caq\n");
+		/* the fill ring mapping is not released by xdp_umem_destroy() */
+		munmap(umem->fq.map, fq_len);
+		umem->fq.map = NULL;
+		goto err;
+	}
+
+	umem->cq.mask = cq_size - 1;
+	umem->cq.size = cq_size;
+	umem->cq.producer =
+		(uint32_t *)((uint64_t)umem->cq.map + off.cr.producer);
+	umem->cq.consumer =
+		(uint32_t *)((uint64_t)umem->cq.map + off.cr.consumer);
+	umem->cq.ring = (uint64_t *)((uint64_t)umem->cq.map + off.cr.desc);
+
+	umem->fd = sfd;
+
+	return umem;
+
+err:
+	xdp_umem_destroy(umem);
+	return NULL;
+
+}
+
+/*
+ * Create the xdp socket of a queue pair, attach a umem to it (a new one
+ * when umem is NULL, otherwise the one passed in), create and map the rx
+ * and tx rings, and pre-populate the fill ring.
+ *
+ * Returns 0 on success. On failure returns -1 with the socket closed,
+ * rxq->xsk_fd reset to 0, a umem created here destroyed and the ring
+ * mappings made here released.
+ */
+static int
+xsk_configure(struct pkt_rx_queue *rxq, int ring_size, struct xdp_umem *umem)
+{
+	struct pkt_tx_queue *txq = rxq->pair;
+	struct xdp_mmap_offsets off;
+	int new_umem = 0;
+	socklen_t optlen;
+	size_t rx_len;
+	size_t tx_len;
+
+	rxq->xsk_fd = socket(PF_XDP, SOCK_RAW, 0);
+	if (rxq->xsk_fd < 0) {
+		/* 0 is the "no socket" marker tested by queue_reset() */
+		rxq->xsk_fd = 0;
+		return -1;
+	}
+
+	if (!umem) {
+		rxq->umem = xdp_umem_configure(rxq->xsk_fd);
+		if (!rxq->umem)
+			goto err;
+		new_umem = 1;
+	} else {
+		rxq->umem = umem;
+	}
+
+	if (setsockopt(rxq->xsk_fd, SOL_XDP, XDP_RX_RING,
+		       &ring_size, sizeof(int))) {
+		RTE_LOG(ERR, PMD, "Failed to setup Rx ring.\n");
+		goto err;
+	}
+
+	if (setsockopt(rxq->xsk_fd, SOL_XDP, XDP_TX_RING,
+		       &ring_size, sizeof(int))) {
+		RTE_LOG(ERR, PMD, "Failed to setup Tx ring.\n");
+		goto err;
+	}
+
+	optlen = sizeof(off);
+	if (getsockopt(rxq->xsk_fd, SOL_XDP, XDP_MMAP_OFFSETS,
+		       &off, &optlen)) {
+		RTE_LOG(ERR, PMD, "Failed to get map rx/tx offsets.\n");
+		goto err;
+	}
+
+	/* Rx */
+	rx_len = off.rx.desc + ring_size * sizeof(struct xdp_desc);
+	rxq->rx.map = mmap(NULL, rx_len,
+			   PROT_READ | PROT_WRITE,
+			   MAP_SHARED | MAP_POPULATE, rxq->xsk_fd,
+			   XDP_PGOFF_RX_RING);
+
+	/* mmap() reports failure through its return value, stored in .map */
+	if (rxq->rx.map == MAP_FAILED) {
+		rxq->rx.map = NULL;
+		RTE_LOG(ERR, PMD, "Failed to map Rx ring memory.\n");
+		goto err;
+	}
+
+	/* Tx */
+	tx_len = off.tx.desc + ring_size * sizeof(struct xdp_desc);
+	txq->tx.map = mmap(NULL, tx_len,
+			   PROT_READ | PROT_WRITE,
+			   MAP_SHARED | MAP_POPULATE, rxq->xsk_fd,
+			   XDP_PGOFF_TX_RING);
+
+	if (txq->tx.map == MAP_FAILED) {
+		txq->tx.map = NULL;
+		RTE_LOG(ERR, PMD, "Failed to map Tx ring memory\n");
+		munmap(rxq->rx.map, rx_len);
+		rxq->rx.map = NULL;
+		goto err;
+	}
+
+	rxq->rx.mask = ring_size - 1;
+	rxq->rx.size = ring_size;
+	rxq->rx.producer =
+		(uint32_t *)((uint64_t)rxq->rx.map + off.rx.producer);
+	rxq->rx.consumer =
+		(uint32_t *)((uint64_t)rxq->rx.map + off.rx.consumer);
+	rxq->rx.ring = (struct xdp_desc *)((uint64_t)rxq->rx.map + off.rx.desc);
+
+	txq->tx.mask = ring_size - 1;
+	txq->tx.size = ring_size;
+	txq->tx.producer =
+		(uint32_t *)((uint64_t)txq->tx.map + off.tx.producer);
+	txq->tx.consumer =
+		(uint32_t *)((uint64_t)txq->tx.map + off.tx.consumer);
+	txq->tx.ring = (struct xdp_desc *)((uint64_t)txq->tx.map + off.tx.desc);
+	txq->tx.cached_cons = ring_size;
+
+	/*
+	 * Populate the fill ring only once nothing can fail any more, so
+	 * that a failed setup never strands frames of a shared umem.
+	 */
+	fill_rx_desc(rxq->umem);
+
+	return 0;
+
+err:
+	if (new_umem)
+		xdp_umem_destroy(rxq->umem);
+	rxq->umem = NULL;
+	close(rxq->xsk_fd);
+	rxq->xsk_fd = 0;
+
+	return -1;
+}
+
+/*
+ * Return a queue pair to its pristine state: close its socket if it has
+ * one, drop its reference on the shared umem (destroying the umem when
+ * the last reference goes away), then zero both queue structures and
+ * restore their pairing and index.
+ */
+static void
+queue_reset(struct pmd_internals *internals, uint16_t queue_idx)
+{
+	struct pkt_rx_queue *rxq = &internals->rx_queues[queue_idx];
+	struct pkt_tx_queue *txq = rxq->pair;
+
+	if (rxq->xsk_fd) {
+		close(rxq->xsk_fd);
+
+		if (internals->umem_share_count > 0 &&
+		    --internals->umem_share_count == 0 &&
+		    internals->umem_share) {
+			xdp_umem_destroy(internals->umem_share);
+			internals->umem_share = NULL;
+		}
+	}
+
+	memset(rxq, 0, sizeof(*rxq));
+	rxq->pair = txq;
+	rxq->queue_idx = queue_idx;
+
+	memset(txq, 0, sizeof(*txq));
+	txq->pair = rxq;
+	txq->queue_idx = queue_idx;
+}
+
+/*
+ * Rx queue setup callback: creates the xdp socket of the queue pair,
+ * binds it to the interface (sharing the port's umem when one already
+ * exists) and inserts the socket into the BPF map at key
+ * xsk_map_key_start + rx_queue_id.
+ *
+ * Returns 0 on success, -EINVAL on an invalid queue id or any setup
+ * failure, -ENOMEM when a umem frame does not fit into an mbuf of mb_pool.
+ */
+static int
+eth_rx_queue_setup(struct rte_eth_dev *dev,
+		   uint16_t rx_queue_id,
+		   uint16_t nb_rx_desc,
+		   unsigned int socket_id __rte_unused,
+		   const struct rte_eth_rxconf *rx_conf __rte_unused,
+		   struct rte_mempool *mb_pool)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+	unsigned int buf_size, data_size;
+	struct pkt_rx_queue *rxq;
+	struct sockaddr_xdp sxdp = {0};
+	int xsk_key;
+	int map_fd;
+	int ret;
+
+	/* the second test guards the statically sized queue arrays */
+	if (dev->data->nb_rx_queues <= rx_queue_id ||
+	    rx_queue_id >= ETH_AF_XDP_MAX_QUEUE_PAIRS) {
+		RTE_LOG(ERR, PMD,
+			"Invalid rx queue id: %d\n", rx_queue_id);
+		return -EINVAL;
+	}
+
+	rxq = &internals->rx_queues[rx_queue_id];
+	queue_reset(internals, rx_queue_id);
+
+	/* Now get the space available for data in the mbuf */
+	buf_size = rte_pktmbuf_data_room_size(mb_pool) -
+		RTE_PKTMBUF_HEADROOM;
+	data_size = ETH_AF_XDP_FRAME_SIZE - ETH_AF_XDP_DATA_HEADROOM;
+
+	if (data_size > buf_size) {
+		RTE_LOG(ERR, PMD,
+			"%s: %d bytes will not fit in mbuf (%d bytes)\n",
+			dev->device->name, data_size, buf_size);
+		return -ENOMEM;
+	}
+
+	rxq->mb_pool = mb_pool;
+
+	if (xsk_configure(rxq, nb_rx_desc, internals->umem_share)) {
+		RTE_LOG(ERR, PMD,
+			"Failed to configure xdp socket\n");
+		return -EINVAL;
+	}
+
+	sxdp.sxdp_family = PF_XDP;
+	sxdp.sxdp_ifindex = internals->if_index;
+	sxdp.sxdp_queue_id = internals->queue_idx;
+	sxdp.sxdp_flags = 0;
+	if (internals->umem_share) {
+		RTE_LOG(INFO, PMD,
+			"use share umem at queue id %d\n", rx_queue_id);
+		sxdp.sxdp_flags = XDP_SHARED_UMEM;
+		sxdp.sxdp_shared_umem_fd = internals->umem_share->fd;
+	}
+
+	if (bind(rxq->xsk_fd, (struct sockaddr *)&sxdp, sizeof(sxdp))) {
+		RTE_LOG(ERR, PMD, "Failed to bind xdp socket\n");
+		/*
+		 * This queue has not taken a umem reference yet. Release
+		 * its resources here and clear xsk_fd so that queue_reset()
+		 * does not drop a reference held by another queue.
+		 */
+		if (!internals->umem_share)
+			xdp_umem_destroy(rxq->umem);
+		close(rxq->xsk_fd);
+		rxq->xsk_fd = 0;
+		goto err;
+	}
+
+	if (!internals->umem_share)
+		internals->umem_share = rxq->umem;
+
+	internals->umem_share_count++;
+
+	map_fd = bpf_map_get_fd_by_id(internals->xsk_map_id);
+	if (map_fd < 0) {
+		RTE_LOG(ERR, PMD,
+			"Failed to get xsk map fd\n");
+		goto err;
+	}
+
+	xsk_key = internals->xsk_map_key_start + rx_queue_id;
+	ret = bpf_map_update_elem(map_fd, &xsk_key, &rxq->xsk_fd, 0);
+	/* the map descriptor is only needed for the update */
+	close(map_fd);
+	if (ret) {
+		RTE_LOG(ERR, PMD,
+			"Failed to update xsk map\n");
+		goto err;
+	}
+
+	dev->data->rx_queues[rx_queue_id] = rxq;
+	return 0;
+
+err:
+	queue_reset(internals, rx_queue_id);
+	return -EINVAL;
+}
+
+/*
+ * Tx queue setup callback. The tx ring is created together with the rx
+ * queue of the same index, so the requested size is only logged.
+ * Returns 0 on success, -EINVAL on an invalid queue id.
+ */
+static int
+eth_tx_queue_setup(struct rte_eth_dev *dev,
+		   uint16_t tx_queue_id,
+		   uint16_t nb_tx_desc,
+		   unsigned int socket_id __rte_unused,
+		   const struct rte_eth_txconf *tx_conf __rte_unused)
+{
+	struct pmd_internals *priv = dev->data->dev_private;
+
+	if (tx_queue_id >= dev->data->nb_tx_queues) {
+		RTE_LOG(ERR, PMD, "Invalid tx queue id: %d\n", tx_queue_id);
+		return -EINVAL;
+	}
+
+	RTE_LOG(WARNING, PMD, "Warning tx queue setup size=%d will be skipped\n",
+		nb_tx_desc);
+
+	dev->data->tx_queues[tx_queue_id] = &priv->tx_queues[tx_queue_id];
+	return 0;
+}
+
+/*
+ * Set the MTU of the backing interface with SIOCSIFMTU.
+ * Returns 0 on success, -EINVAL when the socket or the ioctl fails.
+ */
+static int
+eth_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
+{
+	struct pmd_internals *priv = dev->data->dev_private;
+	struct ifreq ifr = { .ifr_mtu = mtu };
+	int sock;
+	int rc;
+
+	sock = socket(PF_INET, SOCK_DGRAM, 0);
+	if (sock < 0)
+		return -EINVAL;
+
+	snprintf(ifr.ifr_name, IFNAMSIZ, "%s", priv->if_name);
+	rc = ioctl(sock, SIOCSIFMTU, &ifr);
+	close(sock);
+
+	return (rc < 0) ? -EINVAL : 0;
+}
+
+/*
+ * Update the flags of interface if_name: keep the bits selected by mask,
+ * then set the bits in flags. Failures are silently ignored.
+ */
+static void
+eth_dev_change_flags(char *if_name, uint32_t flags, uint32_t mask)
+{
+	struct ifreq ifr;
+	int sock = socket(PF_INET, SOCK_DGRAM, 0);
+
+	if (sock < 0)
+		return;
+
+	snprintf(ifr.ifr_name, IFNAMSIZ, "%s", if_name);
+	if (ioctl(sock, SIOCGIFFLAGS, &ifr) >= 0) {
+		ifr.ifr_flags &= mask;
+		ifr.ifr_flags |= flags;
+		ioctl(sock, SIOCSIFFLAGS, &ifr);
+	}
+
+	close(sock);
+}
+
+/* Set IFF_PROMISC on the backing interface, keeping all other flags. */
+static void
+eth_dev_promiscuous_enable(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *priv = dev->data->dev_private;
+
+	eth_dev_change_flags(priv->if_name, IFF_PROMISC, ~0);
+}
+
+/* Clear IFF_PROMISC on the backing interface, keeping all other flags. */
+static void
+eth_dev_promiscuous_disable(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *priv = dev->data->dev_private;
+
+	eth_dev_change_flags(priv->if_name, 0, ~IFF_PROMISC);
+}
+
+/* ethdev callback table installed on every af_xdp port. */
+static const struct eth_dev_ops ops = {
+	.dev_start = eth_dev_start,
+	.dev_stop = eth_dev_stop,
+	.dev_close = eth_dev_close,
+	.dev_configure = eth_dev_configure,
+	.dev_infos_get = eth_dev_info,
+	.mtu_set = eth_dev_mtu_set,
+	.promiscuous_enable = eth_dev_promiscuous_enable,
+	.promiscuous_disable = eth_dev_promiscuous_disable,
+	.rx_queue_setup = eth_rx_queue_setup,
+	.tx_queue_setup = eth_tx_queue_setup,
+	/* rx and tx share the same (empty) release callback */
+	.rx_queue_release = eth_queue_release,
+	.tx_queue_release = eth_queue_release,
+	.link_update = eth_link_update,
+	.stats_get = eth_stats_get,
+	.stats_reset = eth_stats_reset,
+};
+
+static struct rte_vdev_driver pmd_af_xdp_drv;
+
+/*
+ * Copy the recognised vdev arguments out of kvlist. An output is left
+ * untouched when its argument is absent. *if_name points into kvlist and
+ * is only valid until the list is freed.
+ */
+static void
+parse_parameters(struct rte_kvargs *kvlist,
+		 char **if_name,
+		 int *queue_idx,
+		 int *xsk_map_id,
+		 int *xsk_map_key_start,
+		 int *xsk_map_key_count)
+{
+	struct rte_kvargs_pair *pair = NULL;
+	unsigned int k_idx;
+
+	for (k_idx = 0; k_idx < kvlist->count; k_idx++) {
+		pair = &kvlist->pairs[k_idx];
+		/* match the key exactly; a substring test can pick the wrong argument */
+		if (strcmp(pair->key, ETH_AF_XDP_IFACE_ARG) == 0)
+			*if_name = pair->value;
+		else if (strcmp(pair->key, ETH_AF_XDP_QUEUE_IDX_ARG) == 0)
+			*queue_idx = atoi(pair->value);
+		else if (strcmp(pair->key, ETH_AF_XDP_XSK_MAP_ID_ARG) == 0)
+			*xsk_map_id = atoi(pair->value);
+		else if (strcmp(pair->key,
+				ETH_AF_XDP_XSK_MAP_KEY_START_ARG) == 0)
+			*xsk_map_key_start = atoi(pair->value);
+		else if (strcmp(pair->key,
+				ETH_AF_XDP_XSK_MAP_KEY_COUNT_ARG) == 0)
+			*xsk_map_key_count = atoi(pair->value);
+	}
+}
+
+/*
+ * Look up the MAC address and the interface index of if_name.
+ * Returns 0 on success, -1 when the name is missing or too long for
+ * struct ifreq, or when the interface cannot be queried.
+ */
+static int
+get_iface_info(const char *if_name,
+	       struct ether_addr *eth_addr,
+	       int *if_index)
+{
+	struct ifreq ifr;
+	unsigned int index;
+	int sock;
+
+	/* ifr_name holds IFNAMSIZ bytes including the terminator */
+	if (if_name == NULL || strlen(if_name) >= IFNAMSIZ)
+		return -1;
+
+	sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP);
+	if (sock < 0)
+		return -1;
+
+	memset(&ifr, 0, sizeof(ifr));
+	snprintf(ifr.ifr_name, IFNAMSIZ, "%s", if_name);
+	if (ioctl(sock, SIOCGIFINDEX, &ifr))
+		goto error;
+
+	if (ioctl(sock, SIOCGIFHWADDR, &ifr))
+		goto error;
+
+	memcpy(eth_addr, ifr.ifr_hwaddr.sa_data, ETH_ALEN);
+
+	/* if_nametoindex() returns 0 when the interface does not exist */
+	index = if_nametoindex(if_name);
+	if (index == 0)
+		goto error;
+
+	close(sock);
+	*if_index = index;
+	return 0;
+
+error:
+	close(sock);
+	return -1;
+}
+
+/*
+ * Allocate and initialise the private data of a new port and register
+ * the ethdev for it.
+ *
+ * Returns 0 on success, -EINVAL on invalid arguments, -ENOMEM when the
+ * private data cannot be allocated, -1 when the interface cannot be
+ * queried or the ethdev cannot be allocated.
+ */
+static int
+init_internals(struct rte_vdev_device *dev,
+	       const char *if_name,
+	       int queue_idx,
+	       int xsk_map_id,
+	       int xsk_map_key_start,
+	       int xsk_map_key_count)
+{
+	const char *name = rte_vdev_device_name(dev);
+	struct rte_eth_dev *eth_dev = NULL;
+	const unsigned int numa_node = dev->device.numa_node;
+	struct pmd_internals *internals = NULL;
+	int ret;
+	int i;
+
+	/* if_name is copied into a fixed IFNAMSIZ buffer below */
+	if (if_name == NULL || strlen(if_name) >= IFNAMSIZ) {
+		RTE_LOG(ERR, PMD, "Missing or too long interface name\n");
+		return -EINVAL;
+	}
+
+	/* the count is reported as max queues and indexes fixed-size arrays */
+	if (xsk_map_key_count < 1 ||
+	    xsk_map_key_count > ETH_AF_XDP_MAX_QUEUE_PAIRS) {
+		RTE_LOG(ERR, PMD, "Invalid xsk map key count: %d\n",
+			xsk_map_key_count);
+		return -EINVAL;
+	}
+
+	internals = rte_zmalloc_socket(name, sizeof(*internals), 0, numa_node);
+	if (!internals)
+		return -ENOMEM;
+
+	internals->queue_idx = queue_idx;
+	internals->xsk_map_id = xsk_map_id;
+	internals->xsk_map_key_start = xsk_map_key_start;
+	internals->xsk_map_key_count = xsk_map_key_count;
+	snprintf(internals->if_name, sizeof(internals->if_name), "%s",
+		 if_name);
+
+	for (i = 0; i < ETH_AF_XDP_MAX_QUEUE_PAIRS; i++) {
+		internals->tx_queues[i].pair = &internals->rx_queues[i];
+		internals->rx_queues[i].pair = &internals->tx_queues[i];
+	}
+
+	ret = get_iface_info(if_name, &internals->eth_addr,
+			     &internals->if_index);
+	if (ret)
+		goto err;
+
+	eth_dev = rte_eth_vdev_allocate(dev, 0);
+	if (!eth_dev)
+		goto err;
+
+	eth_dev->data->dev_private = internals;
+	eth_dev->data->dev_link = pmd_link;
+	eth_dev->data->mac_addrs = &internals->eth_addr;
+	eth_dev->dev_ops = &ops;
+	eth_dev->rx_pkt_burst = eth_af_xdp_rx;
+	eth_dev->tx_pkt_burst = eth_af_xdp_tx;
+
+	rte_eth_dev_probing_finish(eth_dev);
+	return 0;
+
+err:
+	rte_free(internals);
+	return -1;
+}
+
+/*
+ * vdev probe callback. A secondary process started without arguments
+ * attaches to the existing port; otherwise the arguments are parsed and
+ * a new port is created.
+ *
+ * Returns 0 on success, -EINVAL on invalid or missing arguments,
+ * otherwise the error returned by init_internals().
+ */
+static int
+rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
+{
+	struct rte_kvargs *kvlist;
+	char *if_name = NULL;
+	int queue_idx = ETH_AF_XDP_DFLT_QUEUE_IDX;
+	struct rte_eth_dev *eth_dev;
+	int xsk_map_id = -1;
+	int xsk_map_key_start = 0;
+	int xsk_map_key_count = 1;
+	const char *name;
+	int ret;
+
+	RTE_LOG(INFO, PMD, "Initializing pmd_af_xdp for %s\n",
+		rte_vdev_device_name(dev));
+
+	name = rte_vdev_device_name(dev);
+	if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
+		strlen(rte_vdev_device_args(dev)) == 0) {
+		eth_dev = rte_eth_dev_attach_secondary(name);
+		if (!eth_dev) {
+			RTE_LOG(ERR, PMD, "Failed to probe %s\n", name);
+			return -EINVAL;
+		}
+		eth_dev->dev_ops = &ops;
+		rte_eth_dev_probing_finish(eth_dev);
+		/* attached to the existing port: there are no arguments to parse */
+		return 0;
+	}
+
+	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
+	if (!kvlist) {
+		RTE_LOG(ERR, PMD,
+			"Invalid kvargs\n");
+		return -EINVAL;
+	}
+
+	if (dev->device.numa_node == SOCKET_ID_ANY)
+		dev->device.numa_node = rte_socket_id();
+
+	parse_parameters(kvlist, &if_name,
+			 &queue_idx,
+			 &xsk_map_id,
+			 &xsk_map_key_start,
+			 &xsk_map_key_count);
+
+	if (xsk_map_id < 0) {
+		RTE_LOG(ERR, PMD,
+			"Invalid map id\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (if_name == NULL) {
+		RTE_LOG(ERR, PMD,
+			"Missing iface argument\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = init_internals(dev, if_name, queue_idx, xsk_map_id,
+			     xsk_map_key_start, xsk_map_key_count);
+
+out:
+	/* if_name points into kvlist, so free the list only after its last use */
+	rte_kvargs_free(kvlist);
+
+	return ret;
+}
+
+/*
+ * vdev remove callback: resets every queue pair, releases the shared
+ * umem if it is still allocated, frees the private data and releases the
+ * port. Returns 0 on success, -1 when dev is NULL or no port matches it.
+ */
+static int
+rte_pmd_af_xdp_remove(struct rte_vdev_device *dev)
+{
+	struct rte_eth_dev *eth_dev = NULL;
+	struct pmd_internals *internals;
+	int i;
+
+	RTE_LOG(INFO, PMD, "Closing AF_XDP ethdev on numa socket %u\n",
+		rte_socket_id());
+
+	if (!dev)
+		return -1;
+
+	/* find the ethdev entry */
+	eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
+	if (!eth_dev)
+		return -1;
+
+	internals = eth_dev->data->dev_private;
+
+	for (i = 0; i < internals->xsk_map_key_count &&
+		    i < ETH_AF_XDP_MAX_QUEUE_PAIRS; i++)
+		queue_reset(internals, i);
+
+	/*
+	 * queue_reset() destroys the shared umem and clears the pointer when
+	 * the last queue using it is closed, so it may be NULL here. The
+	 * umem comes from calloc()/posix_memalign() and therefore must be
+	 * released through xdp_umem_destroy(), not rte_free().
+	 */
+	if (internals->umem_share) {
+		xdp_umem_destroy(internals->umem_share);
+		internals->umem_share = NULL;
+	}
+
+	/* mac_addrs points into internals: drop both references before the free */
+	eth_dev->data->mac_addrs = NULL;
+	eth_dev->data->dev_private = NULL;
+	rte_free(internals);
+
+	rte_eth_dev_release_port(eth_dev);
+
+	return 0;
+}
+
+/* vdev driver descriptor registered below under the name net_af_xdp. */
+static struct rte_vdev_driver pmd_af_xdp_drv = {
+	.probe = rte_pmd_af_xdp_probe,
+	.remove = rte_pmd_af_xdp_remove,
+};
+
+/* Register the driver, its legacy alias and the arguments it accepts. */
+RTE_PMD_REGISTER_VDEV(net_af_xdp, pmd_af_xdp_drv);
+RTE_PMD_REGISTER_ALIAS(net_af_xdp, eth_af_xdp);
+RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp,
+			      "iface=<string> "
+			      "queue=<int> "
+			      "xsk_map_id=<int> "
+			      "xsk_map_key_start=<int> "
+			      "xsk_map_key_count=<int> ");
diff --git a/drivers/net/af_xdp/rte_pmd_af_xdp_version.map b/drivers/net/af_xdp/rte_pmd_af_xdp_version.map
new file mode 100644
index 000000000..ef3539840
--- /dev/null
+++ b/drivers/net/af_xdp/rte_pmd_af_xdp_version.map
@@ -0,0 +1,4 @@ 
+DPDK_2.0 {
+
+	local: *;
+};
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index de33883be..428ad8ab0 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -118,6 +118,7 @@  _LDLIBS-$(CONFIG_RTE_LIBRTE_DPAA2_MEMPOOL)  += -lrte_mempool_dpaa2
 endif
 
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AF_PACKET)  += -lrte_pmd_af_packet
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AF_XDP)     += -lrte_pmd_af_xdp -lelf -lbpf
 _LDLIBS-$(CONFIG_RTE_LIBRTE_ARK_PMD)        += -lrte_pmd_ark
 _LDLIBS-$(CONFIG_RTE_LIBRTE_AVF_PMD)        += -lrte_pmd_avf
 _LDLIBS-$(CONFIG_RTE_LIBRTE_AVP_PMD)        += -lrte_pmd_avp