diff mbox

[dpdk-dev,v3] net/kni: add KNI PMD

Message ID 20161010131946.13303-1-ferruh.yigit@intel.com (mailing list archive)
State Changes Requested, archived
Delegated to: Bruce Richardson
Headers show

Commit Message

Ferruh Yigit Oct. 10, 2016, 1:19 p.m. UTC
Add KNI PMD which wraps librte_kni for ease of use.

KNI PMD can be used as any regular PMD to send / receive packets to the
Linux networking stack.

Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com>
---

v3:
* rebase on top of latest master

v2:
* updated driver name eth_kni -> net_kni
---
 config/common_base                      |   1 +
 config/common_linuxapp                  |   1 +
 drivers/net/Makefile                    |   1 +
 drivers/net/kni/Makefile                |  63 +++++
 drivers/net/kni/rte_eth_kni.c           | 463 ++++++++++++++++++++++++++++++++
 drivers/net/kni/rte_pmd_kni_version.map |   4 +
 mk/rte.app.mk                           |  10 +-
 7 files changed, 538 insertions(+), 5 deletions(-)
 create mode 100644 drivers/net/kni/Makefile
 create mode 100644 drivers/net/kni/rte_eth_kni.c
 create mode 100644 drivers/net/kni/rte_pmd_kni_version.map

Comments

Yong Wang Nov. 3, 2016, 1:24 a.m. UTC | #1
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ferruh Yigit
> Sent: Monday, October 10, 2016 6:20 AM
> To: dev@dpdk.org
> Cc: Ferruh Yigit <ferruh.yigit@intel.com>
> Subject: [dpdk-dev] [PATCH v3] net/kni: add KNI PMD
> 
> Add KNI PMD which wraps librte_kni for ease of use.
> 
> KNI PMD can be used as any regular PMD to send / receive packets to the
> Linux networking stack.
> 
> Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com>
> ---
> 
> v3:
> * rebase on top of latest master
> 
> v2:
> * updated driver name eth_kni -> net_kni
> ---
>  config/common_base                      |   1 +
>  config/common_linuxapp                  |   1 +
>  drivers/net/Makefile                    |   1 +
>  drivers/net/kni/Makefile                |  63 +++++
>  drivers/net/kni/rte_eth_kni.c           | 463
> ++++++++++++++++++++++++++++++++
>  drivers/net/kni/rte_pmd_kni_version.map |   4 +
>  mk/rte.app.mk                           |  10 +-
>  7 files changed, 538 insertions(+), 5 deletions(-)
>  create mode 100644 drivers/net/kni/Makefile
>  create mode 100644 drivers/net/kni/rte_eth_kni.c
>  create mode 100644 drivers/net/kni/rte_pmd_kni_version.map
> 
> diff --git a/config/common_base b/config/common_base
> index f5d2eff..03b93c7 100644
> --- a/config/common_base
> +++ b/config/common_base
> @@ -543,6 +543,7 @@ CONFIG_RTE_PIPELINE_STATS_COLLECT=n
>  # Compile librte_kni
>  #
>  CONFIG_RTE_LIBRTE_KNI=n
> +CONFIG_RTE_LIBRTE_PMD_KNI=n

Nit: change this to CONFIG_RTE_LIBRTE_KNI_PMD instead to be consistent with all other pmds.

>  CONFIG_RTE_KNI_KMOD=n
>  CONFIG_RTE_KNI_PREEMPT_DEFAULT=y
>  CONFIG_RTE_KNI_KO_DEBUG=n
> diff --git a/config/common_linuxapp b/config/common_linuxapp
> index 2483dfa..2ecd510 100644
> --- a/config/common_linuxapp
> +++ b/config/common_linuxapp
> @@ -39,6 +39,7 @@ CONFIG_RTE_EAL_IGB_UIO=y
>  CONFIG_RTE_EAL_VFIO=y
>  CONFIG_RTE_KNI_KMOD=y
>  CONFIG_RTE_LIBRTE_KNI=y
> +CONFIG_RTE_LIBRTE_PMD_KNI=y
>  CONFIG_RTE_LIBRTE_VHOST=y
>  CONFIG_RTE_LIBRTE_PMD_VHOST=y
>  CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
> diff --git a/drivers/net/Makefile b/drivers/net/Makefile
> index bc93230..c4771cd 100644
> --- a/drivers/net/Makefile
> +++ b/drivers/net/Makefile
> @@ -41,6 +41,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += enic
>  DIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k
>  DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += i40e
>  DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe
> +DIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += kni
>  DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4
>  DIRS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5
>  DIRS-$(CONFIG_RTE_LIBRTE_MPIPE_PMD) += mpipe
> diff --git a/drivers/net/kni/Makefile b/drivers/net/kni/Makefile
> new file mode 100644
> index 0000000..0b7cf91
> --- /dev/null
> +++ b/drivers/net/kni/Makefile
> @@ -0,0 +1,63 @@
> +#   BSD LICENSE
> +#
> +#   Copyright(c) 2016 Intel Corporation. All rights reserved.
> +#
> +#   Redistribution and use in source and binary forms, with or without
> +#   modification, are permitted provided that the following conditions
> +#   are met:
> +#
> +#     * Redistributions of source code must retain the above copyright
> +#       notice, this list of conditions and the following disclaimer.
> +#     * Redistributions in binary form must reproduce the above copyright
> +#       notice, this list of conditions and the following disclaimer in
> +#       the documentation and/or other materials provided with the
> +#       distribution.
> +#     * Neither the name of Intel Corporation nor the names of its
> +#       contributors may be used to endorse or promote products derived
> +#       from this software without specific prior written permission.
> +#
> +#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> CONTRIBUTORS
> +#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
> NOT
> +#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> FITNESS FOR
> +#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> COPYRIGHT
> +#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> INCIDENTAL,
> +#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> NOT
> +#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
> OF USE,
> +#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
> AND ON ANY
> +#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
> TORT
> +#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> THE USE
> +#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> +
> +include $(RTE_SDK)/mk/rte.vars.mk
> +
> +#
> +# library name
> +#
> +LIB = librte_pmd_kni.a
> +
> +CFLAGS += -O3
> +CFLAGS += $(WERROR_FLAGS)
> +LDLIBS += -lpthread
> +
> +EXPORT_MAP := rte_pmd_kni_version.map
> +
> +LIBABIVER := 1
> +
> +#
> +# all source are stored in SRCS-y
> +#
> +SRCS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += rte_eth_kni.c
> +
> +#
> +# Export include files
> +#
> +SYMLINK-y-include +=
> +
> +# this lib depends upon:
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_eal
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_ether
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_kni
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_mbuf
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_mempool
> +
> +include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/drivers/net/kni/rte_eth_kni.c b/drivers/net/kni/rte_eth_kni.c
> new file mode 100644
> index 0000000..ce9e758
> --- /dev/null
> +++ b/drivers/net/kni/rte_eth_kni.c
> @@ -0,0 +1,463 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2016 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
> NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
> OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
> AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
> TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> + */
> +
> +#include <fcntl.h>
> +#include <pthread.h>
> +#include <unistd.h>
> +
> +#include <rte_ethdev.h>
> +#include <rte_kni.h>
> +#include <rte_malloc.h>
> +#include <rte_vdev.h>
> +
> +#define KNI_MAX_QUEUE_PER_PORT 8
> +#define MAX_PACKET_SZ 2048
> +#define MAX_KNI_PORTS 8
> +#define DRV_NAME net_kni

The name generated this way is not consistent with other vdevs.  Why not simply assign "KNI PMD" to drv_name?

> +
> +struct pmd_queue_stats {
> +	uint64_t pkts;
> +	uint64_t bytes;
> +	uint64_t err_pkts;
> +};
> +
> +struct pmd_queue {
> +	struct pmd_internals *internals;
> +	struct rte_mempool *mb_pool;
> +
> +	struct pmd_queue_stats rx;
> +	struct pmd_queue_stats tx;
> +};
> +
> +struct pmd_internals {
> +	struct rte_kni *kni;
> +	int is_kni_started;
> +
> +	pthread_t thread;
> +	int stop_thread;
> +
> +	struct pmd_queue rx_queues[KNI_MAX_QUEUE_PER_PORT];
> +	struct pmd_queue tx_queues[KNI_MAX_QUEUE_PER_PORT];
> +};
> +
> +static struct ether_addr eth_addr;
> +static const char *drivername = RTE_STR(DRV_NAME);
> +static struct rte_eth_link pmd_link = {
> +		.link_speed = 10000,
> +		.link_duplex = ETH_LINK_FULL_DUPLEX,
> +		.link_status = 0
> +};
> +static int is_kni_initialized;
> +
> +static uint16_t
> +eth_kni_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
> +{
> +	struct pmd_queue *kni_q = q;
> +	struct rte_kni *kni = kni_q->internals->kni;
> +	uint16_t nb_pkts;
> +
> +	nb_pkts = rte_kni_rx_burst(kni, bufs, nb_bufs);
> +
> +	kni_q->rx.pkts += nb_pkts;
> +	kni_q->rx.err_pkts += nb_bufs - nb_pkts;
> +
> +	return nb_pkts;
> +}
> +

I don't think it's safe to do receive from two queues concurrently on two cores sharing the same underlying KNI device due to the current limitation of KNI user-space queues not being multi-thread safe.  Is the proposed plan to have the application layer implement synchronization logic?  If that's the case, it needs to be clearly documented and depending on the implementation, measurable overhead will be incurred.  Otherwise (only single-queue supported), could you check queue number if the application tries to configure multi-queue?

> +static uint16_t
> +eth_kni_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
> +{
> +	struct pmd_queue *kni_q = q;
> +	struct rte_kni *kni = kni_q->internals->kni;
> +	uint16_t nb_pkts;
> +
> +	nb_pkts =  rte_kni_tx_burst(kni, bufs, nb_bufs);
> +
> +	kni_q->tx.pkts += nb_pkts;
> +	kni_q->tx.err_pkts += nb_bufs - nb_pkts;
> +
> +	return nb_pkts;
> +}
> +
> +static void *
> +kni_handle_request(void *param)
> +{
> +	struct pmd_internals *internals = param;
> +#define MS 1000
> +
> +	while (!internals->stop_thread) {
> +		rte_kni_handle_request(internals->kni);
> +		usleep(500 * MS);
> +	}
> +
> +	return param;
> +}
> +
> +static int
> +eth_kni_start(struct rte_eth_dev *dev)
> +{
> +	struct pmd_internals *internals = dev->data->dev_private;
> +	uint16_t port_id = dev->data->port_id;
> +	struct rte_mempool *mb_pool;
> +	struct rte_kni_conf conf;
> +	const char *name = dev->data->name + 4; /* remove eth_ */
> +
> +	snprintf(conf.name, RTE_KNI_NAMESIZE, "%s", name);
> +	conf.force_bind = 0;
> +	conf.group_id = port_id;
> +	conf.mbuf_size = MAX_PACKET_SZ;
> +	mb_pool = internals->rx_queues[0].mb_pool;
> +
> +	internals->kni = rte_kni_alloc(mb_pool, &conf, NULL);
> +	if (internals->kni == NULL) {
> +		RTE_LOG(ERR, PMD,
> +			"Fail to create kni for port: %d\n", port_id);
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +static int
> +eth_kni_dev_start(struct rte_eth_dev *dev)
> +{
> +	struct pmd_internals *internals = dev->data->dev_private;
> +	int ret;
> +
> +	if (internals->is_kni_started == 0) {
> +		ret = eth_kni_start(dev);
> +		if (ret)
> +			return -1;
> +		internals->is_kni_started = 1;
> +	}
> +
> +	ret = pthread_create(&internals->thread, NULL, kni_handle_request,
> +			internals);
> +	if (ret) {
> +		RTE_LOG(ERR, PMD, "Fail to create kni request thread\n");
> +		return -1;
> +	}
> +
> +	dev->data->dev_link.link_status = 1;
> +
> +	return 0;
> +}
> +
> +static void
> +eth_kni_dev_stop(struct rte_eth_dev *dev)
> +{
> +	struct pmd_internals *internals = dev->data->dev_private;
> +	int ret;
> +
> +	internals->stop_thread = 1;
> +
> +	ret = pthread_cancel(internals->thread);
> +	if (ret)
> +		RTE_LOG(ERR, PMD, "Can't cancel the thread\n");
> +
> +	ret = pthread_join(internals->thread, NULL);
> +	if (ret)
> +		RTE_LOG(ERR, PMD, "Can't join the thread\n");
> +
> +	internals->stop_thread = 0;
> +
> +	dev->data->dev_link.link_status = 0;
> +}
> +
> +static int
> +eth_kni_dev_configure(struct rte_eth_dev *dev __rte_unused)
> +{
> +	return 0;
> +}
> +
> +static void
> +eth_kni_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info
> *dev_info)
> +{
> +	struct rte_eth_dev_data *data = dev->data;
> +	struct pmd_internals *internals = data->dev_private;
> +
> +	dev_info->driver_name = data->drv_name;
> +	dev_info->max_mac_addrs = 1;
> +	dev_info->max_rx_pktlen = (uint32_t)-1;
> +	dev_info->max_rx_queues = RTE_DIM(internals->rx_queues);
> +	dev_info->max_tx_queues = RTE_DIM(internals->tx_queues);
> +	dev_info->min_rx_bufsize = 0;
> +	dev_info->pci_dev = NULL;
> +}
> +
> +static int
> +eth_kni_rx_queue_setup(struct rte_eth_dev *dev,
> +		uint16_t rx_queue_id,
> +		uint16_t nb_rx_desc __rte_unused,
> +		unsigned int socket_id __rte_unused,
> +		const struct rte_eth_rxconf *rx_conf __rte_unused,
> +		struct rte_mempool *mb_pool)
> +{
> +	struct pmd_internals *internals = dev->data->dev_private;
> +	struct pmd_queue *q;
> +
> +	q = &internals->rx_queues[rx_queue_id];
> +	q->internals = internals;
> +	q->mb_pool = mb_pool;
> +
> +	dev->data->rx_queues[rx_queue_id] = q;
> +
> +	return 0;
> +}
> +
> +static int
> +eth_kni_tx_queue_setup(struct rte_eth_dev *dev,
> +		uint16_t tx_queue_id,
> +		uint16_t nb_tx_desc __rte_unused,
> +		unsigned int socket_id __rte_unused,
> +		const struct rte_eth_txconf *tx_conf __rte_unused)
> +{
> +	struct pmd_internals *internals = dev->data->dev_private;
> +	struct pmd_queue *q;
> +
> +	q = &internals->tx_queues[tx_queue_id];
> +	q->internals = internals;
> +
> +	dev->data->tx_queues[tx_queue_id] = q;
> +
> +	return 0;
> +}
> +
> +static void
> +eth_kni_queue_release(void *q __rte_unused)
> +{
> +}
> +
> +static int
> +eth_kni_link_update(struct rte_eth_dev *dev __rte_unused,
> +		int wait_to_complete __rte_unused)
> +{
> +	return 0;
> +}
> +
> +static void
> +eth_kni_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
> +{
> +	unsigned long rx_packets_total = 0, rx_bytes_total = 0;
> +	unsigned long tx_packets_total = 0, tx_bytes_total = 0;
> +	struct rte_eth_dev_data *data = dev->data;
> +	unsigned long tx_packets_err_total = 0;
> +	unsigned int i, num_stats;
> +	struct pmd_queue *q;
> +
> +	num_stats = RTE_MIN((unsigned
> int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
> +			data->nb_rx_queues);
> +	for (i = 0; i < num_stats; i++) {
> +		q = data->rx_queues[i];
> +		stats->q_ipackets[i] = q->rx.pkts;
> +		stats->q_ibytes[i] = q->rx.bytes;
> +		rx_packets_total += stats->q_ipackets[i];
> +		rx_bytes_total += stats->q_ibytes[i];
> +	}
> +
> +	num_stats = RTE_MIN((unsigned
> int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
> +			data->nb_tx_queues);
> +	for (i = 0; i < num_stats; i++) {
> +		q = data->tx_queues[i];
> +		stats->q_opackets[i] = q->tx.pkts;
> +		stats->q_obytes[i] = q->tx.bytes;
> +		stats->q_errors[i] = q->tx.err_pkts;
> +		tx_packets_total += stats->q_opackets[i];
> +		tx_bytes_total += stats->q_obytes[i];
> +		tx_packets_err_total += stats->q_errors[i];
> +	}
> +
> +	stats->ipackets = rx_packets_total;
> +	stats->ibytes = rx_bytes_total;
> +	stats->opackets = tx_packets_total;
> +	stats->obytes = tx_bytes_total;
> +	stats->oerrors = tx_packets_err_total;
> +}
> +
> +static void
> +eth_kni_stats_reset(struct rte_eth_dev *dev)
> +{
> +	struct rte_eth_dev_data *data = dev->data;
> +	struct pmd_queue *q;
> +	unsigned int i;
> +
> +	for (i = 0; i < data->nb_rx_queues; i++) {
> +		q = data->rx_queues[i];
> +		q->rx.pkts = 0;
> +		q->rx.bytes = 0;
> +	}
> +	for (i = 0; i < data->nb_tx_queues; i++) {
> +		q = data->tx_queues[i];
> +		q->tx.pkts = 0;
> +		q->tx.bytes = 0;
> +		q->tx.err_pkts = 0;
> +	}
> +}
> +
> +static const struct eth_dev_ops eth_kni_ops = {
> +	.dev_start = eth_kni_dev_start,
> +	.dev_stop = eth_kni_dev_stop,
> +	.dev_configure = eth_kni_dev_configure,
> +	.dev_infos_get = eth_kni_dev_info,
> +	.rx_queue_setup = eth_kni_rx_queue_setup,
> +	.tx_queue_setup = eth_kni_tx_queue_setup,
> +	.rx_queue_release = eth_kni_queue_release,
> +	.tx_queue_release = eth_kni_queue_release,
> +	.link_update = eth_kni_link_update,
> +	.stats_get = eth_kni_stats_get,
> +	.stats_reset = eth_kni_stats_reset,
> +};
> +
> +static struct rte_eth_dev *
> +eth_kni_create(const char *name, unsigned int numa_node)
> +{
> +	struct pmd_internals *internals = NULL;
> +	struct rte_eth_dev_data *data;
> +	struct rte_eth_dev *eth_dev;
> +	uint16_t nb_rx_queues = 1;
> +	uint16_t nb_tx_queues = 1;

Since these two values are always 1 here, I think they could be removed.

> +
> +	RTE_LOG(INFO, PMD, "Creating kni ethdev on numa socket %u\n",
> +			numa_node);
> +
> +	data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node);
> +	if (data == NULL)
> +		goto error;
> +
> +	internals = rte_zmalloc_socket(name, sizeof(*internals), 0,
> numa_node);
> +	if (internals == NULL)
> +		goto error;
> +
> +	/* reserve an ethdev entry */
> +	eth_dev = rte_eth_dev_allocate(name);
> +	if (eth_dev == NULL)
> +		goto error;
> +
> +	data->dev_private = internals;
> +	data->port_id = eth_dev->data->port_id;
> +	memmove(data->name, eth_dev->data->name, sizeof(data-
> >name));
> +	data->nb_rx_queues = nb_rx_queues;
> +	data->nb_tx_queues = nb_tx_queues;
> +	data->dev_link = pmd_link;
> +	data->mac_addrs = &eth_addr;
> +
> +	eth_dev->data = data;
> +	eth_dev->dev_ops = &eth_kni_ops;
> +	eth_dev->driver = NULL;
> +
> +	data->dev_flags = RTE_ETH_DEV_DETACHABLE;
> +	data->kdrv = RTE_KDRV_NONE;
> +	data->drv_name = drivername;
> +	data->numa_node = numa_node;
> +
> +	return eth_dev;
> +
> +error:
> +	rte_free(data);
> +	rte_free(internals);
> +
> +	return NULL;
> +}
> +
> +static int
> +kni_init(void)
> +{
> +	if (is_kni_initialized == 0)
> +		rte_kni_init(MAX_KNI_PORTS);
> +
> +	is_kni_initialized += 1;
> +
> +	return 0;
> +}
> +
> +static int
> +eth_kni_probe(const char *name, const char *params __rte_unused)
> +{
> +	struct rte_eth_dev *eth_dev;
> +	int ret;
> +
> +	RTE_LOG(INFO, PMD, "Initializing eth_kni for %s\n", name);
> +
> +	ret = kni_init();
> +	if (ret < 0)
> +		/* Not return error to prevent panic in rte_eal_init() */
> +		return 0;
> +
> +	eth_dev = eth_kni_create(name, rte_socket_id());
> +	if (eth_dev == NULL)
> +		return -1;
> +
> +	eth_dev->rx_pkt_burst = eth_kni_rx;
> +	eth_dev->tx_pkt_burst = eth_kni_tx;
> +
> +	return 0;
> +}
> +
> +static int
> +eth_kni_remove(const char *name)
> +{
> +	struct rte_eth_dev *eth_dev;
> +	struct pmd_internals *internals;
> +
> +	RTE_LOG(INFO, PMD, "Un-Initializing eth_kni for %s\n", name);
> +
> +	/* find the ethdev entry */
> +	eth_dev = rte_eth_dev_allocated(name);
> +	if (eth_dev == NULL)
> +		return -1;
> +
> +	eth_kni_dev_stop(eth_dev);
> +
> +	if (eth_dev->data) {
> +		internals = eth_dev->data->dev_private;
> +		rte_kni_release(internals->kni);
> +
> +		rte_free(internals);
> +	}
> +	rte_free(eth_dev->data);
> +
> +	rte_eth_dev_release_port(eth_dev);
> +
> +	is_kni_initialized -= 1;
> +	if (is_kni_initialized == 0)
> +		rte_kni_close();
> +
> +	return 0;
> +}
> +
> +static struct rte_vdev_driver eth_kni_drv = {
> +	.probe = eth_kni_probe,
> +	.remove = eth_kni_remove,
> +};
> +
> +DRIVER_REGISTER_VDEV(DRV_NAME, eth_kni_drv);
> diff --git a/drivers/net/kni/rte_pmd_kni_version.map
> b/drivers/net/kni/rte_pmd_kni_version.map
> new file mode 100644
> index 0000000..61463bf
> --- /dev/null
> +++ b/drivers/net/kni/rte_pmd_kni_version.map
> @@ -0,0 +1,4 @@
> +DPDK_16.11 {
> +
> +	local: *;
> +};
> diff --git a/mk/rte.app.mk b/mk/rte.app.mk
> index ac50a21..a94983b 100644
> --- a/mk/rte.app.mk
> +++ b/mk/rte.app.mk
> @@ -59,11 +59,6 @@ _LDLIBS-y += -L$(RTE_SDK_BIN)/lib
>  #
>  # Order is important: from higher level to lower level
>  #
> -
> -ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
> -_LDLIBS-$(CONFIG_RTE_LIBRTE_KNI)            += -lrte_kni
> -endif
> -
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PIPELINE)       += -lrte_pipeline
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_TABLE)          += -lrte_table
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PORT)           += -lrte_port
> @@ -84,6 +79,10 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_POWER)          += -
> lrte_power
> 
>  _LDLIBS-y += --whole-archive
> 
> +ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
> +_LDLIBS-$(CONFIG_RTE_LIBRTE_KNI)            += -lrte_kni
> +endif
> +
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_TIMER)          += -lrte_timer
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_HASH)           += -lrte_hash
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_VHOST)          += -lrte_vhost
> @@ -114,6 +113,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_ENIC_PMD)       += -
> lrte_pmd_enic
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_FM10K_PMD)      += -lrte_pmd_fm10k
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_I40E_PMD)       += -lrte_pmd_i40e
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD)      += -lrte_pmd_ixgbe
> +_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_KNI)        += -lrte_pmd_kni
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD)       += -lrte_pmd_mlx4 -
> libverbs
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD)       += -lrte_pmd_mlx5 -
> libverbs
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_MPIPE_PMD)      += -lrte_pmd_mpipe -lgxio
> --
> 2.7.4
Ferruh Yigit Nov. 4, 2016, 12:21 p.m. UTC | #2
Hi Yong,

Thank you for the review.

On 11/3/2016 1:24 AM, Yong Wang wrote:
>> -----Original Message-----
>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ferruh Yigit
>> Sent: Monday, October 10, 2016 6:20 AM
>> To: dev@dpdk.org
>> Cc: Ferruh Yigit <ferruh.yigit@intel.com>
>> Subject: [dpdk-dev] [PATCH v3] net/kni: add KNI PMD
>>
>> Add KNI PMD which wraps librte_kni for ease of use.
>>
>> KNI PMD can be used as any regular PMD to send / receive packets to the
>> Linux networking stack.
>>
>> Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com>
>> ---
>>
>> v3:
>> * rebase on top of latest master
>>
>> v2:
>> * updated driver name eth_kni -> net_kni
>> ---

<...>

>>  CONFIG_RTE_LIBRTE_KNI=n
>> +CONFIG_RTE_LIBRTE_PMD_KNI=n
> 
> Nit: change this to CONFIG_RTE_LIBRTE_KNI_PMD instead to be consistent with all other pmds.

There is an inconsistency between virtual and physical PMD config options.

Physical ones: xxx_PMD=
*IXGBE_PMD, *I40E_PMD, *ENA_PMD, ...

Virtual ones: PMD_xxx=
*PMD_RING, *PMD_PCAP, *PMD_NULL, ...

So I am consistent with inconsistency J

<...>

>> +#define DRV_NAME net_kni
> 
> The name generated this way is not consistent with other vdevs.  Why not simply assign "KNI PMD" to drv_name?

Right, it is not consistent but intentionaly.

With macro RTE_PMD_REGISTER_VDEV(net_kni, xxx), rte_driver.name set to
"net_kni"

and if you set drivername to "KNI PMD", pmd will report driver name as
"KNI PMD"

so there will be two different driver names, I tried to unify them to a
single name.
And some physical drivers already does same thing.


<...>

>> +static uint16_t
>> +eth_kni_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
>> +{
>> +	struct pmd_queue *kni_q = q;
>> +	struct rte_kni *kni = kni_q->internals->kni;
>> +	uint16_t nb_pkts;
>> +
>> +	nb_pkts = rte_kni_rx_burst(kni, bufs, nb_bufs);
>> +
>> +	kni_q->rx.pkts += nb_pkts;
>> +	kni_q->rx.err_pkts += nb_bufs - nb_pkts;
>> +
>> +	return nb_pkts;
>> +}
>> +
> 
> I don't think it's safe to do receive from two queues concurrently on two cores sharing the same underlying KNI device due to the current limitation of KNI user-space queues not being multi-thread safe.

You are right, above code is not safe.
It is possible to create a KNI interface per queue, but I don't see any
advantage of this against creating a new virtual KNI port.

So I will limit to single queue.

> Is the proposed plan to have the application layer implement
synchronization logic?
> If that's the case, it needs to be clearly documented and depending on
the implementation, measurable overhead will be incurred.
> Otherwise (only single-queue supported), could you check queue number
if the application tries to configure multi-queue?
> 



<...>

>> +static struct rte_eth_dev *
>> +eth_kni_create(const char *name, unsigned int numa_node)
>> +{
>> +	struct pmd_internals *internals = NULL;
>> +	struct rte_eth_dev_data *data;
>> +	struct rte_eth_dev *eth_dev;
>> +	uint16_t nb_rx_queues = 1;
>> +	uint16_t nb_tx_queues = 1;
> 
> Since these two values are always 1 here, I think they could be removed.

I will remove them.


Thanks,
ferruh
diff mbox

Patch

diff --git a/config/common_base b/config/common_base
index f5d2eff..03b93c7 100644
--- a/config/common_base
+++ b/config/common_base
@@ -543,6 +543,7 @@  CONFIG_RTE_PIPELINE_STATS_COLLECT=n
 # Compile librte_kni
 #
 CONFIG_RTE_LIBRTE_KNI=n
+CONFIG_RTE_LIBRTE_PMD_KNI=n
 CONFIG_RTE_KNI_KMOD=n
 CONFIG_RTE_KNI_PREEMPT_DEFAULT=y
 CONFIG_RTE_KNI_KO_DEBUG=n
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 2483dfa..2ecd510 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -39,6 +39,7 @@  CONFIG_RTE_EAL_IGB_UIO=y
 CONFIG_RTE_EAL_VFIO=y
 CONFIG_RTE_KNI_KMOD=y
 CONFIG_RTE_LIBRTE_KNI=y
+CONFIG_RTE_LIBRTE_PMD_KNI=y
 CONFIG_RTE_LIBRTE_VHOST=y
 CONFIG_RTE_LIBRTE_PMD_VHOST=y
 CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index bc93230..c4771cd 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -41,6 +41,7 @@  DIRS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += enic
 DIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k
 DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += i40e
 DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += kni
 DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4
 DIRS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5
 DIRS-$(CONFIG_RTE_LIBRTE_MPIPE_PMD) += mpipe
diff --git a/drivers/net/kni/Makefile b/drivers/net/kni/Makefile
new file mode 100644
index 0000000..0b7cf91
--- /dev/null
+++ b/drivers/net/kni/Makefile
@@ -0,0 +1,63 @@ 
+#   BSD LICENSE
+#
+#   Copyright(c) 2016 Intel Corporation. All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_kni.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+LDLIBS += -lpthread
+
+EXPORT_MAP := rte_pmd_kni_version.map
+
+LIBABIVER := 1
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += rte_eth_kni.c
+
+#
+# Export include files
+#
+SYMLINK-y-include +=
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_kni
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_mempool
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/kni/rte_eth_kni.c b/drivers/net/kni/rte_eth_kni.c
new file mode 100644
index 0000000..ce9e758
--- /dev/null
+++ b/drivers/net/kni/rte_eth_kni.c
@@ -0,0 +1,463 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <fcntl.h>
+#include <pthread.h>
+#include <unistd.h>
+
+#include <rte_ethdev.h>
+#include <rte_kni.h>
+#include <rte_malloc.h>
+#include <rte_vdev.h>
+
+#define KNI_MAX_QUEUE_PER_PORT 8
+#define MAX_PACKET_SZ 2048
+#define MAX_KNI_PORTS 8
+#define DRV_NAME net_kni
+
+struct pmd_queue_stats {
+	uint64_t pkts;
+	uint64_t bytes;
+	uint64_t err_pkts;
+};
+
+struct pmd_queue {
+	struct pmd_internals *internals;
+	struct rte_mempool *mb_pool;
+
+	struct pmd_queue_stats rx;
+	struct pmd_queue_stats tx;
+};
+
+struct pmd_internals {
+	struct rte_kni *kni;
+	int is_kni_started;
+
+	pthread_t thread;
+	int stop_thread;
+
+	struct pmd_queue rx_queues[KNI_MAX_QUEUE_PER_PORT];
+	struct pmd_queue tx_queues[KNI_MAX_QUEUE_PER_PORT];
+};
+
+static struct ether_addr eth_addr;
+static const char *drivername = RTE_STR(DRV_NAME);
+static struct rte_eth_link pmd_link = {
+		.link_speed = 10000,
+		.link_duplex = ETH_LINK_FULL_DUPLEX,
+		.link_status = 0
+};
+static int is_kni_initialized;
+
+static uint16_t
+eth_kni_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
+{
+	struct pmd_queue *kni_q = q;
+	struct rte_kni *kni = kni_q->internals->kni;
+	uint16_t nb_pkts;
+
+	nb_pkts = rte_kni_rx_burst(kni, bufs, nb_bufs);
+
+	kni_q->rx.pkts += nb_pkts;
+	kni_q->rx.err_pkts += nb_bufs - nb_pkts;
+
+	return nb_pkts;
+}
+
+static uint16_t
+eth_kni_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
+{
+	struct pmd_queue *kni_q = q;
+	struct rte_kni *kni = kni_q->internals->kni;
+	uint16_t nb_pkts;
+
+	nb_pkts =  rte_kni_tx_burst(kni, bufs, nb_bufs);
+
+	kni_q->tx.pkts += nb_pkts;
+	kni_q->tx.err_pkts += nb_bufs - nb_pkts;
+
+	return nb_pkts;
+}
+
+static void *
+kni_handle_request(void *param)
+{
+	struct pmd_internals *internals = param;
+#define MS 1000
+
+	while (!internals->stop_thread) {
+		rte_kni_handle_request(internals->kni);
+		usleep(500 * MS);
+	}
+
+	return param;
+}
+
+static int
+eth_kni_start(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+	uint16_t port_id = dev->data->port_id;
+	struct rte_mempool *mb_pool;
+	struct rte_kni_conf conf;
+	const char *name = dev->data->name + 4; /* remove eth_ */
+
+	snprintf(conf.name, RTE_KNI_NAMESIZE, "%s", name);
+	conf.force_bind = 0;
+	conf.group_id = port_id;
+	conf.mbuf_size = MAX_PACKET_SZ;
+	mb_pool = internals->rx_queues[0].mb_pool;
+
+	internals->kni = rte_kni_alloc(mb_pool, &conf, NULL);
+	if (internals->kni == NULL) {
+		RTE_LOG(ERR, PMD,
+			"Fail to create kni for port: %d\n", port_id);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+eth_kni_dev_start(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+	int ret;
+
+	if (internals->is_kni_started == 0) {
+		ret = eth_kni_start(dev);
+		if (ret)
+			return -1;
+		internals->is_kni_started = 1;
+	}
+
+	ret = pthread_create(&internals->thread, NULL, kni_handle_request,
+			internals);
+	if (ret) {
+		RTE_LOG(ERR, PMD, "Fail to create kni request thread\n");
+		return -1;
+	}
+
+	dev->data->dev_link.link_status = 1;
+
+	return 0;
+}
+
+static void
+eth_kni_dev_stop(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+	int ret;
+
+	internals->stop_thread = 1;
+
+	ret = pthread_cancel(internals->thread);
+	if (ret)
+		RTE_LOG(ERR, PMD, "Can't cancel the thread\n");
+
+	ret = pthread_join(internals->thread, NULL);
+	if (ret)
+		RTE_LOG(ERR, PMD, "Can't join the thread\n");
+
+	internals->stop_thread = 0;
+
+	dev->data->dev_link.link_status = 0;
+}
+
+static int
+eth_kni_dev_configure(struct rte_eth_dev *dev __rte_unused)
+{
+	return 0;
+}
+
+static void
+eth_kni_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
+{
+	struct rte_eth_dev_data *data = dev->data;
+	struct pmd_internals *internals = data->dev_private;
+
+	dev_info->driver_name = data->drv_name;
+	dev_info->max_mac_addrs = 1;
+	dev_info->max_rx_pktlen = (uint32_t)-1;
+	dev_info->max_rx_queues = RTE_DIM(internals->rx_queues);
+	dev_info->max_tx_queues = RTE_DIM(internals->tx_queues);
+	dev_info->min_rx_bufsize = 0;
+	dev_info->pci_dev = NULL;
+}
+
+static int
+eth_kni_rx_queue_setup(struct rte_eth_dev *dev,
+		uint16_t rx_queue_id,
+		uint16_t nb_rx_desc __rte_unused,
+		unsigned int socket_id __rte_unused,
+		const struct rte_eth_rxconf *rx_conf __rte_unused,
+		struct rte_mempool *mb_pool)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+	struct pmd_queue *q;
+
+	q = &internals->rx_queues[rx_queue_id];
+	q->internals = internals;
+	q->mb_pool = mb_pool;
+
+	dev->data->rx_queues[rx_queue_id] = q;
+
+	return 0;
+}
+
+static int
+eth_kni_tx_queue_setup(struct rte_eth_dev *dev,
+		uint16_t tx_queue_id,
+		uint16_t nb_tx_desc __rte_unused,
+		unsigned int socket_id __rte_unused,
+		const struct rte_eth_txconf *tx_conf __rte_unused)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+	struct pmd_queue *q;
+
+	q = &internals->tx_queues[tx_queue_id];
+	q->internals = internals;
+
+	dev->data->tx_queues[tx_queue_id] = q;
+
+	return 0;
+}
+
+static void
+eth_kni_queue_release(void *q __rte_unused)
+{
+}
+
+static int
+eth_kni_link_update(struct rte_eth_dev *dev __rte_unused,
+		int wait_to_complete __rte_unused)
+{
+	return 0;
+}
+
+static void
+eth_kni_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
+{
+	unsigned long rx_packets_total = 0, rx_bytes_total = 0;
+	unsigned long tx_packets_total = 0, tx_bytes_total = 0;
+	struct rte_eth_dev_data *data = dev->data;
+	unsigned long tx_packets_err_total = 0;
+	unsigned int i, num_stats;
+	struct pmd_queue *q;
+
+	num_stats = RTE_MIN((unsigned int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
+			data->nb_rx_queues);
+	for (i = 0; i < num_stats; i++) {
+		q = data->rx_queues[i];
+		stats->q_ipackets[i] = q->rx.pkts;
+		stats->q_ibytes[i] = q->rx.bytes;
+		rx_packets_total += stats->q_ipackets[i];
+		rx_bytes_total += stats->q_ibytes[i];
+	}
+
+	num_stats = RTE_MIN((unsigned int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
+			data->nb_tx_queues);
+	for (i = 0; i < num_stats; i++) {
+		q = data->tx_queues[i];
+		stats->q_opackets[i] = q->tx.pkts;
+		stats->q_obytes[i] = q->tx.bytes;
+		stats->q_errors[i] = q->tx.err_pkts;
+		tx_packets_total += stats->q_opackets[i];
+		tx_bytes_total += stats->q_obytes[i];
+		tx_packets_err_total += stats->q_errors[i];
+	}
+
+	stats->ipackets = rx_packets_total;
+	stats->ibytes = rx_bytes_total;
+	stats->opackets = tx_packets_total;
+	stats->obytes = tx_bytes_total;
+	stats->oerrors = tx_packets_err_total;
+}
+
+static void
+eth_kni_stats_reset(struct rte_eth_dev *dev)
+{
+	struct rte_eth_dev_data *data = dev->data;
+	struct pmd_queue *q;
+	unsigned int i;
+
+	for (i = 0; i < data->nb_rx_queues; i++) {
+		q = data->rx_queues[i];
+		q->rx.pkts = 0;
+		q->rx.bytes = 0;
+	}
+	for (i = 0; i < data->nb_tx_queues; i++) {
+		q = data->tx_queues[i];
+		q->tx.pkts = 0;
+		q->tx.bytes = 0;
+		q->tx.err_pkts = 0;
+	}
+}
+
+static const struct eth_dev_ops eth_kni_ops = {
+	.dev_start = eth_kni_dev_start,
+	.dev_stop = eth_kni_dev_stop,
+	.dev_configure = eth_kni_dev_configure,
+	.dev_infos_get = eth_kni_dev_info,
+	.rx_queue_setup = eth_kni_rx_queue_setup,
+	.tx_queue_setup = eth_kni_tx_queue_setup,
+	.rx_queue_release = eth_kni_queue_release,
+	.tx_queue_release = eth_kni_queue_release,
+	.link_update = eth_kni_link_update,
+	.stats_get = eth_kni_stats_get,
+	.stats_reset = eth_kni_stats_reset,
+};
+
+static struct rte_eth_dev *
+eth_kni_create(const char *name, unsigned int numa_node)
+{
+	struct pmd_internals *internals = NULL;
+	struct rte_eth_dev_data *data;
+	struct rte_eth_dev *eth_dev;
+	uint16_t nb_rx_queues = 1;
+	uint16_t nb_tx_queues = 1;
+
+	RTE_LOG(INFO, PMD, "Creating kni ethdev on numa socket %u\n",
+			numa_node);
+
+	data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node);
+	if (data == NULL)
+		goto error;
+
+	internals = rte_zmalloc_socket(name, sizeof(*internals), 0, numa_node);
+	if (internals == NULL)
+		goto error;
+
+	/* reserve an ethdev entry */
+	eth_dev = rte_eth_dev_allocate(name);
+	if (eth_dev == NULL)
+		goto error;
+
+	data->dev_private = internals;
+	data->port_id = eth_dev->data->port_id;
+	memmove(data->name, eth_dev->data->name, sizeof(data->name));
+	data->nb_rx_queues = nb_rx_queues;
+	data->nb_tx_queues = nb_tx_queues;
+	data->dev_link = pmd_link;
+	data->mac_addrs = &eth_addr;
+
+	eth_dev->data = data;
+	eth_dev->dev_ops = &eth_kni_ops;
+	eth_dev->driver = NULL;
+
+	data->dev_flags = RTE_ETH_DEV_DETACHABLE;
+	data->kdrv = RTE_KDRV_NONE;
+	data->drv_name = drivername;
+	data->numa_node = numa_node;
+
+	return eth_dev;
+
+error:
+	rte_free(data);
+	rte_free(internals);
+
+	return NULL;
+}
+
+static int
+kni_init(void)
+{
+	if (is_kni_initialized == 0)
+		rte_kni_init(MAX_KNI_PORTS);
+
+	is_kni_initialized += 1;
+
+	return 0;
+}
+
+static int
+eth_kni_probe(const char *name, const char *params __rte_unused)
+{
+	struct rte_eth_dev *eth_dev;
+	int ret;
+
+	RTE_LOG(INFO, PMD, "Initializing eth_kni for %s\n", name);
+
+	ret = kni_init();
+	if (ret < 0)
+		/* Not return error to prevent panic in rte_eal_init() */
+		return 0;
+
+	eth_dev = eth_kni_create(name, rte_socket_id());
+	if (eth_dev == NULL)
+		return -1;
+
+	eth_dev->rx_pkt_burst = eth_kni_rx;
+	eth_dev->tx_pkt_burst = eth_kni_tx;
+
+	return 0;
+}
+
+static int
+eth_kni_remove(const char *name)
+{
+	struct rte_eth_dev *eth_dev;
+	struct pmd_internals *internals;
+
+	RTE_LOG(INFO, PMD, "Un-Initializing eth_kni for %s\n", name);
+
+	/* find the ethdev entry */
+	eth_dev = rte_eth_dev_allocated(name);
+	if (eth_dev == NULL)
+		return -1;
+
+	eth_kni_dev_stop(eth_dev);
+
+	if (eth_dev->data) {
+		internals = eth_dev->data->dev_private;
+		rte_kni_release(internals->kni);
+
+		rte_free(internals);
+	}
+	rte_free(eth_dev->data);
+
+	rte_eth_dev_release_port(eth_dev);
+
+	is_kni_initialized -= 1;
+	if (is_kni_initialized == 0)
+		rte_kni_close();
+
+	return 0;
+}
+
+static struct rte_vdev_driver eth_kni_drv = {
+	.probe = eth_kni_probe,
+	.remove = eth_kni_remove,
+};
+
+DRIVER_REGISTER_VDEV(DRV_NAME, eth_kni_drv);
diff --git a/drivers/net/kni/rte_pmd_kni_version.map b/drivers/net/kni/rte_pmd_kni_version.map
new file mode 100644
index 0000000..61463bf
--- /dev/null
+++ b/drivers/net/kni/rte_pmd_kni_version.map
@@ -0,0 +1,4 @@ 
+DPDK_16.11 {
+
+	local: *;
+};
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index ac50a21..a94983b 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -59,11 +59,6 @@  _LDLIBS-y += -L$(RTE_SDK_BIN)/lib
 #
 # Order is important: from higher level to lower level
 #
-
-ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
-_LDLIBS-$(CONFIG_RTE_LIBRTE_KNI)            += -lrte_kni
-endif
-
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PIPELINE)       += -lrte_pipeline
 _LDLIBS-$(CONFIG_RTE_LIBRTE_TABLE)          += -lrte_table
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PORT)           += -lrte_port
@@ -84,6 +79,10 @@  _LDLIBS-$(CONFIG_RTE_LIBRTE_POWER)          += -lrte_power
 
 _LDLIBS-y += --whole-archive
 
+ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
+_LDLIBS-$(CONFIG_RTE_LIBRTE_KNI)            += -lrte_kni
+endif
+
 _LDLIBS-$(CONFIG_RTE_LIBRTE_TIMER)          += -lrte_timer
 _LDLIBS-$(CONFIG_RTE_LIBRTE_HASH)           += -lrte_hash
 _LDLIBS-$(CONFIG_RTE_LIBRTE_VHOST)          += -lrte_vhost
@@ -114,6 +113,7 @@  _LDLIBS-$(CONFIG_RTE_LIBRTE_ENIC_PMD)       += -lrte_pmd_enic
 _LDLIBS-$(CONFIG_RTE_LIBRTE_FM10K_PMD)      += -lrte_pmd_fm10k
 _LDLIBS-$(CONFIG_RTE_LIBRTE_I40E_PMD)       += -lrte_pmd_i40e
 _LDLIBS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD)      += -lrte_pmd_ixgbe
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_KNI)        += -lrte_pmd_kni
 _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD)       += -lrte_pmd_mlx4 -libverbs
 _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD)       += -lrte_pmd_mlx5 -libverbs
 _LDLIBS-$(CONFIG_RTE_LIBRTE_MPIPE_PMD)      += -lrte_pmd_mpipe -lgxio